[FFmpeg-devel] [PATCH] libswscale/x86/yuv2rgb: Fix segmentation fault when loading unaligned data
Signed-off-by: Ting Fu
---
 libswscale/x86/yuv_2_rgb.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index e05bbb89f5..575a84d921 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -139,7 +139,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 VBROADCASTSD vr_coff, [pointer_c_ditherq + 4 * 8]
 %endif
 %endif
-mova m_y, [py_2indexq + 2 * indexq]
+movu m_y, [py_2indexq + 2 * indexq]
 movh m_u, [pu_indexq + indexq]
 movh m_v, [pv_indexq + indexq]
 .loop0:
@@ -347,7 +347,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 %endif ; PACK RGB15/16
 %endif ; PACK RGB15/16/32
-mova m_y, [py_2indexq + 2 * indexq + 8 * time_num]
+movu m_y, [py_2indexq + 2 * indexq + 8 * time_num]
 movh m_v, [pv_indexq + indexq + 4 * time_num]
 movh m_u, [pu_indexq + indexq + 4 * time_num]
 add imageq, 8 * depth * time_num
-- 
2.17.1
[FFmpeg-devel] [PATCH V2] libswscale/x86/yuv2rgb: Fix segmentation fault when loading unaligned data
Fixes ticket #8532

Signed-off-by: Ting Fu
---
V2: Add ticket info in commit message

 libswscale/x86/yuv_2_rgb.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index e05bbb89f5..575a84d921 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -139,7 +139,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 VBROADCASTSD vr_coff, [pointer_c_ditherq + 4 * 8]
 %endif
 %endif
-mova m_y, [py_2indexq + 2 * indexq]
+movu m_y, [py_2indexq + 2 * indexq]
 movh m_u, [pu_indexq + indexq]
 movh m_v, [pv_indexq + indexq]
 .loop0:
@@ -347,7 +347,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 %endif ; PACK RGB15/16
 %endif ; PACK RGB15/16/32
-mova m_y, [py_2indexq + 2 * indexq + 8 * time_num]
+movu m_y, [py_2indexq + 2 * indexq + 8 * time_num]
 movh m_v, [pv_indexq + indexq + 4 * time_num]
 movh m_u, [pu_indexq + indexq + 4 * time_num]
 add imageq, 8 * depth * time_num
-- 
2.17.1
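A note on why the one-word change fixes the crash: mova assembles to an aligned vector load, which raises a fault when its effective address is not vector-width aligned, while movu performs the same load with no alignment requirement. The luma address py_2indexq + 2 * indexq depends on the caller's buffers and strides, so alignment cannot be guaranteed, hence the segfault of ticket #8532. A minimal C sketch of the same distinction using SSE2 intrinsics (buffer and offset are illustrative, not taken from the patch; the program is expected to crash on the aligned load):

#include <emmintrin.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
    uint8_t *buf = aligned_alloc(16, 64);          /* 16-byte aligned block   */
    const __m128i *p = (const __m128i *)(buf + 2); /* deliberately misaligned */

    __m128i ok  = _mm_loadu_si128(p); /* movdqu: legal at any address            */
    __m128i bad = _mm_load_si128(p);  /* movdqa: faults here, like mova did      */
    (void)ok; (void)bad;
    free(buf);
    return 0;
}

On most current x86 cores an unaligned load instruction applied to data that happens to be aligned costs the same as the aligned form, so the substitution is a general fix rather than a performance trade.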
[FFmpeg-devel] [PATCH 1/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in load_tf_model
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_tf.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 750a476726..e016571304 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -282,6 +282,9 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
     TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->status);
     av_freep(&sess_config);
     if (TF_GetCode(tf_model->status) != TF_OK) {
+        TF_DeleteGraph(tf_model->graph);
+        TF_DeleteStatus(tf_model->status);
+        TF_DeleteSessionOptions(sess_opts);
         av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n", tf_model->ctx.options.sess_config);
         return DNN_ERROR;
@@ -292,6 +295,8 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
     TF_DeleteSessionOptions(sess_opts);
     if (TF_GetCode(tf_model->status) != TF_OK) {
+        TF_DeleteGraph(tf_model->graph);
+        TF_DeleteStatus(tf_model->status);
         av_log(ctx, AV_LOG_ERROR, "Failed to create new session with model graph\n");
         return DNN_ERROR;
     }
@@ -304,6 +309,9 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
                       &init_op, 1, NULL, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK) {
+        TF_DeleteSession(tf_model->session, tf_model->status);
+        TF_DeleteGraph(tf_model->graph);
+        TF_DeleteStatus(tf_model->status);
         av_log(ctx, AV_LOG_ERROR, "Failed to run session when initializing\n");
         return DNN_ERROR;
     }
-- 
2.17.1
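The rationale, stated once for all three hunks: objects returned by the TensorFlow C API (TF_Graph, TF_Status, TF_SessionOptions, TF_Session) own native memory with no ownership links between them, so a failing call must be followed by an explicit release of everything created earlier in the function. A self-contained sketch of that rule, with a hypothetical helper name (it mirrors the shape of the patch, not the backend's exact control flow):

#include <stddef.h>
#include <tensorflow/c/c_api.h>

/* Sketch: release every TF_* object created so far when a call fails. */
static int set_config_or_clean(const void *config, size_t config_len)
{
    TF_Graph *graph = TF_NewGraph();
    TF_Status *status = TF_NewStatus();
    TF_SessionOptions *opts = TF_NewSessionOptions();

    TF_SetConfig(opts, config, config_len, status);
    if (TF_GetCode(status) != TF_OK) {
        TF_DeleteGraph(graph);          /* undo TF_NewGraph()          */
        TF_DeleteStatus(status);        /* undo TF_NewStatus()         */
        TF_DeleteSessionOptions(opts);  /* undo TF_NewSessionOptions() */
        return -1;
    }

    /* ... a real loader would now create the session from graph/opts ... */
    TF_DeleteSessionOptions(opts);
    TF_DeleteStatus(status);
    TF_DeleteGraph(graph);
    return 0;
}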
[FFmpeg-devel] [PATCH 2/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in load_native_model
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_tf.c | 55 ++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index e016571304..c18cb4063f 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -330,7 +330,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_OperationDescription *op_desc;
     TF_Output input;
     int64_t strides[] = {1, 1, 1, 1};
-    TF_Tensor *tensor;
+    TF_Tensor *kernel_tensor = NULL, *biases_tensor = NULL;
     int64_t dims[4];
     int dims_len;
     char name_buffer[NAME_BUFFER_SIZE];
@@ -347,17 +347,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     dims[2] = params->kernel_size;
     dims[3] = params->input_num;
     dims_len = 4;
-    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
-    memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
-    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    kernel_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
+    memcpy(TF_TensorData(kernel_tensor), params->kernel, size * sizeof(float));
+    TF_SetAttrTensor(op_desc, "value", kernel_tensor, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to set value for kernel of conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
     op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to add kernel to conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
 
     snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
@@ -370,8 +368,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_SetAttrType(op_desc, "Tperm", TF_INT32);
     op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to add transpose to conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
 
     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
@@ -385,8 +382,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_SetAttrString(op_desc, "padding", "VALID", 5);
     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to add conv2d to conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
 
     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
@@ -394,17 +390,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
     dims[0] = params->output_num;
     dims_len = 1;
-    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
-    memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
-    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    biases_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
+    memcpy(TF_TensorData(biases_tensor), params->biases, params->output_num * sizeof(float));
+    TF_SetAttrTensor(op_desc, "value", biases_tensor, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to set value for conv_biases of conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
     op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to add conv_biases to conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
 
     snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
@@ -416,8 +410,7 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_SetAttrType(op_desc, "T", TF_FLOAT);
     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        av_log(ctx, AV_LOG_ERROR, "Failed to add bias_add to conv layer %d\n", layer);
-        return DNN_ERROR;
+        goto err;
     }
 
     snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
@@ -440,11 +433,15 @@ static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_o
     TF_SetAttrType(op_desc, "
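This copy of the patch breaks off in the middle of the activation hunk, so the err: cleanup label that every goto err above targets is not visible. From the visible hunks (a single TF_Tensor renamed into NULL-initialized kernel_tensor and biases_tensor, and all per-step error logs collapsed into one path) the label presumably reads like the following sketch; treat it as a reconstruction from context, not the verbatim tail of the patch:

err:
    TF_DeleteTensor(kernel_tensor);  /* NULL until allocated; TF_DeleteTensor maps  */
    TF_DeleteTensor(biases_tensor);  /* to C++ delete, so deleting NULL is a no-op  */
    av_log(ctx, AV_LOG_ERROR, "Failed to add conv layer %d\n", layer);
    return DNN_ERROR;

This also explains the = NULL initializers in the first hunk: they make the two deletes unconditional.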
[FFmpeg-devel] [PATCH 3/3] lavfi/dnn_backend_tensorflow.c: fix mem leak in execute_model_tf
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_tf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index c18cb4063f..c0aa510630 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -766,18 +766,21 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
     if (nb_output != 1) {
         // currently, the filter does not need multiple outputs,
         // so we just pending the support until we really need it.
+        TF_DeleteTensor(input_tensor);
         avpriv_report_missing_feature(ctx, "multiple outputs");
         return DNN_ERROR;
     }
 
     tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs));
     if (tf_outputs == NULL) {
+        TF_DeleteTensor(input_tensor);
         av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *tf_outputs\n"); \
         return DNN_ERROR;
     }
 
     output_tensors = av_mallocz_array(nb_output, sizeof(*output_tensors));
     if (!output_tensors) {
+        TF_DeleteTensor(input_tensor);
         av_freep(&tf_outputs);
         av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output tensor\n"); \
         return DNN_ERROR;
@@ -786,6 +789,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
     for (int i = 0; i < nb_output; ++i) {
         tf_outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
         if (!tf_outputs[i].oper) {
+            TF_DeleteTensor(input_tensor);
            av_freep(&tf_outputs);
            av_freep(&output_tensors);
            av_log(ctx, AV_LOG_ERROR, "Could not find output \"%s\" in model\n", output_names[i]); \
@@ -799,6 +803,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
                   tf_outputs, output_tensors, nb_output,
                   NULL, 0, NULL, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK) {
+        TF_DeleteTensor(input_tensor);
         av_freep(&tf_outputs);
         av_freep(&output_tensors);
         av_log(ctx, AV_LOG_ERROR, "Failed to run session when executing model\n");
-- 
2.17.1
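All five hunks repeat the same TF_DeleteTensor(input_tensor) by hand. For illustration only, a single-exit shape that avoids the repetition is sketched below; the function name and the sketch-local return enum are hypothetical, and this is an editorial alternative, not what the patch does:

#include <tensorflow/c/c_api.h>

typedef enum { DNN_SKETCH_SUCCESS, DNN_SKETCH_ERROR } DNNSketchReturn;

/* Funnel every failure through one label so the tensor is freed exactly once. */
static DNNSketchReturn run_session_sketch(TF_Session *session, TF_Status *status,
                                          TF_Output input, TF_Tensor *input_tensor,
                                          TF_Output *outputs, TF_Tensor **output_tensors,
                                          int nb_output)
{
    DNNSketchReturn ret = DNN_SKETCH_ERROR;

    TF_SessionRun(session, NULL,
                  &input, &input_tensor, 1,
                  outputs, output_tensors, nb_output,
                  NULL, 0, NULL, status);
    if (TF_GetCode(status) != TF_OK)
        goto end;
    ret = DNN_SKETCH_SUCCESS;
end:
    TF_DeleteTensor(input_tensor); /* needed on success and failure alike */
    return ret;
}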
[FFmpeg-devel] [PATCH 1/2] dnn_backend_native_layer_mathunary: add abs support
more math unary operations will be added here

It can be tested with the model file generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.subtract(x, 0.5)
x2 = tf.abs(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/Makefile                      |  1 +
 libavfilter/dnn/dnn_backend_native.h          |  1 +
 .../dnn/dnn_backend_native_layer_mathunary.c  | 80 +++++++++++++++
 .../dnn/dnn_backend_native_layer_mathunary.h  | 45 +++++++++
 libavfilter/dnn/dnn_backend_native_layers.c   |  2 +
 tools/python/convert_from_tensorflow.py       | 16 +++-
 tools/python/convert_header.py                |  2 +-
 7 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_mathunary.c
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_mathunary.h

diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index ce529587e1..bb37298b58 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,6 +6,7 @@
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_conv2d.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_depth2space.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_maximum.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_mathbinary.o
+OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_mathunary.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn/dnn_backend_tf.o

diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 5d76d87915..61f0cb202f 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -42,6 +42,7 @@ typedef enum {
     DLT_MIRROR_PAD = 3,
     DLT_MAXIMUM = 4,
     DLT_MATH_BINARY = 5,
+    DLT_MATH_UNARY = 6,
     DLT_COUNT
 } DNNLayerType;

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
new file mode 100644
index 00..d65af151cd
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN native backend implementation.
+ */
+
+#include "dnn_backend_native.h"
+#include "libavutil/avassert.h"
+#include "dnn_backend_native_layer_mathunary.h"
+
+int dnn_load_layer_math_unary(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    DnnLayerMathUnaryParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if(!params)
+        return 0;
+
+    params->un_op = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 4;
+    layer->params = params;
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+
+    return dnn_size;
+
+}
+
+int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_operand_indexes,
+                                 int32_t output_operand_index, const void *parameters)
+{
+    const DnnOperand *input = &operands[input_operand_indexes[0]];
+    DnnOperand *output = &operands[output_operand_index];
+    const DnnLayerMathUnaryParams *params = (const DnnLayerMathUnaryParams *)parameters;
+    int dims_count;
+    const float *src;
+    flo
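The new .c file is cut off above, mid-declaration, at the start of dnn_execute_layer_math_unary. The context lines visible in the follow-up sin patch below (which show the DMUO_ABS case using FFABS) pin down the core loop, so the missing remainder presumably reads roughly as follows; take this as a reconstruction from those context lines, with the output-allocation prologue inferred from the other native layers, not as the verbatim patch text:

    float *dst;

    for (int i = 0; i < 4; ++i)
        output->dims[i] = input->dims[i];

    output->data_type = input->data_type;
    output->length = calculate_operand_data_length(output);
    output->data = av_realloc(output->data, output->length);
    if (!output->data)
        return DNN_ERROR;

    dims_count = calculate_operand_dims_count(output);
    src = input->data;
    dst = output->data;

    switch (params->un_op) {
    case DMUO_ABS:
        for (int i = 0; i < dims_count; ++i)
            dst[i] = FFABS(src[i]);   /* confirmed by context lines of the sin patch */
        return 0;
    default:
        return -1;
    }
}

The unit test below relies on exactly this contract: it passes operands[1].data == NULL and frees the buffer the layer allocates.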
[FFmpeg-devel] [PATCH 2/2] dnn-layer-mathunary-test: add unit test for abs
Signed-off-by: Ting Fu
---
 tests/dnn/.gitignore                 |  1 +
 tests/dnn/Makefile                   |  1 +
 tests/dnn/dnn-layer-mathunary-test.c | 81 ++++++++++++++++++++++++
 tests/fate/dnn.mak                   |  5 ++
 4 files changed, 88 insertions(+)
 create mode 100644 tests/dnn/dnn-layer-mathunary-test.c

diff --git a/tests/dnn/.gitignore b/tests/dnn/.gitignore
index d78c5c1aec..1fcd2410b4 100644
--- a/tests/dnn/.gitignore
+++ b/tests/dnn/.gitignore
@@ -3,3 +3,4 @@
 /dnn-layer-maximum-test
 /dnn-layer-pad-test
 /dnn-layer-mathbinary-test
+/dnn-layer-mathunary-test

diff --git a/tests/dnn/Makefile b/tests/dnn/Makefile
index 1f96710821..64591b7851 100644
--- a/tests/dnn/Makefile
+++ b/tests/dnn/Makefile
@@ -3,6 +3,7 @@ DNNTESTPROGS += dnn-layer-conv2d
 DNNTESTPROGS += dnn-layer-depth2space
 DNNTESTPROGS += dnn-layer-mathbinary
 DNNTESTPROGS += dnn-layer-maximum
+DNNTESTPROGS += dnn-layer-mathunary
 
 DNNTESTOBJS  := $(DNNTESTOBJS:%=$(DNNTESTSDIR)%) $(DNNTESTPROGS:%=$(DNNTESTSDIR)/%-test.o)
 DNNTESTPROGS := $(DNNTESTPROGS:%=$(DNNTESTSDIR)/%-test$(EXESUF))

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
new file mode 100644
index 00..f032ca0684
--- /dev/null
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "libavfilter/dnn/dnn_backend_native_layer_mathunary.h"
+#include "libavutil/avassert.h"
+
+#define EPS 0.1
+
+static float get_expected(float f, DNNMathUnaryOperation op)
+{
+    switch (op)
+    {
+    case DMUO_ABS:
+        return (f >= 0) ? f : -f;
+    default:
+        av_assert0(!"not supported yet");
+        return 0.f;
+    }
+}
+
+static int test(DNNMathUnaryOperation op)
+{
+    DnnLayerMathUnaryParams params;
+    DnnOperand operands[2];
+    int32_t input_indexes[1];
+    float input[1*1*2*3] = {
+        -3, 2.5, 2, -2.1, 7.8, 100};
+    float *output;
+
+    params.un_op = op;
+
+    operands[0].data = input;
+    operands[0].dims[0] = 1;
+    operands[0].dims[1] = 1;
+    operands[0].dims[2] = 2;
+    operands[0].dims[3] = 3;
+    operands[1].data = NULL;
+
+    input_indexes[0] = 0;
+    dnn_execute_layer_math_unary(operands, input_indexes, 1, &params);
+
+    output = operands[1].data;
+    for (int i = 0; i < sizeof(input) / sizeof(float); ++i) {
+        float expected_output = get_expected(input[i], op);
+        if(fabs(output[i] - expected_output) > EPS) {
+            printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output);
+            av_freep(&output);
+            return 1;
+        }
+    }
+
+    av_freep(&output);
+    return 0;
+}
+
+int main(int agrc, char **argv)
+{
+    if (test(DMUO_ABS))
+        return 1;
+    return 0;
+}

diff --git a/tests/fate/dnn.mak b/tests/fate/dnn.mak
index 5a8e6296a6..4a50b16382 100644
--- a/tests/fate/dnn.mak
+++ b/tests/fate/dnn.mak
@@ -23,6 +23,11 @@ fate-dnn-layer-maximum: $(DNNTESTSDIR)/dnn-layer-maximum-test$(EXESUF)
 fate-dnn-layer-maximum: CMD = run $(DNNTESTSDIR)/dnn-layer-maximum-test$(EXESUF)
 fate-dnn-layer-maximum: CMP = null
 
+FATE_DNN += fate-dnn-layer-mathunary
+fate-dnn-layer-mathunary: $(DNNTESTSDIR)/dnn-layer-mathunary-test$(EXESUF)
+fate-dnn-layer-mathunary: CMD = run $(DNNTESTSDIR)/dnn-layer-mathunary-test$(EXESUF)
+fate-dnn-layer-mathunary: CMP = null
+
 FATE-yes += $(FATE_DNN)
 
 fate-dnn: $(FATE_DNN)
-- 
2.17.1
[FFmpeg-devel] [PATCH 1/6] dnn_backend_native_layer_mathunary: add sin support
It can be tested with the model file generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 3.14)
x2 = tf.sin(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 6 ++++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index d65af151cd..5324d15bc3 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -23,6 +23,8 @@
  * DNN native backend implementation.
  */
 
+#include <math.h>
+
 #include "dnn_backend_native.h"
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_mathunary.h"
@@ -74,6 +76,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = FFABS(src[i]);
         return 0;
+    case DMUO_SIN:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = sin(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 4e44003b66..31a1ea8fb6 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -31,6 +31,7 @@
 typedef enum {
     DMUO_ABS = 0,
+    DMUO_SIN = 1,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 8c0a9be7be..b17facdda8 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0}
+        self.mathun2code = {'Abs':0, 'Sin':1}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index ad4491729a..c79fef4be8 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 6
+minor = 7
-- 
2.17.1
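One implementation detail worth noting: src and dst are float, while sin() is the double-precision libm function, so every element is widened to double and truncated back per call. A hypothetical single-precision alternative would use the C99 sinf() variant; the sketch below illustrates that trade-off and is not a change this patch (or the series) makes:

#include <math.h>

/* Hypothetical variant: float in, float out, no per-element double round-trip. */
static void apply_sinf(float *dst, const float *src, int n)
{
    for (int i = 0; i < n; ++i)
        dst[i] = sinf(src[i]);
}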
[FFmpeg-devel] [PATCH 3/6] dnn_backend_native_layer_mathunary: add cos support
It can be tested with the model generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 1.5)
x2 = tf.cos(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 5324d15bc3..fa8710a3ed 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -80,6 +80,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = sin(src[i]);
         return 0;
+    case DMUO_COS:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = cos(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 31a1ea8fb6..f70aea846b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -32,6 +32,7 @@ typedef enum {
     DMUO_ABS = 0,
     DMUO_SIN = 1,
+    DMUO_COS = 2,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index b17facdda8..9e99fccdab 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index c79fef4be8..ba6d18126e 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 7
+minor = 8
-- 
2.17.1
[FFmpeg-devel] [PATCH 2/6] dnn-layer-mathunary-test: add unit test for sin
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index f032ca0684..ed42198195 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -32,6 +32,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
 {
     case DMUO_ABS:
         return (f >= 0) ? f : -f;
+    case DMUO_SIN:
+        return sin(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -77,5 +79,7 @@ int main(int agrc, char **argv)
 {
     if (test(DMUO_ABS))
         return 1;
+    if (test(DMUO_SIN))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 6/6] dnn-layer-mathunary-test: add unit test for tan
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 23e1766ad0..9a7e07c98c 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -36,6 +36,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return sin(f);
     case DMUO_COS:
         return cos(f);
+    case DMUO_TAN:
+        return tan(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -85,5 +87,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_COS))
         return 1;
+    if (test(DMUO_TAN))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 5/6] dnn_backend_native_layer_mathunary: add tan support
It can be tested with the model generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.multiply(x, 0.78)
x2 = tf.tan(x1)
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index fa8710a3ed..e6e45a6b9f 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -84,6 +84,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = cos(src[i]);
         return 0;
+    case DMUO_TAN:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = tan(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index f70aea846b..0467717a8b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -33,6 +33,7 @@ typedef enum {
     DMUO_ABS = 0,
     DMUO_SIN = 1,
     DMUO_COS = 2,
+    DMUO_TAN = 3,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 9e99fccdab..9da6a43612 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index ba6d18126e..b7fb0f797a 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 8
+minor = 9
-- 
2.17.1
[FFmpeg-devel] [PATCH 4/6] dnn-layer-mathunary-test: add unit test for cos
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index ed42198195..23e1766ad0 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -34,6 +34,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return (f >= 0) ? f : -f;
     case DMUO_SIN:
         return sin(f);
+    case DMUO_COS:
+        return cos(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -81,5 +83,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_SIN))
         return 1;
+    if (test(DMUO_COS))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 1/6] dnn_backend_native_layer_mathunary: add asin support
It can be tested with the model generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.asin(x)
x2 = tf.divide(x1, 3.1416/2) # pi/2
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 90fac6aa67..3a147c2b3c 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -92,6 +92,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = tan(src[i]);
         return 0;
+    case DMUO_ASIN:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = asin(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 40a9bb5fb8..1c25db5a42 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -34,6 +34,7 @@ typedef enum {
     DMUO_SIN = 1,
     DMUO_COS = 2,
     DMUO_TAN = 3,
+    DMUO_ASIN = 4,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 9da6a43612..5e526e31ce 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4}
        self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index b7fb0f797a..2b6afe8d13 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 9
+minor = 10
-- 
2.17.1
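A note on the 3.1416/2 divisor in the script above: the input pixels are scaled into [0, 1], asin maps that interval onto [0, pi/2], and dividing by pi/2 brings dnn_out back into the displayable [0, 1] range:

    asin : [0, 1] -> [0, pi/2],    x2 = asin(x) / (pi/2) in [0, 1]

The acos and atan patches below apply the same normalization with pi/2 and pi/4 respectively, since acos(0) = pi/2 and atan(1) = pi/4 bound their outputs on this input range.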
[FFmpeg-devel] [PATCH 4/6] dnn-layer-math-unary-test: add unit test for acos
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index ac26f7445f..540ea4cef5 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -40,6 +40,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return tan(f);
     case DMUO_ASIN:
         return asin(f);
+    case DMUO_ACOS:
+        return acos(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -93,5 +95,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_ASIN))
         return 1;
+    if (test(DMUO_ACOS))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 2/6] dnn-layer-math-unary-test: add unit test for asin
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 9a7e07c98c..ac26f7445f 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -38,6 +38,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return cos(f);
     case DMUO_TAN:
         return tan(f);
+    case DMUO_ASIN:
+        return asin(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -89,5 +91,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_TAN))
         return 1;
+    if (test(DMUO_ASIN))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 6/6] dnn-layer-math-unary-test: add unit test for atan
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 540ea4cef5..bf77c44bbe 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -42,6 +42,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return asin(f);
     case DMUO_ACOS:
         return acos(f);
+    case DMUO_ATAN:
+        return atan(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -97,5 +99,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_ACOS))
         return 1;
+    if (test(DMUO_ATAN))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 3/6] dnn_backend_native_layer_mathunary: add acos support
It can be tested with the model generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.acos(x)
x2 = tf.divide(x1, 3.1416/2) # pi/2
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 3a147c2b3c..d130058546 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -96,6 +96,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = asin(src[i]);
         return 0;
+    case DMUO_ACOS:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = acos(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 1c25db5a42..f146248567 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -35,6 +35,7 @@ typedef enum {
     DMUO_COS = 2,
     DMUO_TAN = 3,
     DMUO_ASIN = 4,
+    DMUO_ACOS = 5,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 5e526e31ce..78297e48a9 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 2b6afe8d13..4a8e44b4aa 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 10
+minor = 11
-- 
2.17.1
[FFmpeg-devel] [PATCH 5/6] dnn_backend_native_layer_mathunary: add atan support
It can be tested with the model generated with below python script:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input.jpeg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x1 = tf.atan(x)
x2 = tf.divide(x1, 3.1416/4) # pi/4
y = tf.identity(x2, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)
print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index d130058546..42615c43d5 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -100,6 +100,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = acos(src[i]);
         return 0;
+    case DMUO_ATAN:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = atan(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index f146248567..13fa33178a 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -36,6 +36,7 @@ typedef enum {
     DMUO_TAN = 3,
     DMUO_ASIN = 4,
     DMUO_ACOS = 5,
+    DMUO_ATAN = 6,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 78297e48a9..b90c31c495 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 4a8e44b4aa..73cf23bf53 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 11
+minor = 12
-- 
2.17.1
[FFmpeg-devel] [PATCH 01/12] dnn_backend_native_layer_mathunary: add sinh support
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 42615c43d5..2630fe07e2 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -104,6 +104,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = atan(src[i]);
         return 0;
+    case DMUO_SINH:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = sinh(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 13fa33178a..760930c60e 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -37,6 +37,7 @@ typedef enum {
     DMUO_ASIN = 4,
     DMUO_ACOS = 5,
     DMUO_ATAN = 6,
+    DMUO_SINH = 7,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index b90c31c495..6f34a71ab4 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 73cf23bf53..4747f41395 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 12
+minor = 13
-- 
2.17.1
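This patch opens a 12-patch series covering the hyperbolic functions; the shared test script is quoted in full in the atanh patch at the end of the series. Its normalization divisors are just the function values at the top of the [0, 1] input range, rounded by the author:

    sinh(1.0) = (e - 1/e)/2 = 1.1752...  (the script's 1.176)
    cosh(1.0) = (e + 1/e)/2 = 1.5431...  (the script's 1.55)
    tanh(1.0) = 0.7616...                (the script's 0.77)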
[FFmpeg-devel] [PATCH 08/12] dnn-layer-math-unary-test: add unit test for asinh
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 6885b4d318..90fce71a0c 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -50,6 +50,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return cosh(f);
     case DMUO_TANH:
         return tanh(f);
+    case DMUO_ASINH:
+        return asinh(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -113,5 +115,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_TANH))
         return 1;
+    if (test(DMUO_ASINH))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 03/12] dnn_backend_native_layer_mathunary: add cosh support
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index 2630fe07e2..ddb70996e7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -108,6 +108,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = sinh(src[i]);
         return 0;
+    case DMUO_COSH:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = cosh(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 760930c60e..5a486b4f5f 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -38,6 +38,7 @@ typedef enum {
     DMUO_ACOS = 5,
     DMUO_ATAN = 6,
     DMUO_SINH = 7,
+    DMUO_COSH = 8,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 6f34a71ab4..96da44c4a8 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 4747f41395..a73f51ba48 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 13
+minor = 14
-- 
2.17.1
[FFmpeg-devel] [PATCH 05/12] dnn_backend_native_layer_mathunary: add tanh support
Signed-off-by: Ting Fu
---
 libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 ++++
 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 +
 tools/python/convert_from_tensorflow.py              | 2 +-
 tools/python/convert_header.py                       | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
index ddb70996e7..ccdbcc21e0 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c
@@ -112,6 +112,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper
         for (int i = 0; i < dims_count; ++i)
             dst[i] = cosh(src[i]);
         return 0;
+    case DMUO_TANH:
+        for (int i = 0; i < dims_count; ++i)
+            dst[i] = tanh(src[i]);
+        return 0;
     default:
         return -1;
     }
diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
index 5a486b4f5f..ae0c1e1cdd 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h
@@ -39,6 +39,7 @@ typedef enum {
     DMUO_ATAN = 6,
     DMUO_SINH = 7,
     DMUO_COSH = 8,
+    DMUO_TANH = 9,
     DMUO_COUNT
 } DNNMathUnaryOperation;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 96da44c4a8..f98a3cae3d 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -72,7 +72,7 @@ class TFConverter:
         self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4}
-        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8}
+        self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index a73f51ba48..d2753f0af0 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
 major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 14
+minor = 15
-- 
2.17.1
[FFmpeg-devel] [PATCH 04/12] dnn-layer-math-unary-test: add unit test for cosh
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index a1ff05e5fb..0280debc0b 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -46,6 +46,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return atan(f);
     case DMUO_SINH:
         return sinh(f);
+    case DMUO_COSH:
+        return cosh(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -105,5 +107,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_SINH))
         return 1;
+    if (test(DMUO_COSH))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 02/12] dnn-layer-math-unary-test: add unit test for sinh
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index bf77c44bbe..a1ff05e5fb 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -44,6 +44,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return acos(f);
     case DMUO_ATAN:
         return atan(f);
+    case DMUO_SINH:
+        return sinh(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -101,5 +103,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_ATAN))
         return 1;
+    if (test(DMUO_SINH))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 06/12] dnn-layer-math-unary-test: add unit test for tanh
Signed-off-by: Ting Fu
---
 tests/dnn/dnn-layer-mathunary-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c
index 0280debc0b..6885b4d318 100644
--- a/tests/dnn/dnn-layer-mathunary-test.c
+++ b/tests/dnn/dnn-layer-mathunary-test.c
@@ -48,6 +48,8 @@ static float get_expected(float f, DNNMathUnaryOperation op)
         return sinh(f);
     case DMUO_COSH:
         return cosh(f);
+    case DMUO_TANH:
+        return tanh(f);
     default:
         av_assert0(!"not supported yet");
         return 0.f;
@@ -109,5 +111,7 @@ int main(int agrc, char **argv)
         return 1;
     if (test(DMUO_COSH))
         return 1;
+    if (test(DMUO_TANH))
+        return 1;
     return 0;
 }
-- 
2.17.1
[FFmpeg-devel] [PATCH 10/12] dnn-layer-math-unary-test: add unit test for acosh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 90fce71a0c..5587e47ad5 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -52,6 +52,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return tanh(f); case DMUO_ASINH: return asinh(f); +case DMUO_ACOSH: +return acosh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -117,5 +119,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_ASINH)) return 1; +if (test(DMUO_ACOSH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 12/12] dnn-layer-math-unary-test: add unit test for atanh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 5587e47ad5..1815f79f34 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -54,6 +54,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return asinh(f); case DMUO_ACOSH: return acosh(f); +case DMUO_ATANH: +return atanh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -121,5 +123,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_ACOSH)) return 1; +if (test(DMUO_ATANH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 11/12] dnn_backend_native_layer_mathunary: add atanh support
It can be tested with the model generated by the Python script below: import tensorflow as tf import numpy as np import imageio in_img = imageio.imread('input.jpeg') in_img = in_img.astype(np.float32)/255.0 in_data = in_img[np.newaxis, :] x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in') # please uncomment the part you want to test x_sinh_1 = tf.sinh(x) x_out = tf.divide(x_sinh_1, 1.176) # sinh(1.0) x_cosh_1 = tf.cosh(x) x_out = tf.divide(x_cosh_1, 1.55) # cosh(1.0) x_tanh_1 = tf.tanh(x) x_out = tf.divide(x_tanh_1, 0.77) # tanh(1.0) x_asinh_1 = tf.asinh(x) x_out = tf.divide(x_asinh_1, 0.89) # asinh(1.0/1.1) x_acosh_1 = tf.add(x, 1.1) x_acosh_2 = tf.acosh(x_acosh_1) # accept (1, inf) x_out = tf.divide(x_acosh_2, 1.4) # acosh(2.1) x_atanh_1 = tf.divide(x, 1.1) x_atanh_2 = tf.atanh(x_atanh_1) # accept (-1, 1) x_out = tf.divide(x_atanh_2, 1.55) # atanh(1.0/1.1) y = tf.identity(x_out, name='dnn_out') #please only preserve the x_out you want to test sess=tf.Session() sess.run(tf.global_variables_initializer()) graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out']) tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False) print("image_process.pb generated, please use \ path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n") output = sess.run(y, feed_dict={x: in_data}) imageio.imsave("out.jpg", np.squeeze(output)) Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index b77b84a794..c83d50db64 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -124,6 +124,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = acosh(src[i]); return 0; +case DMUO_ATANH: +for (int i = 0; i < dims_count; ++i) +dst[i] = atanh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index eb30231549..8076356ba4 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -42,6 +42,7 @@ typedef enum { DMUO_TANH = 9, DMUO_ASINH = 10, DMUO_ACOSH = 11, +DMUO_ATANH = 12, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 1e73e3aefe..85db7bf710 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 'Atanh':12} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git
a/tools/python/convert_header.py b/tools/python/convert_header.py index 8fc3438552..9851d84144 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 17 +minor = 18 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
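The constants in the test script are domain guards rather than magic numbers: acosh is real-valued only for inputs >= 1, so the script shifts the normalized pixel values up with tf.add(x, 1.1) before tf.acosh, and atanh is real-valued only on (-1, 1), so it scales down with tf.divide(x, 1.1) before tf.atanh. A small C illustration of those constraints:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 0.5; /* a normalized pixel value in [0, 1] */

    /* acosh needs input >= 1: shift the value up first, as the script does. */
    printf("acosh(%g + 1.1) = %g\n", x, acosh(x + 1.1));

    /* atanh needs |input| < 1: scale the value down first. */
    printf("atanh(%g / 1.1) = %g\n", x, atanh(x / 1.1));

    /* Feeding an out-of-domain value yields NaN instead of a usable pixel. */
    printf("acosh(%g) = %g\n", x, acosh(x));
    return 0;
}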
[FFmpeg-devel] [PATCH 07/12] dnn_backend_native_layer_mathunary: add asinh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index ccdbcc21e0..83df98d0f8 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -116,6 +116,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = tanh(src[i]); return 0; +case DMUO_ASINH: +for (int i = 0; i < dims_count; ++i) +dst[i] = asinh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index ae0c1e1cdd..fbe9af5c7d 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -40,6 +40,7 @@ typedef enum { DMUO_SINH = 7, DMUO_COSH = 8, DMUO_TANH = 9, +DMUO_ASINH = 10, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index f98a3cae3d..0d756c8109 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index d2753f0af0..3211c13f6d 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 15 +minor = 16 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 09/12] dnn_backend_native_layer_mathunary: add acosh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index 83df98d0f8..b77b84a794 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -120,6 +120,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = asinh(src[i]); return 0; +case DMUO_ACOSH: +for (int i = 0; i < dims_count; ++i) +dst[i] = acosh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index fbe9af5c7d..eb30231549 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -41,6 +41,7 @@ typedef enum { DMUO_COSH = 8, DMUO_TANH = 9, DMUO_ASINH = 10, +DMUO_ACOSH = 11, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 0d756c8109..1e73e3aefe 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index 3211c13f6d..8fc3438552 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 16 +minor = 17 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 03/12] dnn_backend_native_layer_mathunary: add cosh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index 2630fe07e2..ddb70996e7 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -108,6 +108,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = sinh(src[i]); return 0; +case DMUO_COSH: +for (int i = 0; i < dims_count; ++i) +dst[i] = cosh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index 760930c60e..5a486b4f5f 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -38,6 +38,7 @@ typedef enum { DMUO_ACOS = 5, DMUO_ATAN = 6, DMUO_SINH = 7, +DMUO_COSH = 8, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 6f34a71ab4..96da44c4a8 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index 4747f41395..a73f51ba48 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 13 +minor = 14 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 02/12] dnn-layer-math-unary-test: add unit test for sinh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index bf77c44bbe..a1ff05e5fb 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -44,6 +44,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return acos(f); case DMUO_ATAN: return atan(f); +case DMUO_SINH: +return sinh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -101,5 +103,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_ATAN)) return 1; +if (test(DMUO_SINH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 01/12] dnn_backend_native_layer_mathunary: add sinh support
Signed-off-by: Ting Fu --- V2: Add more test numbers; fix incorrect atanh unit test libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index 42615c43d5..2630fe07e2 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -104,6 +104,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = atan(src[i]); return 0; +case DMUO_SINH: +for (int i = 0; i < dims_count; ++i) +dst[i] = sinh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index 13fa33178a..760930c60e 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -37,6 +37,7 @@ typedef enum { DMUO_ASIN = 4, DMUO_ACOS = 5, DMUO_ATAN = 6, +DMUO_SINH = 7, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index b90c31c495..6f34a71ab4 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index 73cf23bf53..4747f41395 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 12 +minor = 13 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 04/12] dnn-layer-math-unary-test: add unit test for cosh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index a1ff05e5fb..0280debc0b 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -46,6 +46,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return atan(f); case DMUO_SINH: return sinh(f); +case DMUO_COSH: +return cosh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -105,5 +107,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_SINH)) return 1; +if (test(DMUO_COSH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 05/12] dnn_backend_native_layer_mathunary: add tanh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index ddb70996e7..ccdbcc21e0 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -112,6 +112,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = cosh(src[i]); return 0; +case DMUO_TANH: +for (int i = 0; i < dims_count; ++i) +dst[i] = tanh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index 5a486b4f5f..ae0c1e1cdd 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -39,6 +39,7 @@ typedef enum { DMUO_ATAN = 6, DMUO_SINH = 7, DMUO_COSH = 8, +DMUO_TANH = 9, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 96da44c4a8..f98a3cae3d 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index a73f51ba48..d2753f0af0 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 14 +minor = 15 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 10/12] dnn-layer-math-unary-test: add unit test for acosh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 90fce71a0c..5587e47ad5 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -52,6 +52,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return tanh(f); case DMUO_ASINH: return asinh(f); +case DMUO_ACOSH: +return acosh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -117,5 +119,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_ASINH)) return 1; +if (test(DMUO_ACOSH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 09/12] dnn_backend_native_layer_mathunary: add acosh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index 83df98d0f8..b77b84a794 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -120,6 +120,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = asinh(src[i]); return 0; +case DMUO_ACOSH: +for (int i = 0; i < dims_count; ++i) +dst[i] = acosh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index fbe9af5c7d..eb30231549 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -41,6 +41,7 @@ typedef enum { DMUO_COSH = 8, DMUO_TANH = 9, DMUO_ASINH = 10, +DMUO_ACOSH = 11, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 0d756c8109..1e73e3aefe 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index 3211c13f6d..8fc3438552 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 16 +minor = 17 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 06/12] dnn-layer-math-unary-test: add unit test for tanh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 0280debc0b..6885b4d318 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -48,6 +48,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return sinh(f); case DMUO_COSH: return cosh(f); +case DMUO_TANH: +return tanh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -109,5 +111,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_COSH)) return 1; +if (test(DMUO_TANH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 11/12] dnn_backend_native_layer_mathunary: add atanh support
It can be tested with the model generated by the Python script below: import tensorflow as tf import numpy as np import imageio in_img = imageio.imread('input.jpeg') in_img = in_img.astype(np.float32)/255.0 in_data = in_img[np.newaxis, :] x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in') # please uncomment the part you want to test x_sinh_1 = tf.sinh(x) x_out = tf.divide(x_sinh_1, 1.176) # sinh(1.0) x_cosh_1 = tf.cosh(x) x_out = tf.divide(x_cosh_1, 1.55) # cosh(1.0) x_tanh_1 = tf.tanh(x) x_out = tf.divide(x_tanh_1, 0.77) # tanh(1.0) x_asinh_1 = tf.asinh(x) x_out = tf.divide(x_asinh_1, 0.89) # asinh(1.0/1.1) x_acosh_1 = tf.add(x, 1.1) x_acosh_2 = tf.acosh(x_acosh_1) # accept (1, inf) x_out = tf.divide(x_acosh_2, 1.4) # acosh(2.1) x_atanh_1 = tf.divide(x, 1.1) x_atanh_2 = tf.atanh(x_atanh_1) # accept (-1, 1) x_out = tf.divide(x_atanh_2, 1.55) # atanh(1.0/1.1) y = tf.identity(x_out, name='dnn_out') #please only preserve the x_out you want to test sess=tf.Session() sess.run(tf.global_variables_initializer()) graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out']) tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False) print("image_process.pb generated, please use \ path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n") output = sess.run(y, feed_dict={x: in_data}) imageio.imsave("out.jpg", np.squeeze(output)) Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index b77b84a794..c83d50db64 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -124,6 +124,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = acosh(src[i]); return 0; +case DMUO_ATANH: +for (int i = 0; i < dims_count; ++i) +dst[i] = atanh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index eb30231549..8076356ba4 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -42,6 +42,7 @@ typedef enum { DMUO_TANH = 9, DMUO_ASINH = 10, DMUO_ACOSH = 11, +DMUO_ATANH = 12, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 1e73e3aefe..85db7bf710 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 'Atanh':12} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git
a/tools/python/convert_header.py b/tools/python/convert_header.py index 8fc3438552..9851d84144 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 17 +minor = 18 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 07/12] dnn_backend_native_layer_mathunary: add asinh support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_native_layer_mathunary.c | 4 libavfilter/dnn/dnn_backend_native_layer_mathunary.h | 1 + tools/python/convert_from_tensorflow.py | 2 +- tools/python/convert_header.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c index ccdbcc21e0..83df98d0f8 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.c +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.c @@ -116,6 +116,10 @@ int dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_oper for (int i = 0; i < dims_count; ++i) dst[i] = tanh(src[i]); return 0; +case DMUO_ASINH: +for (int i = 0; i < dims_count; ++i) +dst[i] = asinh(src[i]); +return 0; default: return -1; } diff --git a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h index ae0c1e1cdd..fbe9af5c7d 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_mathunary.h +++ b/libavfilter/dnn/dnn_backend_native_layer_mathunary.h @@ -40,6 +40,7 @@ typedef enum { DMUO_SINH = 7, DMUO_COSH = 8, DMUO_TANH = 9, +DMUO_ASINH = 10, DMUO_COUNT } DNNMathUnaryOperation; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index f98a3cae3d..0d756c8109 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -72,7 +72,7 @@ class TFConverter: self.conv2d_scopename_inputname_dict = {} self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} -self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9} +self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} self.name_operand_dict = {} diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index d2753f0af0..3211c13f6d 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE' major = 1 # increase minor when we don't have to re-convert the model file -minor = 15 +minor = 16 -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 08/12] dnn-layer-math-unary-test: add unit test for asinh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 4 1 file changed, 4 insertions(+) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 6885b4d318..90fce71a0c 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -50,6 +50,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return cosh(f); case DMUO_TANH: return tanh(f); +case DMUO_ASINH: +return asinh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -113,5 +115,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_TANH)) return 1; +if (test(DMUO_ASINH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 12/12] dnn-layer-math-unary-test: add unit test for atanh
Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 5587e47ad5..683e623d95 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -54,6 +54,8 @@ static float get_expected(float f, DNNMathUnaryOperation op) return asinh(f); case DMUO_ACOSH: return acosh(f); +case DMUO_ATANH: +return atanh(f); default: av_assert0(!"not supported yet"); return 0.f; @@ -65,8 +67,8 @@ static int test(DNNMathUnaryOperation op) DnnLayerMathUnaryParams params; DnnOperand operands[2]; int32_t input_indexes[1]; -float input[1*1*2*3] = { --3, 2.5, 2, -2.1, 7.8, 100}; +float input[1*1*3*3] = { +0.1, 0.5, 0.75, -3, 2.5, 2, -2.1, 7.8, 100}; float *output; params.un_op = op; @@ -74,7 +76,7 @@ static int test(DNNMathUnaryOperation op) operands[0].data = input; operands[0].dims[0] = 1; operands[0].dims[1] = 1; -operands[0].dims[2] = 2; +operands[0].dims[2] = 3; operands[0].dims[3] = 3; operands[1].data = NULL; @@ -121,5 +123,7 @@ int main(int agrc, char **argv) return 1; if (test(DMUO_ACOSH)) return 1; +if (test(DMUO_ATANH)) +return 1; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
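Relative to V1, this test grows the input tensor from 1x1x2x3 to 1x1x3x3 and prepends 0.1, 0.5 and 0.75, values that lie inside atanh's (-1, 1) domain, so the new operation is exercised where the reference result is finite; the original entries such as -3 or 100 fall outside that domain. A quick standalone check of the added values (a sketch, not the test itself):

#include <math.h>
#include <stdio.h>

int main(void)
{
    const float in[3] = { 0.1f, 0.5f, 0.75f }; /* values added in V2 */
    for (int i = 0; i < 3; i++)
        printf("atanh(%g) = %g\n", in[i], atanhf(in[i])); /* all finite */
    printf("atanh(%g) = %g\n", -3.0f, atanhf(-3.0f));     /* NaN: out of domain */
    return 0;
}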
[FFmpeg-devel] [PATCH 3/3] avfilter/x86/vf_eq: add SSE2 version
Signed-off-by: Ting Fu --- libavfilter/x86/vf_eq.asm| 19 +-- libavfilter/x86/vf_eq_init.c | 20 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm index bf28691297..d6b51cf6df 100644 --- a/libavfilter/x86/vf_eq.asm +++ b/libavfilter/x86/vf_eq.asm @@ -24,14 +24,21 @@ SECTION .text -INIT_MMX mmx +%macro PROCESS_ONE_LINE 1 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w movd m3, contrastd movd m4, brightnessd movsx r5d, contrastw movsx r6d, brightnessw +%if mmsize == 8 pshufw m3, m3, 0 pshufw m4, m4, 0 +%elif mmsize == 16 +pshuflw m3, m3, 0 +movlhps m3, m3 +pshuflw m4, m4, 0 +movlhps m4, m4 +%endif DEFINE_ARGS src, dst, tmp, scalar, w xor tmpd, tmpd @@ -39,7 +46,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w pxor m1, m1 mov scalard, wd and scalard, mmsize-1 -sar wd, 3 +sar wd, %1 cmp wd, 1 jl .loop1 @@ -80,3 +87,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w .end: RET + +%endmacro + +INIT_MMX mmx +PROCESS_ONE_LINE 3 + +INIT_XMM sse2 +PROCESS_ONE_LINE 4 diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c index 63c69078fb..cdd5272220 100644 --- a/libavfilter/x86/vf_eq_init.c +++ b/libavfilter/x86/vf_eq_init.c @@ -28,6 +28,8 @@ extern void ff_process_one_line_mmx(const uint8_t *src, uint8_t *dst, int contvec, int brvec, int w); +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, int contvec, +int brvec, int w); static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h) @@ -44,6 +46,21 @@ static void process_mmx(EQParameters *param, uint8_t *dst, int dst_stride, emms_c(); } +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, +const uint8_t *src, int src_stride, int w, int h) +{ +short contrast = (short) (param->contrast * 256 * 16); +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) + / 200 - 128 - contrast / 32; + +while (h--) { +ff_process_one_line_sse2(src, dst, contrast, brightness, w); +src += src_stride; +dst += dst_stride; +} +emms_c(); +} + av_cold void ff_eq_init_x86(EQContext *eq) { int cpu_flags = av_get_cpu_flags(); @@ -51,5 +68,8 @@ av_cold void ff_eq_init_x86(EQContext *eq) if (cpu_flags & AV_CPU_FLAG_MMX) { eq->process = process_mmx; } +if (cpu_flags & AV_CPU_FLAG_SSE2) { +eq->process = process_sse2; +} } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
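Both the MMX and the new SSE2 path compute the same fixed-point transform that the scalar tail loop spells out: multiply the pixel by the 4.12 fixed-point contrast (param->contrast * 256 * 16), arithmetic-shift right by 12, add the precomputed brightness bias, and saturate to 8 bits, which is what packuswb does per element. A scalar C rendering of that per-pixel operation (function names are illustrative, assuming this reading of the asm):

#include <stdint.h>

/* Saturating clip to [0, 255], the scalar equivalent of packuswb. */
static inline uint8_t clip_u8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}

/* contrast is 4.12 fixed point, brightness a precomputed bias; per pixel:
 * dst = clip(((src * contrast) >> 12) + brightness). */
static void process_one_line_c(const uint8_t *src, uint8_t *dst,
                               short contrast, short brightness, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = clip_u8(((src[i] * contrast) >> 12) + brightness);
}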
[FFmpeg-devel] [PATCH 1/3] checkasm/vf_eq: add test for vf_eq
Signed-off-by: Ting Fu --- libavfilter/vf_eq.c | 13 --- libavfilter/vf_eq.h | 1 + tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_eq.c| 79 +++ tests/fate/checkasm.mak | 1 + 7 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 tests/checkasm/vf_eq.c diff --git a/libavfilter/vf_eq.c b/libavfilter/vf_eq.c index 2c4c7e4d54..0f9d129255 100644 --- a/libavfilter/vf_eq.c +++ b/libavfilter/vf_eq.c @@ -174,12 +174,18 @@ static int set_expr(AVExpr **pexpr, const char *expr, const char *option, void * return 0; } +void ff_eq_init(EQContext *eq) +{ +eq->process = process_c; +if (ARCH_X86) +ff_eq_init_x86(eq); +} + static int initialize(AVFilterContext *ctx) { EQContext *eq = ctx->priv; int ret; - -eq->process = process_c; +ff_eq_init(eq); if ((ret = set_expr(&eq->contrast_pexpr, eq->contrast_expr, "contrast", ctx)) < 0 || (ret = set_expr(&eq->brightness_pexpr, eq->brightness_expr, "brightness", ctx)) < 0 || @@ -191,9 +197,6 @@ static int initialize(AVFilterContext *ctx) (ret = set_expr(&eq->gamma_weight_pexpr, eq->gamma_weight_expr, "gamma_weight", ctx)) < 0 ) return ret; -if (ARCH_X86) -ff_eq_init_x86(eq); - if (eq->eval_mode == EVAL_MODE_INIT) { set_gamma(eq); set_contrast(eq); diff --git a/libavfilter/vf_eq.h b/libavfilter/vf_eq.h index fa49d46e5c..cd0cd75f08 100644 --- a/libavfilter/vf_eq.h +++ b/libavfilter/vf_eq.h @@ -100,6 +100,7 @@ typedef struct EQContext { enum EvalMode { EVAL_MODE_INIT, EVAL_MODE_FRAME, EVAL_MODE_NB } eval_mode; } EQContext; +void ff_eq_init(EQContext *eq); void ff_eq_init_x86(EQContext *eq); #endif /* AVFILTER_EQ_H */ diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 0112ff603e..de850c016e 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -36,6 +36,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o +AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index d9a5c7f401..bcbe775510 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -165,6 +165,9 @@ static const struct { #if CONFIG_COLORSPACE_FILTER { "vf_colorspace", checkasm_check_colorspace }, #endif +#if CONFIG_EQ_FILTER +{ "vf_eq", checkasm_check_vf_eq }, +#endif #if CONFIG_GBLUR_FILTER { "vf_gblur", checkasm_check_vf_gblur }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index fdf9eeb75d..0a7f9f25c4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -72,6 +72,7 @@ void checkasm_check_sw_rgb(void); void checkasm_check_utvideodsp(void); void checkasm_check_v210dec(void); void checkasm_check_v210enc(void); +void checkasm_check_vf_eq(void); void checkasm_check_vf_gblur(void); void checkasm_check_vf_hflip(void); void checkasm_check_vf_threshold(void); diff --git a/tests/checkasm/vf_eq.c b/tests/checkasm/vf_eq.c new file mode 100644 index 00..b1a6a61e05 --- /dev/null +++ b/tests/checkasm/vf_eq.c @@ -0,0 +1,79 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include "checkasm.h" +#include "libavfilter/avfilter.h" +#include "libavfilter/vf_eq.h" +#include "libavutil/intreadwrite.h" + +#define WIDTH 256 +#define HEIGHT 256 +#define SRC_STRIDE 256 +#define PIXELS (WIDTH * HEIGHT) +#define RANDOM_RANGE 8 +#define SCALE 1 + +#define randomize_buffers(buf, size) \ +do { \ +int j;\ +uint8_t *tmp_buf = (uint
[FFmpeg-devel] [PATCH 2/3] avfilter/x86/vf_eq: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- libavfilter/x86/Makefile | 3 +- libavfilter/x86/vf_eq.asm| 82 ++ libavfilter/x86/vf_eq.c | 96 libavfilter/x86/vf_eq_init.c | 55 + 4 files changed, 139 insertions(+), 97 deletions(-) create mode 100644 libavfilter/x86/vf_eq.asm delete mode 100644 libavfilter/x86/vf_eq.c create mode 100644 libavfilter/x86/vf_eq_init.c diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index f12993e606..f2922c4597 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -6,7 +6,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_CONVOLUTION_FILTER)+= x86/vf_convolution_init.o -OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o +OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o @@ -43,6 +43,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o +X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm new file mode 100644 index 00..bf28691297 --- /dev/null +++ b/libavfilter/x86/vf_eq.asm @@ -0,0 +1,82 @@ +;* +;* x86-optimized functions for eq filter +;* +;* Original MPlayer filters by Richard Felker. +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or modify +;* it under the terms of the GNU General Public License as published by +;* the Free Software Foundation; either version 2 of the License, or +;* (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;* GNU General Public License for more details. +;* +;* You should have received a copy of the GNU General Public License along +;* with FFmpeg; if not, write to the Free Software Foundation, Inc., +;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;* + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_MMX mmx +cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w +movd m3, contrastd +movd m4, brightnessd +movsx r5d, contrastw +movsx r6d, brightnessw +pshufw m3, m3, 0 +pshufw m4, m4, 0 + +DEFINE_ARGS src, dst, tmp, scalar, w +xor tmpd, tmpd +pxor m0, m0 +pxor m1, m1 +mov scalard, wd +and scalard, mmsize-1 +sar wd, 3 +cmp wd, 1 +jl .loop1 + +.loop0: +movu m1, [srcq] +mova m2, m1 +punpcklbw m1, m0 +punpckhbw m2, m0 +psllw m1, 4 +psllw m2, 4 +pmulhw m1, m3 +pmulhw m2, m3 +paddw m1, m4 +paddw m2, m4 +packuswb m1, m2 +movu [dstq], m1 +add srcq, mmsize +add dstq, mmsize +sub wd, 1 +cmp wd, 0 +jne .loop0 + +.loop1: +cmp scalard, 0 +je .end +movzx tmpd, byte [srcq] +imul tmpd, r5d +sar tmpd, 12 +add tmpd, r6d +movd m1, tmpd +packuswb m1, m0 +movd tmpd, m1 +mov [dstq], tmpb +inc srcq +inc dstq +dec scalard +jmp .loop1 + +.end: +RET diff --git a/libavfilter/x86/vf_eq.c b/libavfilter/x86/vf_eq.c deleted file mode 100644 index 16f399505f..00 --- a/libavfilter/x86/vf_eq.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * - * Original MPlayer filters by Richard Felker. - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for mo
[FFmpeg-devel] [PATCH V2 1/3] checkasm/vf_eq: add test for vf_eq
Signed-off-by: Ting Fu --- libavfilter/vf_eq.c | 13 --- libavfilter/vf_eq.h | 1 + tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_eq.c| 79 +++ tests/fate/checkasm.mak | 1 + 7 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 tests/checkasm/vf_eq.c diff --git a/libavfilter/vf_eq.c b/libavfilter/vf_eq.c index 2c4c7e4d54..0f9d129255 100644 --- a/libavfilter/vf_eq.c +++ b/libavfilter/vf_eq.c @@ -174,12 +174,18 @@ static int set_expr(AVExpr **pexpr, const char *expr, const char *option, void * return 0; } +void ff_eq_init(EQContext *eq) +{ +eq->process = process_c; +if (ARCH_X86) +ff_eq_init_x86(eq); +} + static int initialize(AVFilterContext *ctx) { EQContext *eq = ctx->priv; int ret; - -eq->process = process_c; +ff_eq_init(eq); if ((ret = set_expr(&eq->contrast_pexpr, eq->contrast_expr, "contrast", ctx)) < 0 || (ret = set_expr(&eq->brightness_pexpr, eq->brightness_expr, "brightness", ctx)) < 0 || @@ -191,9 +197,6 @@ static int initialize(AVFilterContext *ctx) (ret = set_expr(&eq->gamma_weight_pexpr, eq->gamma_weight_expr, "gamma_weight", ctx)) < 0 ) return ret; -if (ARCH_X86) -ff_eq_init_x86(eq); - if (eq->eval_mode == EVAL_MODE_INIT) { set_gamma(eq); set_contrast(eq); diff --git a/libavfilter/vf_eq.h b/libavfilter/vf_eq.h index fa49d46e5c..cd0cd75f08 100644 --- a/libavfilter/vf_eq.h +++ b/libavfilter/vf_eq.h @@ -100,6 +100,7 @@ typedef struct EQContext { enum EvalMode { EVAL_MODE_INIT, EVAL_MODE_FRAME, EVAL_MODE_NB } eval_mode; } EQContext; +void ff_eq_init(EQContext *eq); void ff_eq_init_x86(EQContext *eq); #endif /* AVFILTER_EQ_H */ diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 0112ff603e..de850c016e 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -36,6 +36,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o +AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index d9a5c7f401..bcbe775510 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -165,6 +165,9 @@ static const struct { #if CONFIG_COLORSPACE_FILTER { "vf_colorspace", checkasm_check_colorspace }, #endif +#if CONFIG_EQ_FILTER +{ "vf_eq", checkasm_check_vf_eq }, +#endif #if CONFIG_GBLUR_FILTER { "vf_gblur", checkasm_check_vf_gblur }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index fdf9eeb75d..0a7f9f25c4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -72,6 +72,7 @@ void checkasm_check_sw_rgb(void); void checkasm_check_utvideodsp(void); void checkasm_check_v210dec(void); void checkasm_check_v210enc(void); +void checkasm_check_vf_eq(void); void checkasm_check_vf_gblur(void); void checkasm_check_vf_hflip(void); void checkasm_check_vf_threshold(void); diff --git a/tests/checkasm/vf_eq.c b/tests/checkasm/vf_eq.c new file mode 100644 index 00..684718f2cd --- /dev/null +++ b/tests/checkasm/vf_eq.c @@ -0,0 +1,79 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include "checkasm.h" +#include "libavfilter/avfilter.h" +#include "libavfilter/vf_eq.h" +#include "libavutil/intreadwrite.h" + +#define WIDTH 256 +#define HEIGHT 256 +#define SRC_STRIDE 256 +#define PIXELS (WIDTH * HEIGHT) +#define RANDOM_RANGE 8 +#define SCALE 1 + +#define randomize_buffers(buf, size) \ +do { \ +int j;\ +uint8_t *tmp_buf = (uint
[FFmpeg-devel] [PATCH V2 3/3] avfilter/x86/vf_eq: add SSE2 version
Signed-off-by: Ting Fu --- libavfilter/x86/vf_eq.asm| 12 ++-- libavfilter/x86/vf_eq_init.c | 19 +++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm index 8460342896..a30a287029 100644 --- a/libavfilter/x86/vf_eq.asm +++ b/libavfilter/x86/vf_eq.asm @@ -24,7 +24,7 @@ SECTION .text -INIT_MMX mmxext +%macro PROCESS_ONE_LINE 1 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w movd m3, contrastd movd m4, brightnessd @@ -39,7 +39,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w pxor m1, m1 mov scalard, wd and scalard, mmsize-1 -sar wd, 3 +sar wd, %1 cmp wd, 1 jl .loop1 @@ -80,3 +80,11 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w .end: RET + +%endmacro + +INIT_MMX mmxext +PROCESS_ONE_LINE 3 + +INIT_XMM sse2 +PROCESS_ONE_LINE 4 diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c index c18db75545..8ad9f4bcaf 100644 --- a/libavfilter/x86/vf_eq_init.c +++ b/libavfilter/x86/vf_eq_init.c @@ -28,6 +28,8 @@ extern void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst, short contrast, short brightness, int w); +extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, short contrast, + short brightness, int w); static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h) @@ -44,6 +46,20 @@ static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride, emms_c(); } +static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, + const uint8_t *src, int src_stride, int w, int h) +{ +short contrast = (short) (param->contrast * 256 * 16); +short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) + / 200 - 128 - contrast / 32; + +while (h--) { +ff_process_one_line_sse2(src, dst, contrast, brightness, w); +src += src_stride; +dst += dst_stride; +} +} + av_cold void ff_eq_init_x86(EQContext *eq) { int cpu_flags = av_get_cpu_flags(); @@ -51,5 +67,8 @@ av_cold void ff_eq_init_x86(EQContext *eq) if (cpu_flags & AV_CPU_FLAG_MMXEXT) { eq->process = process_mmxext; } +if (cpu_flags & AV_CPU_FLAG_SSE2) { +eq->process = process_sse2; +} } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
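The macro argument is log2 of the pixels handled per vector iteration: 3 for the 8-byte MMX registers, 4 for the 16-byte XMM registers. The prologue splits each row accordingly, with `and scalard, mmsize-1` keeping the scalar tail and `sar wd, %1` giving the vector loop count; V2 also drops emms_c() from the SSE2 path, which is correct since EMMS is only needed after code that touches the MMX/x87 register state, and the XMM loop never does. The width split in C terms (a sketch):

#include <stdio.h>

int main(void)
{
    int w = 259; /* an arbitrary row width */
    /* 8 pixels per MMX iteration (shift 3), 16 per XMM iteration (shift 4) */
    printf("mmx:  %d iterations + %d scalar tail pixels\n", w >> 3, w & 7);
    printf("sse2: %d iterations + %d scalar tail pixels\n", w >> 4, w & 15);
    return 0;
}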
[FFmpeg-devel] [PATCH V2 2/3] avfilter/x86/vf_eq: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- libavfilter/x86/Makefile | 3 +- libavfilter/x86/vf_eq.asm| 82 ++ libavfilter/x86/vf_eq.c | 96 libavfilter/x86/vf_eq_init.c | 55 + 4 files changed, 139 insertions(+), 97 deletions(-) create mode 100644 libavfilter/x86/vf_eq.asm delete mode 100644 libavfilter/x86/vf_eq.c create mode 100644 libavfilter/x86/vf_eq_init.c diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index f12993e606..f2922c4597 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -6,7 +6,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_CONVOLUTION_FILTER)+= x86/vf_convolution_init.o -OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o +OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o OBJS-$(CONFIG_GRADFUN_FILTER)+= x86/vf_gradfun_init.o @@ -43,6 +43,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o +X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o X86ASM-OBJS-$(CONFIG_FSPP_FILTER)+= x86/vf_fspp.o X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm new file mode 100644 index 00..8460342896 --- /dev/null +++ b/libavfilter/x86/vf_eq.asm @@ -0,0 +1,82 @@ +;* +;* x86-optimized functions for eq filter +;* +;* Original MPlayer filters by Richard Felker. +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or modify +;* it under the terms of the GNU General Public License as published by +;* the Free Software Foundation; either version 2 of the License, or +;* (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;* GNU General Public License for more details. +;* +;* You should have received a copy of the GNU General Public License along +;* with FFmpeg; if not, write to the Free Software Foundation, Inc., +;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;* + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_MMX mmxext +cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w +movd m3, contrastd +movd m4, brightnessd +movsx r5d, contrastw +movsx r6d, brightnessw +SPLATW m3, m3, 0 +SPLATW m4, m4, 0 + +DEFINE_ARGS src, dst, tmp, scalar, w +xor tmpd, tmpd +pxor m0, m0 +pxor m1, m1 +mov scalard, wd +and scalard, mmsize-1 +sar wd, 3 +cmp wd, 1 +jl .loop1 + +.loop0: +movu m1, [srcq] +mova m2, m1 +punpcklbw m1, m0 +punpckhbw m2, m0 +psllw m1, 4 +psllw m2, 4 +pmulhw m1, m3 +pmulhw m2, m3 +paddw m1, m4 +paddw m2, m4 +packuswb m1, m2 +movu [dstq], m1 +add srcq, mmsize +add dstq, mmsize +sub wd, 1 +cmp wd, 0 +jne .loop0 + +.loop1: +cmp scalard, 0 +je .end +movzx tmpd, byte [srcq] +imul tmpd, r5d +sar tmpd, 12 +add tmpd, r6d +movd m1, tmpd +packuswb m1, m0 +movd tmpd, m1 +mov [dstq], tmpb +inc srcq +inc dstq +dec scalard +jmp .loop1 + +.end: +RET diff --git a/libavfilter/x86/vf_eq.c b/libavfilter/x86/vf_eq.c deleted file mode 100644 index 16f399505f..00 --- a/libavfilter/x86/vf_eq.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * - * Original MPlayer filters by Richard Felker. - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public Licens
[FFmpeg-devel] [PATCH 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 5 + libswscale/x86/yuv2rgb_template.c | 58 ++- libswscale/x86/yuv_2_rgb.asm | 163 +++--- 3 files changed, 208 insertions(+), 18 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 70412a3914..d983934762 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,6 +61,11 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index efe6356f30..fe586047f0 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint8_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint8_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint8_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115,7 +149,9 @@ static inline int 
yuv420_rgb24(SwsContext *c, const uint8_t *src[], int y, h_size, vshift; YUV2RGB_LOOP(3) -#if COMPILE_TEMPLATE_MMXEXT +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#elif COMPILE_TEMPLATE_MMXEXT ff_yuv_420_rgb24_mmxext(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); #else ff_yuv_420_rgb24_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); @@ -132,7 +168
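A note on the calling convention visible above: every kernel receives &(c->redDither) as pointer_c_dither, and the assembly addresses the conversion constants at fixed 8-byte offsets from it, relying on redDither through vOffset being consecutive aligned uint64_t fields of SwsContext. Written out as an overlay struct purely for illustration (field order as declared in libswscale/swscale_internal.h at the time; not code from the patch):

#include <stdint.h>

struct coeff_table {      /* view of the memory starting at &c->redDither */
    uint64_t redDither;   /* +0*8  per-row dither bias */
    uint64_t greenDither; /* +1*8  */
    uint64_t blueDither;  /* +2*8  */
    uint64_t yCoeff;      /* +3*8  y_coff in the asm   */
    uint64_t vrCoeff;     /* +4*8  vr_coff             */
    uint64_t ubCoeff;     /* +5*8  ub_coff             */
    uint64_t vgCoeff;     /* +6*8  vg_coff             */
    uint64_t ugCoeff;     /* +7*8  ug_coff             */
    uint64_t yOffset;     /* +8*8  y_offset            */
    uint64_t uOffset;     /* +9*8  u_offset            */
    uint64_t vOffset;     /* +10*8 v_offset            */
};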
[FFmpeg-devel] [PATCH 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 ++ libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 394 insertions(+), 415 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..70412a3914 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,49 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} +switch (c->dstFormat) { +case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif - -if (INLINE_MMX(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; -#endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && 
CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -break; -} else -return yuv420_bgr32_mmx; -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmx; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmx; -case AV_PIX_FMT_RGB565: -
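For readers new to these kernels, the per-pixel arithmetic being ported is the usual fixed-point YUV to RGB transform; each U/V pair is shared between two horizontally adjacent luma samples (4:2:0), and the asm simply performs this arithmetic on whole registers of pixels at once. In plain C it looks roughly like the sketch below; the literal coefficients are the textbook BT.601 limited-range ones, shown for illustration only, since the real values come from the SwsContext table at run time:

#include <stdint.h>

static void yuv2rgb_pixel(int y, int u, int v, uint8_t rgb[3])
{
    int c = y - 16, d = u - 128, e = v - 128;
#define CLIP8(x) ((x) < 0 ? 0 : (x) > 255 ? 255 : (x))
    rgb[0] = CLIP8((298 * c + 409 * e + 128) >> 8);           /* R */
    rgb[1] = CLIP8((298 * c - 100 * d - 208 * e + 128) >> 8); /* G */
    rgb[2] = CLIP8((298 * c + 516 * d + 128) >> 8);           /* B */
#undef CLIP8
}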
[FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 8 +- libswscale/x86/yuv2rgb_template.c | 58 ++- libswscale/x86/yuv_2_rgb.asm | 162 +++--- 3 files changed, 209 insertions(+), 19 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index ed9b613cab..b83dd7089a 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) || +EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index efe6356f30..fe586047f0 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint8_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint8_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint8_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint8_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return 
srcSliceH; } @@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115,7 +149,9 @@ s
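About the greenDither/redDither stores kept by this patch: for the 15/16 bpp outputs the kernels add a small row-alternating bias before truncating each 8-bit channel to 5 or 6 bits, which is what the ff_dither4/ff_dither8 tables moved by patch 1/2 encode (redDither deliberately uses the opposite row phase from blueDither). A scalar model of the idea follows; the bias values below are illustrative, the authoritative pattern is in the tables themselves:

#include <stdint.h>

static int sat_add_u8(int a, int b) { int s = a + b; return s > 255 ? 255 : s; }

static uint16_t pack_rgb565_dithered(uint8_t r, uint8_t g, uint8_t b,
                                     int row, int col)
{
    static const uint8_t bias5[2][2] = { { 2, 6 }, { 4, 0 } }; /* ff_dither8-like */
    static const uint8_t bias6[2][2] = { { 3, 1 }, { 0, 2 } }; /* ff_dither4-like */
    int R = sat_add_u8(r, bias5[(row + 1) & 1][col & 1]) >> 3; /* opposite row phase */
    int G = sat_add_u8(g, bias6[row & 1][col & 1]) >> 2;
    int B = sat_add_u8(b, bias5[row & 1][col & 1]) >> 3;
    return (uint16_t)(R << 11 | G << 5 | B);
}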
[FFmpeg-devel] [PATCH V2 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Tested using this command: ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 151 to 389 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 +++--- libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 395 insertions(+), 414 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..ed9b613cab 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} -#endif - -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; +case 
AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -
[FFmpeg-devel] [PATCH V3 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Tested using this command: ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 151 to 389 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 +++--- libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 395 insertions(+), 414 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..ed9b613cab 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} -#endif - -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; +case 
AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -
[FFmpeg-devel] [PATCH V3 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 8 +- libswscale/x86/yuv2rgb_template.c | 58 ++- libswscale/x86/yuv_2_rgb.asm | 162 +++--- 3 files changed, 209 insertions(+), 19 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index ed9b613cab..b83dd7089a 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) || +EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index bcc8eb7602..97a3645b90 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } 
return srcSliceH; } @@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115
[FFmpeg-devel] [PATCH] libswscale/swscale_unscaled.c: remove redundant code
Signed-off-by: Ting Fu --- libswscale/swscale_unscaled.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index d9260c151a..0d109da2d7 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -2032,7 +2032,6 @@ void ff_get_unscaled_swscale(SwsContext *c) IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GRBG16) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48) || -IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) || @@ -2053,7 +2052,6 @@ void ff_get_unscaled_swscale(SwsContext *c) IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48) || -IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) || -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
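For context on why the two deleted lines were redundant: IS_DIFFERENT_ENDIANESS() is symmetric in its BE/LE test, and the same || chain already carries AV_PIX_FMT_BGRA64 and AV_PIX_FMT_RGBA64 entries a few lines further down (visible in the diff context above), so the duplicates could never change the result. The macro reads roughly as follows; see libswscale/swscale_unscaled.c for the authoritative definition:

#define IS_DIFFERENT_ENDIANESS(src_fmt, dst_fmt, pix_fmt)           \
    (((src_fmt) == pix_fmt ## BE && (dst_fmt) == pix_fmt ## LE) ||  \
     ((src_fmt) == pix_fmt ## LE && (dst_fmt) == pix_fmt ## BE))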
[FFmpeg-devel] [PATCH V4 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Tested using this command: ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 151 to 389 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 +++--- libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 395 insertions(+), 414 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..ed9b613cab 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} -#endif - -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; +case 
AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -
[FFmpeg-devel] [PATCH V4 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 8 +- libswscale/x86/yuv2rgb_template.c | 58 +++- libswscale/x86/yuv_2_rgb.asm | 145 ++ 3 files changed, 192 insertions(+), 19 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index ed9b613cab..b83dd7089a 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) || +EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index bcc8eb7602..97a3645b90 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return 
srcSliceH; } @@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115
[FFmpeg-devel] [PATCH V5 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 +++--- libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 395 insertions(+), 414 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..f3d2bb526e 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} -#endif - -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; +case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && 
CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -break; -} else -return yuv420_bgr32_mmx; -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmx; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmx; -
[FFmpeg-devel] [PATCH V5 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on my local machine. Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 7 +- libswscale/x86/yuv2rgb_template.c | 58 +++- libswscale/x86/yuv_2_rgb.asm | 145 ++ 3 files changed, 191 insertions(+), 19 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index f3d2bb526e..7015266a7e 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,13 +61,18 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index e67a85df33..ceaa6dea32 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -102,7 +131,12 @@ static int yuv420_rgb16(SwsContext 
*c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115,7 +149,9 @@ static int yuv420_rgb24(SwsContext *c, const uint8_t *src[], i
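On reproducing the fps figures quoted in this series: reading ffmpeg's progress line works, but the -benchmark flag prints user/system/wall time at exit and is less noisy across runs. A command along these lines should do; the input file name is a placeholder, and WxH (1920x1080) is the documented -s syntax:

./ffmpeg -benchmark -f rawvideo -pix_fmt yuv420p -s 1920x1080 -i input.yuv \
    -vcodec rawvideo -pix_fmt rgb24 -f null /dev/null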
[FFmpeg-devel] [PATCH V6 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 7 +- libswscale/x86/yuv2rgb_template.c | 58 +++- libswscale/x86/yuv_2_rgb.asm | 145 ++ 3 files changed, 191 insertions(+), 19 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index f3d2bb526e..7015266a7e 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -61,13 +61,18 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #define COMPILE_TEMPLATE_MMXEXT 1 #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#define COMPILE_TEMPLATE_SSSE3 1 +#endif + #include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_SSSE3(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index e67a85df33..ceaa6dea32 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -40,6 +40,30 @@ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ +extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, + const uint8_t *pv_index, const uint64_t *pointer_c_dither, + const uint8_t *py_2index); +extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); +extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index, +const uint8_t *pv_index, const uint64_t *pointer_c_dither, +const uint8_t *py_2index, const uint8_t *pa_2index); extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); @@ -84,7 +108,12 @@ static int yuv420_rgb15(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither8[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -102,7 +131,12 @@ static int 
yuv420_rgb16(SwsContext *c, const uint8_t *src[], c->greenDither = ff_dither4[y & 1]; c->redDither = ff_dither8[(y + 1) & 1]; #endif + +#if COMPILE_TEMPLATE_SSSE3 +ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#else ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index); +#endif } return srcSliceH; } @@ -115,7 +149,9 @@ static int yuv420_rgb24(SwsContext *c,
[FFmpeg-devel] [PATCH V6 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 81 +++--- libswscale/x86/yuv2rgb_template.c | 441 ++ libswscale/x86/yuv_2_rgb.asm | 270 ++ 5 files changed, 395 insertions(+), 414 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..f3d2bb526e 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMX #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 -#define RENAME(a) a ## _mmx -#include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS -#undef RENAME +#if HAVE_MMXEXT #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _mmxext -#include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#include "yuv2rgb_template.c" av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { -switch (c->dstFormat) { -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmxext; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmxext; -} -} -#endif - -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; +case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && 
CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32; #endif -break; -} else -return yuv420_bgr32_mmx; -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmx; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmx; -
[FFmpeg-devel] [PATCH V7 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
Signed-off-by: Ting Fu --- V7: Fix a compile issue when the user configures with --disable-mmx. Fix an issue when running ./ffmpeg with --cpuflags mmx/ssse3. Adjust the SIMD verification logic in libswscale/x86/yuv2rgb.c libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 66 ++--- libswscale/x86/yuv2rgb_template.c | 467 ++ libswscale/x86/yuv_2_rgb.asm | 270 + 5 files changed, 405 insertions(+), 415 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..dd813d4deb 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,32 +50,31 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS +#if HAVE_MMX #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _mmx #include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS +#if HAVE_MMXEXT #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _mmxext #include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_X86ASM */ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS +#if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { +if (EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB24: return yuv420_rgb24_mmxext; @@ -83,37 +82,36 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) return yuv420_bgr24_mmxext; } } -#endif -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags)) { switch (c->dstFormat) { -case AV_PIX_FMT_RGB32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_rgb32_mmx; 
+case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32_mmx; #endif -break; -} else -return yuv420_rgb32_mmx; -case AV_PIX_FMT_BGR32: -if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA -return yuva420_bgr32_mmx; +break; +} else +return yuv420_rgb32_mmx; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_
[FFmpeg-devel] [PATCH V7 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increase from 389 to 640 on Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 38 + libswscale/x86/yuv_2_rgb.asm | 145 +++ 2 files changed, 167 insertions(+), 16 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index dd813d4deb..ecbad95d1f 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -67,6 +67,15 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #include "yuv2rgb_template.c" #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#undef RENAME +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 0 +#define RENAME(a) a ## _ssse3 +#include "yuv2rgb_template.c" +#endif + #endif /* HAVE_X86ASM */ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) @@ -74,6 +83,35 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); +if (EXTERNAL_SSSE3(cpu_flags)) { +switch (c->dstFormat) { +case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32_ssse3; +#endif +break; +} else +return yuv420_rgb32_ssse3; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32_ssse3; +#endif +break; +} else +return yuv420_bgr32_ssse3; +case AV_PIX_FMT_RGB24: +return yuv420_rgb24_ssse3; +case AV_PIX_FMT_BGR24: +return yuv420_bgr24_ssse3; +case AV_PIX_FMT_RGB565: +return yuv420_rgb16_ssse3; +case AV_PIX_FMT_RGB555: +return yuv420_rgb15_ssse3; +} +} + if (EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB24: diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm index a44ab1607b..e05bbb89f5 100644 --- a/libswscale/x86/yuv_2_rgb.asm +++ b/libswscale/x86/yuv_2_rgb.asm @@ -25,11 +25,18 @@ SECTION_RODATA -pw_00ff: times 4 dw 255 -pb_f8: times 8 db 248 -pb_e0: times 8 db 224 -pb_03: times 8 db 3 -pb_07: times 8 db 7 +; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y +mask_dw036 : db -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0 +mask_dw147 : db 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1 +mask_dw25 : db 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0 +rgb24_shuf1: db 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11 +rgb24_shuf2: db 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5 +rgb24_shuf3: db 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15 +pw_00ff: times 8 dw 255 +pb_f8: times 16 db 248 +pb_e0: times 16 db 224 +pb_03: times 16 db 3 +pb_07: times 16 db 7 mask_1101: dw -1, -1, 0, -1 mask_0010: dw 0, 0, -1, 0 @@ -49,7 +56,11 @@ SECTION .text ;- %macro MOV_H2L 1 -psrlq %1, 32 +%if mmsize == 8 +psrlq %1, 32 +%else ; mmsize == 16 +psrldq %1, 8 +%endif %endmacro %macro yuv2rgb_fn 3 @@ -77,6 +88,7 @@ psrlq %1, 32 %define m_blue m1 %endif +%if mmsize == 8 %define time_num 1 %define reg_num 8 %define y_offset [pointer_c_ditherq + 8 * 8] @@ -87,11 +99,45 @@ psrlq %1, 32 %define y_coff [pointer_c_ditherq + 3 * 8] %define ub_coff [pointer_c_ditherq + 5 * 8] %define vr_coff [pointer_c_ditherq + 4 * 8] +%elif mmsize == 16 +%define time_num 2 +%if ARCH_X86_32 +%define reg_num 8 +%define my_offset [pointer_c_ditherq + 8 * 8] +%define mu_offset [pointer_c_ditherq + 9 * 8] +%define mv_offset [pointer_c_ditherq + 10 * 8] +%define mug_coff [pointer_c_ditherq + 7 * 8] +%define mvg_coff 
[pointer_c_ditherq + 6 * 8] +%define my_coff [pointer_c_ditherq + 3 * 8] +%define mub_coff [pointer_c_ditherq + 5 * 8] +%define mvr_coff [pointer_c_ditherq + 4 * 8] +%else ; ARCH_X86_64 +%define reg_num 16 +%define y_offset m8 +%define u_offset m9 +%define v_offset m10 +%define ug_coff m11 +%define vg_coff m12 +%define y_coff m13 +%define ub_coff m14 +%define vr_coff m15 +%endif ; ARCH_X86_32/64 +%endif ; coeff define mmsize == 8/16 cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters %if ARCH_X86_64 movsxd indexq, indexd +%if mmsize == 16 +VBROADCASTSD y_offset, [pointer_c_ditherq + 8 * 8] +VBROADCASTSD u_offset, [pointer_c_ditherq + 9 * 8] +VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8] +VBROADCASTSD ug_coff, [pointer_c_ditherq + 7 * 8] +VBROADCASTSD vg_coff, [pointer_c_dither
[FFmpeg-devel] [PATCH V8 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code
The original inline assembly and the NASM code have the same fps when called from the command line. The NASM code has almost no impact on the performance. Signed-off-by: Ting Fu --- V8: Remove all reindentation to make review easier. Fix some improper indentation. Reserve the "inline" for next patch. libswscale/x86/Makefile | 1 + libswscale/x86/swscale.c | 16 +- libswscale/x86/yuv2rgb.c | 26 +- libswscale/x86/yuv2rgb_template.c | 392 +- libswscale/x86/yuv_2_rgb.asm | 270 5 files changed, 351 insertions(+), 354 deletions(-) create mode 100644 libswscale/x86/yuv_2_rgb.asm diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index f317d5dd9b..831d5359aa 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o \ x86/output.o \ x86/scale.o \ x86/rgb_2_rgb.o \ + x86/yuv_2_rgb.o \ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 0eed4f18d5..e9d474a1e8 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -29,6 +29,14 @@ #include "libavutil/cpu.h" #include "libavutil/pixdesc.h" +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { +0x0103010301030103LL, +0x0200020002000200LL,}; + +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { +0x0602060206020602LL, +0x0004000400040004LL,}; + #if HAVE_INLINE_ASM #define DITHER1XBPP @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { -0x0103010301030103LL, -0x0200020002000200LL,}; - -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { -0x0602060206020602LL, -0x0004000400040004LL,}; - DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 5e2f77c20f..c7668f487c 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -37,7 +37,7 @@ #include "libavutil/x86/cpu.h" #include "libavutil/cpu.h" -#if HAVE_INLINE_ASM +#if HAVE_X86ASM #define DITHER1XBPP // only for MMX @@ -50,32 +50,31 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX_INLINE && HAVE_6REGS +#if HAVE_MMX #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _mmx #include "yuv2rgb_template.c" -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMX */ // MMXEXT versions -#if HAVE_MMXEXT_INLINE && HAVE_6REGS +#if HAVE_MMXEXT #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _mmxext #include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ +#endif /* HAVE_MMXEXT */ -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_X86ASM */ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if HAVE_MMX_INLINE && HAVE_6REGS +#if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_INLINE -if (INLINE_MMXEXT(cpu_flags)) { +if (EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB24: return yuv420_rgb24_mmxext; @@ -83,13 +82,12 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) return yuv420_bgr24_mmxext; } } -#endif -if (INLINE_MMX(cpu_flags)) { +if (EXTERNAL_MMX(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { 
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA +#if CONFIG_SWSCALE_ALPHA return yuva420_rgb32_mmx; #endif break; @@ -97,7 +95,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) return yuv420_rgb32_mmx; case AV_PIX_FMT_BGR32: if (c->srcFormat == AV_PIX_FMT_YUVA420P) { -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA +#if CONFIG_SWSCALE_ALPHA return yuva420_bgr32_mmx; #endif break; @@ -113,7 +111,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) return yuv420_rgb15_mmx; } } -#endif /* HAVE_MMX_INLINE && HAVE_6
[FFmpeg-devel] [PATCH V8 2/2] libswscale/x86/yuv2rgb: add ssse3 version
Tested using this command: ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null The fps increases from 389 to 640 on Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz Signed-off-by: Ting Fu --- libswscale/x86/yuv2rgb.c | 38 + libswscale/x86/yuv_2_rgb.asm | 145 +++ 2 files changed, 167 insertions(+), 16 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index c7668f487c..c12e88cbb5 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -67,6 +67,15 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #include "yuv2rgb_template.c" #endif /* HAVE_MMXEXT */ +//SSSE3 versions +#if HAVE_SSSE3 +#undef RENAME +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 0 +#define RENAME(a) a ## _ssse3 +#include "yuv2rgb_template.c" +#endif + #endif /* HAVE_X86ASM */ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) @@ -74,6 +83,35 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); +if (EXTERNAL_SSSE3(cpu_flags)) { +switch (c->dstFormat) { +case AV_PIX_FMT_RGB32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_rgb32_ssse3; +#endif +break; +} else +return yuv420_rgb32_ssse3; +case AV_PIX_FMT_BGR32: +if (c->srcFormat == AV_PIX_FMT_YUVA420P) { +#if CONFIG_SWSCALE_ALPHA +return yuva420_bgr32_ssse3; +#endif +break; +} else +return yuv420_bgr32_ssse3; +case AV_PIX_FMT_RGB24: +return yuv420_rgb24_ssse3; +case AV_PIX_FMT_BGR24: +return yuv420_bgr24_ssse3; +case AV_PIX_FMT_RGB565: +return yuv420_rgb16_ssse3; +case AV_PIX_FMT_RGB555: +return yuv420_rgb15_ssse3; +} +} + if (EXTERNAL_MMXEXT(cpu_flags)) { switch (c->dstFormat) { case AV_PIX_FMT_RGB24: diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm index a44ab1607b..e05bbb89f5 100644 --- a/libswscale/x86/yuv_2_rgb.asm +++ b/libswscale/x86/yuv_2_rgb.asm @@ -25,11 +25,18 @@ SECTION_RODATA -pw_00ff: times 4 dw 255 -pb_f8: times 8 db 248 -pb_e0: times 8 db 224 -pb_03: times 8 db 3 -pb_07: times 8 db 7 +; below variables are named like mask_dwXY, which means to preserve dword No.X & No.Y +mask_dw036 : db -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0 +mask_dw147 : db 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1 +mask_dw25 : db 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0 +rgb24_shuf1: db 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11 +rgb24_shuf2: db 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5 +rgb24_shuf3: db 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15 +pw_00ff: times 8 dw 255 +pb_f8: times 16 db 248 +pb_e0: times 16 db 224 +pb_03: times 16 db 3 +pb_07: times 16 db 7 mask_1101: dw -1, -1, 0, -1 mask_0010: dw 0, 0, -1, 0 @@ -49,7 +56,11 @@ SECTION .text ;- %macro MOV_H2L 1 -psrlq %1, 32 +%if mmsize == 8 +psrlq %1, 32 +%else ; mmsize == 16 +psrldq %1, 8 +%endif %endmacro %macro yuv2rgb_fn 3 @@ -77,6 +88,7 @@ psrlq %1, 32 %define m_blue m1 %endif +%if mmsize == 8 %define time_num 1 %define reg_num 8 %define y_offset [pointer_c_ditherq + 8 * 8] @@ -87,11 +99,45 @@ psrlq %1, 32 %define y_coff [pointer_c_ditherq + 3 * 8] %define ub_coff [pointer_c_ditherq + 5 * 8] %define vr_coff [pointer_c_ditherq + 4 * 8] +%elif mmsize == 16 +%define time_num 2 +%if ARCH_X86_32 +%define reg_num 8 +%define my_offset [pointer_c_ditherq + 8 * 8] +%define mu_offset [pointer_c_ditherq + 9 * 8] +%define mv_offset [pointer_c_ditherq + 10 * 8] +%define mug_coff [pointer_c_ditherq + 7 * 8] +%define mvg_coff 
[pointer_c_ditherq + 6 * 8] +%define my_coff [pointer_c_ditherq + 3 * 8] +%define mub_coff [pointer_c_ditherq + 5 * 8] +%define mvr_coff [pointer_c_ditherq + 4 * 8] +%else ; ARCH_X86_64 +%define reg_num 16 +%define y_offset m8 +%define u_offset m9 +%define v_offset m10 +%define ug_coff m11 +%define vg_coff m12 +%define y_coff m13 +%define ub_coff m14 +%define vr_coff m15 +%endif ; ARCH_X86_32/64 +%endif ; coeff define mmsize == 8/16 cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters %if ARCH_X86_64 movsxd indexq, indexd +%if mmsize == 16 +VBROADCASTSD y_offset, [pointer_c_ditherq + 8 * 8] +VBROADCASTSD u_offset, [pointer_c_ditherq + 9 * 8] +VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8] +VBROADCASTSD ug_coff, [pointer_c_ditherq + 7 * 8] +VBROADCASTSD vg_coff, [pointer_c_dither
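For readers unfamiliar with the constants introduced above: the mask_dw* entries are AND masks that keep selected 16-bit lanes (bytes of -1 pass through, bytes of 0 clear), and the rgb24_shuf* entries are control vectors for pshufb, which reorders bytes inside an XMM register. A minimal standalone sketch (illustrative only, not part of the patch) showing how rgb24_shuf1 gathers every third 16-bit word:

    #include <stdio.h>
    #include <stdint.h>
    #include <tmmintrin.h> /* SSSE3: _mm_shuffle_epi8 */

    int main(void)
    {
        /* rgb24_shuf1 from the patch: each byte pair selects one 16-bit word */
        const uint8_t shuf[16] = { 0,1, 6,7, 12,13, 2,3, 8,9, 14,15, 4,5, 10,11 };
        uint16_t words[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }, out[8];
        __m128i v = _mm_loadu_si128((const __m128i *)words);
        __m128i s = _mm_loadu_si128((const __m128i *)shuf);
        _mm_storeu_si128((__m128i *)out, _mm_shuffle_epi8(v, s));
        for (int i = 0; i < 8; i++)
            printf("%u ", out[i]); /* prints: 0 3 6 1 4 7 2 5 */
        printf("\n");
        return 0;
    }

This word-gathering pattern is what allows vectors of red, green and blue words to be interleaved into packed RGB24 inside the register instead of through scalar stores.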
[FFmpeg-devel] [PATCH 1/4] dnn: add DCO_RGB color order to enum DNNColorOrder
Adding DCO_RGB color order to DNNColorOrder, since tensorflow models need this kind of color order as input. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_tf.c | 1 + libavfilter/dnn/dnn_io_proc.c| 14 +++--- libavfilter/dnn_interface.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 076dd3d6a9..f70e3d4659 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -143,6 +143,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input tf_output.index = 0; input->dt = TF_OperationOutputType(tf_output); +input->order = DCO_RGB; status = TF_NewStatus(); TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status); diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index e104cc5064..5f6ce36b96 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -167,11 +167,19 @@ static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNND static enum AVPixelFormat get_pixel_format(DNNData *data) { -if (data->dt == DNN_UINT8 && data->order == DCO_BGR) { -return AV_PIX_FMT_BGR24; +if (data->dt == DNN_UINT8) { +switch (data->order) { +case DCO_BGR: +return AV_PIX_FMT_BGR24; +case DCO_RGB: +return AV_PIX_FMT_RGB24; +default: +av_assert0(!"unsupported data pixel format.\n"); +return AV_PIX_FMT_BGR24; +} } -av_assert0(!"not supported yet.\n"); +av_assert0(!"unsupported data type.\n"); return AV_PIX_FMT_BGR24; } diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index ae5a488341..92c3b0fc6e 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -39,6 +39,7 @@ typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType; typedef enum { DCO_NONE, DCO_BGR, +DCO_RGB, } DNNColorOrder; typedef enum { -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/4] lavfi/dnn_backend_tensorflow: add multiple outputs support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_tf.c | 49 libavfilter/dnn_filter_common.c | 45 + libavfilter/dnn_filter_common.h | 6 ++-- libavfilter/vf_derain.c | 2 +- libavfilter/vf_sr.c | 2 +- 5 files changed, 71 insertions(+), 33 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index f70e3d4659..5c85b562c4 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -155,7 +155,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input TF_DeleteStatus(status); // currently only NHWC is supported -av_assert0(dims[0] == 1); +av_assert0(dims[0] == 1 || dims[0] == -1); input->height = dims[1]; input->width = dims[2]; input->channels = dims[3]; @@ -707,7 +707,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n TF_Output *tf_outputs; TFModel *tf_model = model->model; TFContext *ctx = &tf_model->ctx; -DNNData input, output; +DNNData input, *outputs; TF_Tensor **output_tensors; TF_Output tf_input; TF_Tensor *input_tensor; @@ -738,14 +738,6 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n } } -if (nb_output != 1) { -// currently, the filter does not need multiple outputs, -// so we just pending the support until we really need it. -TF_DeleteTensor(input_tensor); -avpriv_report_missing_feature(ctx, "multiple outputs"); -return DNN_ERROR; -} - tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs)); if (tf_outputs == NULL) { TF_DeleteTensor(input_tensor); @@ -785,23 +777,31 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n return DNN_ERROR; } +outputs = av_malloc_array(nb_output, sizeof(*outputs)); +if (!outputs) { +TF_DeleteTensor(input_tensor); +av_freep(&tf_outputs); +av_freep(&output_tensors); +av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); \ +return DNN_ERROR; +} + for (uint32_t i = 0; i < nb_output; ++i) { -output.height = TF_Dim(output_tensors[i], 1); -output.width = TF_Dim(output_tensors[i], 2); -output.channels = TF_Dim(output_tensors[i], 3); -output.data = TF_TensorData(output_tensors[i]); -output.dt = TF_TensorType(output_tensors[i]); - -if (do_ioproc) { -if (tf_model->model->frame_post_proc != NULL) { -tf_model->model->frame_post_proc(out_frame, &output, tf_model->model->filter_ctx); -} else { -ff_proc_from_dnn_to_frame(out_frame, &output, ctx); -} +outputs[i].height = TF_Dim(output_tensors[i], 1); +outputs[i].width = TF_Dim(output_tensors[i], 2); +outputs[i].channels = TF_Dim(output_tensors[i], 3); +outputs[i].data = TF_TensorData(output_tensors[i]); +outputs[i].dt = TF_TensorType(output_tensors[i]); +} +if (do_ioproc) { +if (tf_model->model->frame_post_proc != NULL) { +tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); } else { -out_frame->width = output.width; -out_frame->height = output.height; +ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); } +} else { +out_frame->width = outputs[0].width; +out_frame->height = outputs[0].height; } for (uint32_t i = 0; i < nb_output; ++i) { @@ -812,6 +812,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n TF_DeleteTensor(input_tensor); av_freep(&output_tensors); av_freep(&tf_outputs); +av_freep(&outputs); return DNN_SUCCESS; } diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c index 1b922455a3..4cbfdbf52a 100644 --- a/libavfilter/dnn_filter_common.c +++ b/libavfilter/dnn_filter_common.c @@ -17,6 +17,39 @@ */ 
#include "dnn_filter_common.h" +#include "libavutil/avstring.h" + +#define MAX_SUPPORTED_OUTPUTS_NB 4 + +static char **separate_output_names(const char *expr, const char *val_sep, int *separated_nb) +{ +char *val, **parsed_vals = NULL; +int val_num = 0; +if (!expr || !val_sep || !separated_nb) { +return NULL; +} + +parsed_vals = av_mallocz_array(MAX_SUPPORTED_OUTPUTS_NB, sizeof(*parsed_vals)); +if (!parsed_vals) { +return NULL; +} + +do { +val = av_get_token(&expr, val_sep); +if(val) { +parsed_vals[val_num] = val; +val_num++; +} +if (*expr) { +
[FFmpeg-devel] [PATCH 3/4] lavfi/dnn_backend_tensorflow: support detect model
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_tf.c | 39 ++-- libavfilter/vf_dnn_detect.c | 32 +- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 5c85b562c4..8fb2ae8583 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -793,15 +793,40 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n outputs[i].data = TF_TensorData(output_tensors[i]); outputs[i].dt = TF_TensorType(output_tensors[i]); } -if (do_ioproc) { -if (tf_model->model->frame_post_proc != NULL) { -tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); +switch (model->func_type) { +case DFT_PROCESS_FRAME: +//it only support 1 output if it's frame in & frame out +if (do_ioproc) { +if (tf_model->model->frame_post_proc != NULL) { +tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); +} else { +ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); +} } else { -ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); +out_frame->width = outputs[0].width; +out_frame->height = outputs[0].height; +} +break; +case DFT_ANALYTICS_DETECT: +if (!model->detect_post_proc) { +av_log(ctx, AV_LOG_ERROR, "Detect filter needs provide post proc\n"); +return DNN_ERROR; +} +model->detect_post_proc(out_frame, outputs, nb_output, model->filter_ctx); +break; +default: +for (uint32_t i = 0; i < nb_output; ++i) { +if (output_tensors[i]) { +TF_DeleteTensor(output_tensors[i]); +} } -} else { -out_frame->width = outputs[0].width; -out_frame->height = outputs[0].height; +TF_DeleteTensor(input_tensor); +av_freep(&output_tensors); +av_freep(&tf_outputs); +av_freep(&outputs); + +av_log(ctx, AV_LOG_ERROR, "Tensorflow backend does not support this kind of dnn filter now\n"); +return DNN_ERROR; } for (uint32_t i = 0; i < nb_output; ++i) { diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 1dbe4f29a4..7d39acb653 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -203,10 +203,40 @@ static int read_detect_label_file(AVFilterContext *context) return 0; } +static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb) +{ +switch(backend_type) { +case DNN_TF: +if (output_nb != 4) { +av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model with 4 outputs, \ + but get %d instead\n", output_nb); +return AVERROR(EINVAL); +} +return 0; +case DNN_OV: +if (output_nb != 1) { +av_log(ctx, AV_LOG_ERROR, "Dnn detect filter with openvino backend needs 1 output only, \ + but get %d instead\n", output_nb); +return AVERROR(EINVAL); +} +return 0; +default: +avpriv_report_missing_feature(ctx, "Dnn detect filter does not support current backend\n"); +return AVERROR(EINVAL); +} +return 0; +} + static av_cold int dnn_detect_init(AVFilterContext *context) { DnnDetectContext *ctx = context->priv; -int ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context); +DnnContext *dnn_ctx = &ctx->dnnctx; +int ret; + +ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context); +if (ret < 0) +return ret; +ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs); if (ret < 0) return ret; ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 4/4] dnn/vf_dnn_detect: add tensorflow output parse support
The testing model is the official tensorflow model from the github repo; please refer to https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md to download the detection model you need. For example, local testing was carried out with 'ssd_mobilenet_v2_coco_2018_03_29.tar.gz', and used one image of a dog from https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg Testing command is: ./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\ "num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\ showinfo -f null - We will see a result similar to the one below: [Parsed_showinfo_1 @ 0x33e65f0] side data - detection bounding boxes: [Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb [Parsed_showinfo_1 @ 0x33e65f0] index: 0, region: (382, 60) -> (1005, 593), label: 18, confidence: 9834/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 1, region: (12, 8) -> (328, 549), label: 18, confidence: 8555/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 2, region: (293, 7) -> (682, 458), label: 1, confidence: 8033/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 3, region: (342, 0) -> (690, 325), label: 1, confidence: 5878/1. There are two boxes of dog with scores 94.05% & 93.45% and two boxes of person with scores 80.33% & 58.78%. Signed-off-by: Ting Fu --- libavfilter/vf_dnn_detect.c | 95 - 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 7d39acb653..818b53a052 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -48,6 +48,9 @@ typedef struct DnnDetectContext { #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM static const AVOption dnn_detect_options[] = { { "dnn_backend", "DNN backend",OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 2 },INT_MIN, INT_MAX, FLAGS, "backend" }, +#if (CONFIG_LIBTENSORFLOW == 1) +{ "tensorflow", "tensorflow backend flag",0, AV_OPT_TYPE_CONST, { .i64 = 1 },0, 0, FLAGS, "backend" }, +#endif #if (CONFIG_LIBOPENVINO == 1) { "openvino","openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 2 },0, 0, FLAGS, "backend" }, #endif @@ -59,7 +62,7 @@ static const AVOption dnn_detect_options[] = { AVFILTER_DEFINE_CLASS(dnn_detect); -static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx) +static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; float conf_threshold = ctx->confidence; @@ -136,6 +139,96 @@ static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AV return 0; } +static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) +{ +DnnDetectContext *ctx = filter_ctx->priv; +int proposal_count; +float conf_threshold = ctx->confidence; +float *conf, *position, *label_id, x0, y0, x1, y1; +int nb_bboxes = 0; +AVFrameSideData *sd; +AVDetectionBBox *bbox; +AVDetectionBBoxHeader *header; + +proposal_count = *(float *)(output[0].data); +conf = output[1].data; +position = output[3].data; +label_id = output[2].data; + +sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); +if (sd) { +av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n"); +return -1; +} + +for (int i = 0; i < proposal_count; ++i) { +if (conf[i] < conf_threshold) +continue; +nb_bboxes++; +} + +if (nb_bboxes == 0) { +av_log(filter_ctx, AV_LOG_VERBOSE, 
"nothing detected in this frame.\n"); +return 0; +} + +header = av_detection_bbox_create_side_data(frame, nb_bboxes); +if (!header) { +av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes); +return -1; +} + +av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source)); + +for (int i = 0; i < proposal_count; ++i) { +y0 = position[i * 4]; +x0 = position[i * 4 + 1]; +y1 = position[i * 4 + 2]; +x1 = position[i * 4 + 3]; + +bbox = av_get_detection_bbox(header, i); + +if (conf[i] < conf_threshold) { +continue; +} + +bbox->x = (int)(x0 * frame->width); +bbox->w = (int)(x1 * frame->width) - bbox->x; +bbox->y =
[FFmpeg-devel] [PATCH V2 1/4] dnn: add DCO_RGB color order to enum DNNColorOrder
Adding DCO_RGB color order to DNNColorOrder, since tensorflow models need this kind of color order as input. Signed-off-by: Ting Fu --- V2: Rebase patch to latest code libavfilter/dnn/dnn_backend_tf.c | 1 + libavfilter/dnn/dnn_io_proc.c| 14 +++--- libavfilter/dnn_interface.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 03fe310b03..45da29ae70 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -143,6 +143,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input tf_output.index = 0; input->dt = TF_OperationOutputType(tf_output); +input->order = DCO_RGB; status = TF_NewStatus(); TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status); diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index 5f60d68078..1e2bef3f9a 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -168,11 +168,19 @@ static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNND static enum AVPixelFormat get_pixel_format(DNNData *data) { -if (data->dt == DNN_UINT8 && data->order == DCO_BGR) { -return AV_PIX_FMT_BGR24; +if (data->dt == DNN_UINT8) { +switch (data->order) { +case DCO_BGR: +return AV_PIX_FMT_BGR24; +case DCO_RGB: +return AV_PIX_FMT_RGB24; +default: +av_assert0(!"unsupported data pixel format.\n"); +return AV_PIX_FMT_BGR24; +} } -av_assert0(!"not supported yet.\n"); +av_assert0(!"unsupported data type.\n"); return AV_PIX_FMT_BGR24; } diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 799244ee14..5e9ffeb077 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -39,6 +39,7 @@ typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType; typedef enum { DCO_NONE, DCO_BGR, +DCO_RGB, } DNNColorOrder; typedef enum { -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 2/4] lavfi/dnn_backend_tensorflow: add multiple outputs support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_tf.c | 49 ++--- libavfilter/dnn_filter_common.c | 53 ++-- libavfilter/dnn_filter_common.h | 6 ++-- libavfilter/vf_derain.c | 2 +- libavfilter/vf_sr.c | 2 +- 5 files changed, 75 insertions(+), 37 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 45da29ae70..b6b1812cd9 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -155,7 +155,7 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input TF_DeleteStatus(status); // currently only NHWC is supported -av_assert0(dims[0] == 1); +av_assert0(dims[0] == 1 || dims[0] == -1); input->height = dims[1]; input->width = dims[2]; input->channels = dims[3]; @@ -707,7 +707,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n TF_Output *tf_outputs; TFModel *tf_model = model->model; TFContext *ctx = &tf_model->ctx; -DNNData input, output; +DNNData input, *outputs; TF_Tensor **output_tensors; TF_Output tf_input; TF_Tensor *input_tensor; @@ -738,14 +738,6 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n } } -if (nb_output != 1) { -// currently, the filter does not need multiple outputs, -// so we just pending the support until we really need it. -TF_DeleteTensor(input_tensor); -avpriv_report_missing_feature(ctx, "multiple outputs"); -return DNN_ERROR; -} - tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs)); if (tf_outputs == NULL) { TF_DeleteTensor(input_tensor); @@ -785,23 +777,31 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n return DNN_ERROR; } +outputs = av_malloc_array(nb_output, sizeof(*outputs)); +if (!outputs) { +TF_DeleteTensor(input_tensor); +av_freep(&tf_outputs); +av_freep(&output_tensors); +av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); \ +return DNN_ERROR; +} + for (uint32_t i = 0; i < nb_output; ++i) { -output.height = TF_Dim(output_tensors[i], 1); -output.width = TF_Dim(output_tensors[i], 2); -output.channels = TF_Dim(output_tensors[i], 3); -output.data = TF_TensorData(output_tensors[i]); -output.dt = TF_TensorType(output_tensors[i]); - -if (do_ioproc) { -if (tf_model->model->frame_post_proc != NULL) { -tf_model->model->frame_post_proc(out_frame, &output, tf_model->model->filter_ctx); -} else { -ff_proc_from_dnn_to_frame(out_frame, &output, ctx); -} +outputs[i].height = TF_Dim(output_tensors[i], 1); +outputs[i].width = TF_Dim(output_tensors[i], 2); +outputs[i].channels = TF_Dim(output_tensors[i], 3); +outputs[i].data = TF_TensorData(output_tensors[i]); +outputs[i].dt = TF_TensorType(output_tensors[i]); +} +if (do_ioproc) { +if (tf_model->model->frame_post_proc != NULL) { +tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); } else { -out_frame->width = output.width; -out_frame->height = output.height; +ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); } +} else { +out_frame->width = outputs[0].width; +out_frame->height = outputs[0].height; } for (uint32_t i = 0; i < nb_output; ++i) { @@ -812,6 +812,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n TF_DeleteTensor(input_tensor); av_freep(&output_tensors); av_freep(&tf_outputs); +av_freep(&outputs); return DNN_SUCCESS; } diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c index 52c7a5392a..0ed0ac2e30 100644 --- a/libavfilter/dnn_filter_common.c +++ b/libavfilter/dnn_filter_common.c @@ -17,6 +17,39 @@ 
*/ #include "dnn_filter_common.h" +#include "libavutil/avstring.h" + +#define MAX_SUPPORTED_OUTPUTS_NB 4 + +static char **separate_output_names(const char *expr, const char *val_sep, int *separated_nb) +{ +char *val, **parsed_vals = NULL; +int val_num = 0; +if (!expr || !val_sep || !separated_nb) { +return NULL; +} + +parsed_vals = av_mallocz_array(MAX_SUPPORTED_OUTPUTS_NB, sizeof(*parsed_vals)); +if (!parsed_vals) { +return NULL; +} + +do { +val = av_get_token(&expr, val_sep); +if(val) { +parsed_vals[val_num] = val; +val_num++; +} +if (*expr) { +
[FFmpeg-devel] [PATCH V2 3/4] lavfi/dnn_backend_tensorflow: support detect model
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_tf.c | 39 ++-- libavfilter/vf_dnn_detect.c | 32 +- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index b6b1812cd9..622b5a8464 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -793,15 +793,40 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n outputs[i].data = TF_TensorData(output_tensors[i]); outputs[i].dt = TF_TensorType(output_tensors[i]); } -if (do_ioproc) { -if (tf_model->model->frame_post_proc != NULL) { -tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); +switch (model->func_type) { +case DFT_PROCESS_FRAME: +//it only support 1 output if it's frame in & frame out +if (do_ioproc) { +if (tf_model->model->frame_post_proc != NULL) { +tf_model->model->frame_post_proc(out_frame, outputs, tf_model->model->filter_ctx); +} else { +ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); +} } else { -ff_proc_from_dnn_to_frame(out_frame, outputs, ctx); +out_frame->width = outputs[0].width; +out_frame->height = outputs[0].height; +} +break; +case DFT_ANALYTICS_DETECT: +if (!model->detect_post_proc) { +av_log(ctx, AV_LOG_ERROR, "Detect filter needs provide post proc\n"); +return DNN_ERROR; +} +model->detect_post_proc(out_frame, outputs, nb_output, model->filter_ctx); +break; +default: +for (uint32_t i = 0; i < nb_output; ++i) { +if (output_tensors[i]) { +TF_DeleteTensor(output_tensors[i]); +} } -} else { -out_frame->width = outputs[0].width; -out_frame->height = outputs[0].height; +TF_DeleteTensor(input_tensor); +av_freep(&output_tensors); +av_freep(&tf_outputs); +av_freep(&outputs); + +av_log(ctx, AV_LOG_ERROR, "Tensorflow backend does not support this kind of dnn filter now\n"); +return DNN_ERROR; } for (uint32_t i = 0; i < nb_output; ++i) { diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 1dbe4f29a4..7d39acb653 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -203,10 +203,40 @@ static int read_detect_label_file(AVFilterContext *context) return 0; } +static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb) +{ +switch(backend_type) { +case DNN_TF: +if (output_nb != 4) { +av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model with 4 outputs, \ + but get %d instead\n", output_nb); +return AVERROR(EINVAL); +} +return 0; +case DNN_OV: +if (output_nb != 1) { +av_log(ctx, AV_LOG_ERROR, "Dnn detect filter with openvino backend needs 1 output only, \ + but get %d instead\n", output_nb); +return AVERROR(EINVAL); +} +return 0; +default: +avpriv_report_missing_feature(ctx, "Dnn detect filter does not support current backend\n"); +return AVERROR(EINVAL); +} +return 0; +} + static av_cold int dnn_detect_init(AVFilterContext *context) { DnnDetectContext *ctx = context->priv; -int ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context); +DnnContext *dnn_ctx = &ctx->dnnctx; +int ret; + +ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context); +if (ret < 0) +return ret; +ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs); if (ret < 0) return ret; ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 4/4] dnn/vf_dnn_detect: add tensorflow output parse support
The testing model is the official tensorflow model from the github repo; please refer to https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md to download the detection model you need. For example, local testing was carried out with 'ssd_mobilenet_v2_coco_2018_03_29.tar.gz', and used one image of a dog from https://github.com/tensorflow/models/blob/master/research/object_detection/test_images/image1.jpg Testing command is: ./ffmpeg -i image1.jpg -vf dnn_detect=dnn_backend=tensorflow:input=image_tensor:output=\ "num_detections&detection_scores&detection_classes&detection_boxes":model=ssd_mobilenet_v2_coco.pb,\ showinfo -f null - We will see a result similar to the one below: [Parsed_showinfo_1 @ 0x33e65f0] side data - detection bounding boxes: [Parsed_showinfo_1 @ 0x33e65f0] source: ssd_mobilenet_v2_coco.pb [Parsed_showinfo_1 @ 0x33e65f0] index: 0, region: (382, 60) -> (1005, 593), label: 18, confidence: 9834/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 1, region: (12, 8) -> (328, 549), label: 18, confidence: 8555/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 2, region: (293, 7) -> (682, 458), label: 1, confidence: 8033/1. [Parsed_showinfo_1 @ 0x33e65f0] index: 3, region: (342, 0) -> (690, 325), label: 1, confidence: 5878/1. There are two boxes of dog with scores 94.05% & 93.45% and two boxes of person with scores 80.33% & 58.78%. Signed-off-by: Ting Fu --- libavfilter/vf_dnn_detect.c | 95 - 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 7d39acb653..818b53a052 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -48,6 +48,9 @@ typedef struct DnnDetectContext { #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM static const AVOption dnn_detect_options[] = { { "dnn_backend", "DNN backend",OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 2 },INT_MIN, INT_MAX, FLAGS, "backend" }, +#if (CONFIG_LIBTENSORFLOW == 1) +{ "tensorflow", "tensorflow backend flag",0, AV_OPT_TYPE_CONST, { .i64 = 1 },0, 0, FLAGS, "backend" }, +#endif #if (CONFIG_LIBOPENVINO == 1) { "openvino","openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 2 },0, 0, FLAGS, "backend" }, #endif @@ -59,7 +62,7 @@ static const AVOption dnn_detect_options[] = { AVFILTER_DEFINE_CLASS(dnn_detect); -static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx) +static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; float conf_threshold = ctx->confidence; @@ -136,6 +139,96 @@ static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AV return 0; } +static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) +{ +DnnDetectContext *ctx = filter_ctx->priv; +int proposal_count; +float conf_threshold = ctx->confidence; +float *conf, *position, *label_id, x0, y0, x1, y1; +int nb_bboxes = 0; +AVFrameSideData *sd; +AVDetectionBBox *bbox; +AVDetectionBBoxHeader *header; + +proposal_count = *(float *)(output[0].data); +conf = output[1].data; +position = output[3].data; +label_id = output[2].data; + +sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); +if (sd) { +av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n"); +return -1; +} + +for (int i = 0; i < proposal_count; ++i) { +if (conf[i] < conf_threshold) +continue; +nb_bboxes++; +} + +if (nb_bboxes == 0) { +av_log(filter_ctx, AV_LOG_VERBOSE, 
"nothing detected in this frame.\n"); +return 0; +} + +header = av_detection_bbox_create_side_data(frame, nb_bboxes); +if (!header) { +av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes); +return -1; +} + +av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source)); + +for (int i = 0; i < proposal_count; ++i) { +y0 = position[i * 4]; +x0 = position[i * 4 + 1]; +y1 = position[i * 4 + 2]; +x1 = position[i * 4 + 3]; + +bbox = av_get_detection_bbox(header, i); + +if (conf[i] < conf_threshold) { +continue; +} + +bbox->x = (int)(x0 * frame->width); +bbox->w = (int)(x1 * frame->width) - bbox->x; +bbox->y =
[FFmpeg-devel] [PATCH 1/3] lavfi/drawbox: refine code
Extract common code of filter_frame() and drawgrid_filter_frame() to draw_region(). Signed-off-by: Ting Fu --- libavfilter/vf_drawbox.c | 160 ++- 1 file changed, 58 insertions(+), 102 deletions(-) diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c index 2794fc2520..95e26191bd 100644 --- a/libavfilter/vf_drawbox.c +++ b/libavfilter/vf_drawbox.c @@ -85,6 +85,61 @@ typedef struct DrawBoxContext { static const int NUM_EXPR_EVALS = 5; +typedef int (*PixelBelongsToRegion)(DrawBoxContext *s, int x, int y); + +#define ASSIGN_THREE_CHANNELS\ +row[0] = frame->data[0] + y * frame->linesize[0]; \ +row[1] = frame->data[1] + (y >> ctx->vsub) * frame->linesize[1]; \ +row[2] = frame->data[2] + (y >> ctx->vsub) * frame->linesize[2]; + +#define ASSIGN_FOUR_CHANNELS \ +ASSIGN_THREE_CHANNELS \ +row[3] = frame->data[3] + y * frame->linesize[3]; + +static void draw_region(AVFrame *frame, DrawBoxContext *ctx, int left, int top, int right, int down, +PixelBelongsToRegion pixel_belongs_to_region) +{ +unsigned char *row[4]; +int x, y; +if (ctx->have_alpha && ctx->replace) { +for (y = top; y < down; y++) { +ASSIGN_FOUR_CHANNELS +if (ctx->invert_color) { +for (x = left; x < right; x++) +if (pixel_belongs_to_region(ctx, x, y)) +row[0][x] = 0xff - row[0][x]; +} else { +for (x = left; x < right; x++) { +if (pixel_belongs_to_region(ctx, x, y)) { +row[0][x ] = ctx->yuv_color[Y]; +row[1][x >> ctx->hsub] = ctx->yuv_color[U]; +row[2][x >> ctx->hsub] = ctx->yuv_color[V]; +row[3][x ] = ctx->yuv_color[A]; +} +} +} +} +} else { +for (y = top; y < down; y++) { +ASSIGN_THREE_CHANNELS +if (ctx->invert_color) { +if (pixel_belongs_to_region(ctx, x, y)) +row[0][x] = 0xff - row[0][x]; +} else { +for (x = left; x < right; x++) { +double alpha = (double)ctx->yuv_color[A] / 255; + +if (pixel_belongs_to_region(ctx, x, y)) { +row[0][x ] = (1 - alpha) * row[0][x ] + alpha * ctx->yuv_color[Y]; +row[1][x >> ctx->hsub] = (1 - alpha) * row[1][x >> ctx->hsub] + alpha * ctx->yuv_color[U]; +row[2][x >> ctx->hsub] = (1 - alpha) * row[2][x >> ctx->hsub] + alpha * ctx->yuv_color[V]; +} +} +} +} +} +} + static av_cold int init(AVFilterContext *ctx) { DrawBoxContext *s = ctx->priv; @@ -217,58 +272,9 @@ static av_pure av_always_inline int pixel_belongs_to_box(DrawBoxContext *s, int static int filter_frame(AVFilterLink *inlink, AVFrame *frame) { DrawBoxContext *s = inlink->dst->priv; -int plane, x, y, xb = s->x, yb = s->y; -unsigned char *row[4]; - -if (s->have_alpha && s->replace) { -for (y = FFMAX(yb, 0); y < frame->height && y < (yb + s->h); y++) { -row[0] = frame->data[0] + y * frame->linesize[0]; -row[3] = frame->data[3] + y * frame->linesize[3]; - -for (plane = 1; plane < 3; plane++) -row[plane] = frame->data[plane] + - frame->linesize[plane] * (y >> s->vsub); - -if (s->invert_color) { -for (x = FFMAX(xb, 0); x < xb + s->w && x < frame->width; x++) -if (pixel_belongs_to_box(s, x, y)) -row[0][x] = 0xff - row[0][x]; -} else { -for (x = FFMAX(xb, 0); x < xb + s->w && x < frame->width; x++) { -if (pixel_belongs_to_box(s, x, y)) { -row[0][x ] = s->yuv_color[Y]; -row[1][x >> s->hsub] = s->yuv_color[U]; -row[2][x >> s->hsub] = s->yuv_color[V]; -row[3][x ] = s->yuv_color[A]; -} -} -} -} -} else { -for (y = FFMAX(yb, 0); y < frame->height && y < (yb + s->h); y++) { -row[0] = frame->data[0] + y * frame->linesize[0]; -for (plane = 1; plane < 3; plane++) -row[plane
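The heart of this refactor is routing the per-pixel membership test through the PixelBelongsToRegion function pointer, so that drawbox and drawgrid can share one drawing loop while supplying different tests. A simplified standalone sketch of the pattern (not the FFmpeg code itself):

    typedef int (*PixelBelongsToRegion)(void *priv, int x, int y);

    /* Sketch: one generic row loop, parameterized by the membership test. */
    static void draw_row(unsigned char *row, int left, int right, int y,
                         void *priv, PixelBelongsToRegion belongs)
    {
        for (int x = left; x < right; x++)
            if (belongs(priv, x, y))
                row[x] = 0xff - row[x]; /* e.g. the invert_color branch */
    }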
[FFmpeg-devel] [PATCH 2/3] libavfilter: vf_drawbox filter support draw box with detection bounding boxes in side_data
This feature can be used with dnn detection by setting vf_drawbox's option box_source=side_data_detection_bboxes, for example: ./ffmpeg -i face.jpeg -vf dnn_detect=dnn_backend=openvino:model=face-detection-adas-0001.xml:\ input=data:output=detection_out:labels=face-detection-adas-0001.label,\ drawbox=box_source=side_data_detection_bboxes -y face_detect.jpeg Signed-off-by: Ting Fu --- doc/filters.texi | 8 +++ libavfilter/vf_drawbox.c | 52 ++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index a218289ddd..f2ac8c4cc8 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10356,6 +10356,14 @@ The x and y offset coordinates where the box is drawn. @item h The width and height of the drawn box. +@item box_source +Box source can be set as side_data_detection_bboxes if you want to use box data in +detection bboxes of side data. + +If @var{box_source} is set, the @var{x}, @var{y}, @var{width} and @var{height} will be ignored and +the box data in detection bboxes of side data will be used instead. So please do not use this parameter if you are +not sure about the box source. + @item t The thickness of the drawn box. diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c index 95e26191bd..fff78862e9 100644 --- a/libavfilter/vf_drawbox.c +++ b/libavfilter/vf_drawbox.c @@ -31,6 +31,7 @@ #include "libavutil/eval.h" #include "libavutil/pixdesc.h" #include "libavutil/parseutils.h" +#include "libavutil/detection_bbox.h" #include "avfilter.h" #include "formats.h" #include "internal.h" @@ -79,8 +80,10 @@ typedef struct DrawBoxContext { char *x_expr, *y_expr; ///< expression for x and y char *w_expr, *h_expr; ///< expression for width and height char *t_expr; ///< expression for thickness +char *box_source_string; ///< string for box data source int have_alpha; int replace; +enum AVFrameSideDataType box_source; } DrawBoxContext; static const int NUM_EXPR_EVALS = 5; @@ -140,11 +143,30 @@ static void draw_region(AVFrame *frame, DrawBoxContext *ctx, int left, int top, } } +static enum AVFrameSideDataType box_source_string_parse(const char *box_source_string) +{ +av_assert0(box_source_string); +if (!strcmp(box_source_string, "side_data_detection_bboxes")) { +return AV_FRAME_DATA_DETECTION_BBOXES; +} else { +// will support side_data_regions_of_interest next +return AVERROR(EINVAL); +} +} + static av_cold int init(AVFilterContext *ctx) { DrawBoxContext *s = ctx->priv; uint8_t rgba_color[4]; +if (s->box_source_string) { +s->box_source = box_source_string_parse(s->box_source_string); +if ((int)s->box_source < 0) { +av_log(ctx, AV_LOG_ERROR, "Error box source: %s\n",s->box_source_string); +return AVERROR(EINVAL); +} +} + if (!strcmp(s->color_str, "invert")) s->invert_color = 1; else if (av_parse_color(rgba_color, s->color_str, -1, ctx) < 0) @@ -272,9 +294,34 @@ static av_pure av_always_inline int pixel_belongs_to_box(DrawBoxContext *s, int static int filter_frame(AVFilterLink *inlink, AVFrame *frame) { DrawBoxContext *s = inlink->dst->priv; +const AVDetectionBBoxHeader *header = NULL; +const AVDetectionBBox *bbox; +AVFrameSideData *sd; +int loop = 1; + +if (s->box_source == AV_FRAME_DATA_DETECTION_BBOXES) { +sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); +if (sd) { +header = (AVDetectionBBoxHeader *)sd->data; +loop = header->nb_bboxes; +} else { +av_log(s, AV_LOG_WARNING, "No detection bboxes.\n"); +return ff_filter_frame(inlink->dst->outputs[0], frame); +} +} -draw_region(frame, s, FFMAX(s->x, 0), FFMAX(s->y, 0), FFMIN(s->x + s->w, 
frame->width), -FFMIN(s->y + s->h, frame->height), pixel_belongs_to_box); +for (int i = 0; i < loop; i++) { +if (header) { +bbox = av_get_detection_bbox(header, i); +s->y = bbox->y; +s->x = bbox->x; +s->h = bbox->h; +s->w = bbox->w; +} + +draw_region(frame, s, FFMAX(s->x, 0), FFMAX(s->y, 0), FFMIN(s->x + s->w, frame->width), +FFMIN(s->y + s->h, frame->height), pixel_belongs_to_box); +} return ff_filter_frame(inlink->dst->outputs[0], frame); } @@ -329,6 +376,7 @@ static const AVOption drawbox_options[] = { { "thickness", "set the box thickness", OFFSET(t_expr),AV_OPT_TYPE_STRING, { .str="3" }, 0, 0,
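The side-data handling above follows the generic pattern for consuming detection bboxes; a minimal sketch, assuming only the public libavutil detection_bbox API:

    #include <stdint.h>
    #include "libavutil/frame.h"
    #include "libavutil/detection_bbox.h"

    /* Sketch: walk all detection bboxes attached to a frame. */
    static void for_each_bbox(const AVFrame *frame)
    {
        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
        if (sd) {
            const AVDetectionBBoxHeader *header = (const AVDetectionBBoxHeader *)sd->data;
            for (uint32_t i = 0; i < header->nb_bboxes; i++) {
                const AVDetectionBBox *bbox = av_get_detection_bbox(header, i);
                /* bbox->x, bbox->y, bbox->w, bbox->h are pixel coordinates */
                (void)bbox;
            }
        }
    }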
[FFmpeg-devel] [PATCH 3/3] libavfilter: vf_drawtext filter support draw text with detection bounding boxes in side_data
This feature can be used with dnn detection by setting vf_drawtext's option text_source=side_data_detection_bboxes, for example: ./ffmpeg -i face.jpeg -vf dnn_detect=dnn_backend=openvino:model=face-detection-adas-0001.xml:\ input=data:output=detection_out:labels=face-detection-adas-0001.label,drawbox=box_source=side_data_detection_bboxes,drawtext=text_source=side_data_detection_bboxes:fontcolor=green:\ fontsize=40 -y face_detect.jpeg Please note that the default fontsize of vf_drawtext is 12, which may be too small to be seen clearly. Signed-off-by: Ting Fu --- doc/filters.texi | 8 libavfilter/vf_drawtext.c | 77 --- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index f2ac8c4cc8..d10e6de03d 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10788,6 +10788,14 @@ parameter @var{text}. If both @var{text} and @var{textfile} are specified, an error is thrown. +@item text_source +Text source should be set as side_data_detection_bboxes if you want to use text data in +detection bboxes of side data. + +If text source is set, @var{text} and @var{textfile} will be ignored and the +text data in detection bboxes of side data will be used instead. So please do not use this parameter +if you are not sure about the text source. + @item reload If set to 1, the @var{textfile} will be reloaded before each frame. Be sure to update it atomically, or it may be read partially, or even fail. diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c index 7ea057b812..382d589e26 100644 --- a/libavfilter/vf_drawtext.c +++ b/libavfilter/vf_drawtext.c @@ -55,6 +55,7 @@ #include "libavutil/time_internal.h" #include "libavutil/tree.h" #include "libavutil/lfg.h" +#include "libavutil/detection_bbox.h" #include "avfilter.h" #include "drawutils.h" #include "formats.h" @@ -199,6 +200,8 @@ typedef struct DrawTextContext { int tc24hmax; ///< 1 if timecode is wrapped to 24 hours, 0 otherwise int reload; ///< reload text file for each frame int start_number; ///< starting frame number for n/frame_num var +char *text_source_string; ///< the string to specify text data source +enum AVFrameSideDataType text_source; #if CONFIG_LIBFRIBIDI int text_shaping; ///< 1 to shape the text before drawing it #endif @@ -246,6 +249,7 @@ static const AVOption drawtext_options[]= { { "alpha", "apply alpha while rendering", OFFSET(a_expr), AV_OPT_TYPE_STRING, { .str = "1" }, .flags = FLAGS }, {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS}, {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS}, +{"text_source", "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS }, #if CONFIG_LIBFRIBIDI {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS}, @@ -690,6 +694,16 @@ out: } #endif +static enum AVFrameSideDataType text_source_string_parse(const char *text_source_string) +{ +av_assert0(text_source_string); +if (!strcmp(text_source_string, "side_data_detection_bboxes")) { +return AV_FRAME_DATA_DETECTION_BBOXES; +} else { +return AVERROR(EINVAL); +} +} + static av_cold int init(AVFilterContext *ctx) { int err; @@ -731,9 +745,28 @@ static av_cold int init(AVFilterContext *ctx) s->text = av_strdup(""); } +if (s->text_source_string) { +s->text_source = text_source_string_parse(s->text_source_string); +if ((int)s->text_source < 0) { +av_log(ctx, 
AV_LOG_ERROR, "Error text source: %s\n", s->text_source_string); +return AVERROR(EINVAL); +} +} + +if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES) { +if (s->text) { +av_log(ctx, AV_LOG_WARNING, "Multiple texts provided, will use text_source only\n"); +av_free(s->text); +} +s->text = av_mallocz(AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE * + (AV_NUM_DETECTION_BBOX_CLASSIFY + 1)); +if (!s->text) +return AVERROR(ENOMEM); +} + if (!s->text) { av_log(ctx, AV_LOG_ERROR, - "Either text, a valid file or a timecode must be provided\n"); + "Either text, a valid file, a timecode or text source must be provided\n"); return AVERROR(EINVAL);
[FFmpeg-devel] [PATCH 1/2] lavfi/vf_drawbox.c: fix CID 1485004
CID 1485004: Uninitialized variables (UNINIT) Using uninitialized value "x" when calling "*pixel_belongs_to_region". Signed-off-by: Ting Fu --- libavfilter/vf_drawbox.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libavfilter/vf_drawbox.c b/libavfilter/vf_drawbox.c index fff78862e9..1e9e028650 100644 --- a/libavfilter/vf_drawbox.c +++ b/libavfilter/vf_drawbox.c @@ -126,8 +126,9 @@ static void draw_region(AVFrame *frame, DrawBoxContext *ctx, int left, int top, for (y = top; y < down; y++) { ASSIGN_THREE_CHANNELS if (ctx->invert_color) { -if (pixel_belongs_to_region(ctx, x, y)) -row[0][x] = 0xff - row[0][x]; +for (x = left; x < right; x++) +if (pixel_belongs_to_region(ctx, x, y)) +row[0][x] = 0xff - row[0][x]; } else { for (x = left; x < right; x++) { double alpha = (double)ctx->yuv_color[A] / 255; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] lavfi/vf_drawtext.c: fix CID 1485003
CID 1485003: Memory - illegal accesses (UNINIT) Using uninitialized value "sd". Signed-off-by: Ting Fu --- libavfilter/vf_drawtext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c index 382d589e26..c4c09894e4 100644 --- a/libavfilter/vf_drawtext.c +++ b/libavfilter/vf_drawtext.c @@ -1554,7 +1554,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) AVFrameSideData *sd; int loop = 1; -if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES && sd) { +if (s->text_source == AV_FRAME_DATA_DETECTION_BBOXES) { sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); if (sd) { header = (AVDetectionBBoxHeader *)sd->data; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] dnn/openvino: add input/output name info
Show all input/output names when the given input or output name is not correct. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 2f0998046a..e5842906d1 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -79,6 +79,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input OVModel *ov_model = (OVModel *)model; OVContext *ctx = &ov_model->ctx; char *model_input_name = NULL; +char *all_input_names = NULL; IEStatusCode status; size_t model_input_count = 0; dimensions_t dims; @@ -118,12 +119,15 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input input->width= dims.dims[3]; input->dt = precision_to_datatype(precision); return DNN_SUCCESS; +} else { +//incorrect input name +APPEND_STRING(all_input_names, model_input_name) } ie_network_name_free(&model_input_name); } -av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", model_input_name); +av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model, all input(s) are: \"%s\"\n", input_name, all_input_names); return DNN_ERROR; } @@ -246,12 +250,15 @@ err: DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output) { +char *model_output_name = NULL; +char *all_output_names = NULL; dimensions_t dims; precision_e precision; ie_blob_buffer_t blob_buffer; OVModel *ov_model = (OVModel *)model->model; OVContext *ctx = &ov_model->ctx; IEStatusCode status = ie_infer_request_infer(ov_model->infer_request); +size_t model_output_count = 0; if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to start synchronous model inference\n"); return DNN_ERROR; } @@ -262,7 +269,16 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, c ie_blob_t *output_blob = NULL; status = ie_infer_request_get_blob(ov_model->infer_request, output_name, &output_blob); if (status != OK) { +//incorrect output name av_log(ctx, AV_LOG_ERROR, "Failed to get model output data\n"); +status = ie_network_get_outputs_number(ov_model->network, &model_output_count); +for (size_t i = 0; i < model_output_count; i++) { +status = ie_network_get_output_name(ov_model->network, i, &model_output_name); +APPEND_STRING(all_output_names, model_output_name) +} +av_log(ctx, AV_LOG_ERROR, + "output \"%s\" may not correct, all output(s) are: \"%s\"\n", + output_name, all_output_names); return DNN_ERROR; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] dnn/openvino: support run inference via GPU
To enable OpenVINO GPU inference, please: 1. install the required OpenCL drivers, see: https://github.com/intel/compute-runtime/releases/tag/19.41.14441 2. build the OpenVINO C lib with GPU enabled: use cmake config with: -DENABLE_CLDNN=ON 3. then make, and include the OpenVINO C lib in the environment variables. For detailed steps please refer to: https://github.com/openvinotoolkit/openvino/blob/master/build-instruction.md To run inference on GPU, please add: options=device=GPU Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 52 ++ 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 5d6d3ed542..2f0998046a 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -26,10 +26,18 @@ #include "dnn_backend_openvino.h" #include "libavformat/avio.h" #include "libavutil/avassert.h" +#include "libavutil/opt.h" +#include "libavutil/avstring.h" +#include "../internal.h" #include +typedef struct OVOptions{ +char *device_type; +} OVOptions; + typedef struct OVContext { const AVClass *class; +OVOptions options; } OVContext; typedef struct OVModel{ @@ -41,14 +49,19 @@ typedef struct OVModel{ ie_blob_t *input_blob; } OVModel; -static const AVClass dnn_openvino_class = { -.class_name = "dnn_openvino", -.item_name = av_default_item_name, -.option = NULL, -.version= LIBAVUTIL_VERSION_INT, -.category = AV_CLASS_CATEGORY_FILTER, +#define APPEND_STRING(generated_string, iterate_string) \ +generated_string = generated_string ? av_asprintf("%s %s", generated_string, iterate_string) : \ + av_asprintf("%s", iterate_string); + +#define OFFSET(x) offsetof(OVContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM +static const AVOption dnn_openvino_options[] = { +{ "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, +{ NULL } }; +AVFILTER_DEFINE_CLASS(dnn_openvino); + static DNNDataType precision_to_datatype(precision_e precision) { switch (precision) @@ -159,10 +172,13 @@ err: DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options) { +char *all_dev_names = NULL; DNNModel *model = NULL; OVModel *ov_model = NULL; +OVContext *ctx = NULL; IEStatusCode status; ie_config_t config = {NULL, NULL, NULL}; +ie_available_devices_t a_dev; model = av_malloc(sizeof(DNNModel)); if (!model){ @@ -173,6 +189,14 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options) if (!ov_model) goto err; ov_model->ctx.class = &dnn_openvino_class; +ctx = &ov_model->ctx; + +//parse options +av_opt_set_defaults(ctx); +if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) { +av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options); +goto err; +} status = ie_core_create("", &ov_model->core); if (status != OK) @@ -182,9 +206,21 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options) if (status != OK) goto err; -status = ie_core_load_network(ov_model->core, ov_model->network, "CPU", &config, &ov_model->exe_network); -if (status != OK) +status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n"); +status = ie_core_get_available_devices(ov_model->core, &a_dev); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n"); +goto err; +} +for (int i = 0; i < a_dev.num_devices; i++) { 
+APPEND_STRING(all_dev_names, a_dev.devices[i]) +} +av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n", + ctx->options.device_type, all_dev_names); goto err; +} model->model = (void *)ov_model; model->set_input = &set_input_ov; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
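A minimal usage sketch of the new option (not part of the patch; the helper below is hypothetical, and the ff_dnn_load_model_ov() signature is taken from the diff above). The option string is parsed with av_opt_set_from_string() using "=" and "&" as separators, so "device=GPU" selects the GPU plugin:

#include "libavfilter/dnn/dnn_backend_openvino.h"

/* Hypothetical helper: load a model for GPU inference. On failure the
 * backend itself logs the device list reported by
 * ie_core_get_available_devices(). */
static DNNModel *load_model_on_gpu(const char *model_path)
{
    return ff_dnn_load_model_ov(model_path, "device=GPU");
}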
[FFmpeg-devel] [PATCH 1/2] lavfi/dnn: Modify error message for incorrect backend_type
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index 554a36b0dc..fa484c0905 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -71,7 +71,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) #endif break; default: -av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n"); +av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n"); av_freep(&dnn_module); return NULL; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 1/2] lavfi/dnn: Modify error message for incorrect backend_type
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index 554a36b0dc..fa484c0905 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -71,7 +71,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) #endif break; default: -av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n"); +av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n"); av_freep(&dnn_module); return NULL; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V3 1/3] lavfi/dnn: Mark native backend as deprecated
Mark native as a deprecated value for the backend_type option. Modify the related error message. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_interface.c | 12 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index 554a36b0dc..12d36f7fed 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -24,7 +24,6 @@ */ #include "../dnn_interface.h" -#include "dnn_backend_native.h" #include "dnn_backend_tf.h" #include "dnn_backend_openvino.h" #include "libavutil/mem.h" @@ -40,12 +39,9 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) switch(backend_type){ case DNN_NATIVE: -dnn_module->load_model = &ff_dnn_load_model_native; -dnn_module->execute_model = &ff_dnn_execute_model_native; -dnn_module->get_result = &ff_dnn_get_result_native; -dnn_module->flush = &ff_dnn_flush_native; -dnn_module->free_model = &ff_dnn_free_model_native; -break; +av_log(NULL, AV_LOG_ERROR, "Native backend is deprecated, please use other supported DNN backends.\n"); +av_freep(&dnn_module); +return NULL; case DNN_TF: #if (CONFIG_LIBTENSORFLOW == 1) dnn_module->load_model = &ff_dnn_load_model_tf; @@ -71,7 +67,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) #endif break; default: -av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n"); +av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n"); av_freep(&dnn_module); return NULL; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V3 2/3] lavfi/dnn: Delete DNN native backend related tools and docs.
Signed-off-by: Ting Fu --- doc/filters.texi| 43 +- tools/python/convert.py | 56 --- tools/python/convert_from_tensorflow.py | 607 tools/python/convert_header.py | 26 - 4 files changed, 4 insertions(+), 728 deletions(-) delete mode 100644 tools/python/convert.py delete mode 100644 tools/python/convert_from_tensorflow.py delete mode 100644 tools/python/convert_header.py diff --git a/doc/filters.texi b/doc/filters.texi index 9c32339141..797d1c9fe2 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -11222,9 +11222,6 @@ See @url{http://openaccess.thecvf.com/content_ECCV_2018/papers/Xia_Li_Recurrent_ Training as well as model generation scripts are provided in the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. -Native model files (.model) can be generated from TensorFlow model -files (.pb) by using tools/python/convert.py - The filter accepts the following options: @table @option @@ -11245,21 +11242,16 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @url{https://www.tensorflow.org/install/lang_c}) and configure FFmpeg with @code{--enable-libtensorflow} @end table -Default value is @samp{native}. @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow and native -backend can load files for only its format. +Note that different backends use different file formats. TensorFlow can load files for only its format. @end table To get full functionality (such as async execution), please use the @ref{dnn_processing} filter. @@ -11583,9 +11575,6 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @@ -11601,14 +11590,9 @@ be needed if the header files and libraries are not installed into system path) @end table -Default value is @samp{native}. - @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow, OpenVINO and native -backend can load files for only its format. - -Native model file (.model) can be generated from TensorFlow model file (.pb) by using tools/python/convert.py +Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format. @item input Set the input name of the dnn network. @@ -11634,12 +11618,6 @@ Remove rain in rgb24 frame with can.pb (see @ref{derain} filter): ./ffmpeg -i rain.jpg -vf format=rgb24,dnn_processing=dnn_backend=tensorflow:model=can.pb:input=x:output=y derain.jpg @end example -@item -Halve the pixel value of the frame with format gray32f: -@example -ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png -@end example - @item Handle the Y channel with srcnn.pb (see @ref{sr} filter) for frame with yuv420p (planar YUV formats supported): @example @@ -21648,13 +21626,6 @@ Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). See @url{https://arxiv.org/abs/1609.05158}. 
@end itemize -Training scripts as well as scripts for model file (.pb) saving can be found at -@url{https://github.com/XueweiMeng/sr/tree/sr_dnn_native}. Original repository -is at @url{https://github.com/HighVoltageRocknRoll/sr.git}. - -Native model files (.model) can be generated from TensorFlow model -files (.pb) by using tools/python/convert.py - The filter accepts the following options: @table @option @@ -21663,9 +21634,6 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @@ -21673,13 +21641,10 @@ need to install the TensorFlow for C library (see @code{--enable-libtensorflow} @end table -Default value is @samp{native}. - @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow backend -can load files for both formats, while native backend can load files for only -its format. +Note that different backends use different file formats. TensorFlow, OpenVINO backend +can load files for only its format. @item scale_factor Set scale factor
[FFmpeg-devel] [PATCH V4 1/3] lavfi/dnn: Mark native backend as unsupported
Native is a deprecated value for the backend_type option. Modify the related error message. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_interface.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index 554a36b0dc..5b1695a1dd 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -24,7 +24,6 @@ */ #include "../dnn_interface.h" -#include "dnn_backend_native.h" #include "dnn_backend_tf.h" #include "dnn_backend_openvino.h" #include "libavutil/mem.h" @@ -39,13 +38,6 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) } switch(backend_type){ -case DNN_NATIVE: -dnn_module->load_model = &ff_dnn_load_model_native; -dnn_module->execute_model = &ff_dnn_execute_model_native; -dnn_module->get_result = &ff_dnn_get_result_native; -dnn_module->flush = &ff_dnn_flush_native; -dnn_module->free_model = &ff_dnn_free_model_native; -break; case DNN_TF: #if (CONFIG_LIBTENSORFLOW == 1) dnn_module->load_model = &ff_dnn_load_model_tf; @@ -71,7 +63,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) #endif break; default: -av_log(NULL, AV_LOG_ERROR, "Module backend_type is not native or tensorflow\n"); +av_log(NULL, AV_LOG_ERROR, "Module backend_type is not supported or enabled.\n"); av_freep(&dnn_module); return NULL; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V4 2/3] lavfi/dnn: Delete DNN native backend related tools and docs.
Signed-off-by: Ting Fu --- doc/filters.texi| 43 +- tools/python/convert.py | 56 --- tools/python/convert_from_tensorflow.py | 607 tools/python/convert_header.py | 26 - 4 files changed, 4 insertions(+), 728 deletions(-) delete mode 100644 tools/python/convert.py delete mode 100644 tools/python/convert_from_tensorflow.py delete mode 100644 tools/python/convert_header.py diff --git a/doc/filters.texi b/doc/filters.texi index 9c32339141..797d1c9fe2 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -11222,9 +11222,6 @@ See @url{http://openaccess.thecvf.com/content_ECCV_2018/papers/Xia_Li_Recurrent_ Training as well as model generation scripts are provided in the repository at @url{https://github.com/XueweiMeng/derain_filter.git}. -Native model files (.model) can be generated from TensorFlow model -files (.pb) by using tools/python/convert.py - The filter accepts the following options: @table @option @@ -11245,21 +11242,16 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @url{https://www.tensorflow.org/install/lang_c}) and configure FFmpeg with @code{--enable-libtensorflow} @end table -Default value is @samp{native}. @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow and native -backend can load files for only its format. +Note that different backends use different file formats. TensorFlow can load files for only its format. @end table To get full functionality (such as async execution), please use the @ref{dnn_processing} filter. @@ -11583,9 +11575,6 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @@ -11601,14 +11590,9 @@ be needed if the header files and libraries are not installed into system path) @end table -Default value is @samp{native}. - @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow, OpenVINO and native -backend can load files for only its format. - -Native model file (.model) can be generated from TensorFlow model file (.pb) by using tools/python/convert.py +Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format. @item input Set the input name of the dnn network. @@ -11634,12 +11618,6 @@ Remove rain in rgb24 frame with can.pb (see @ref{derain} filter): ./ffmpeg -i rain.jpg -vf format=rgb24,dnn_processing=dnn_backend=tensorflow:model=can.pb:input=x:output=y derain.jpg @end example -@item -Halve the pixel value of the frame with format gray32f: -@example -ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png -@end example - @item Handle the Y channel with srcnn.pb (see @ref{sr} filter) for frame with yuv420p (planar YUV formats supported): @example @@ -21648,13 +21626,6 @@ Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). See @url{https://arxiv.org/abs/1609.05158}. 
@end itemize -Training scripts as well as scripts for model file (.pb) saving can be found at -@url{https://github.com/XueweiMeng/sr/tree/sr_dnn_native}. Original repository -is at @url{https://github.com/HighVoltageRocknRoll/sr.git}. - -Native model files (.model) can be generated from TensorFlow model -files (.pb) by using tools/python/convert.py - The filter accepts the following options: @table @option @@ -21663,9 +21634,6 @@ Specify which DNN backend to use for model loading and execution. This option ac the following values: @table @samp -@item native -Native implementation of DNN loading and execution. - @item tensorflow TensorFlow backend. To enable this backend you need to install the TensorFlow for C library (see @@ -21673,13 +21641,10 @@ need to install the TensorFlow for C library (see @code{--enable-libtensorflow} @end table -Default value is @samp{native}. - @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow backend -can load files for both formats, while native backend can load files for only -its format. +Note that different backends use different file formats. TensorFlow, OpenVINO backend +can load files for only its format. @item scale_factor Set scale factor
[FFmpeg-devel] [PATCH] dnn: add NV12 pixel format support
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_io_proc.c | 2 ++ libavfilter/vf_dnn_processing.c | 30 +- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index c9b49be3bd..2744cb6502 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -64,6 +64,7 @@ DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ case AV_PIX_FMT_YUV410P: case AV_PIX_FMT_YUV411P: case AV_PIX_FMT_GRAY8: +case AV_PIX_FMT_NV12: sws_ctx = sws_getContext(frame->width, frame->height, AV_PIX_FMT_GRAYF32, @@ -135,6 +136,7 @@ DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_c case AV_PIX_FMT_YUV410P: case AV_PIX_FMT_YUV411P: case AV_PIX_FMT_GRAY8: +case AV_PIX_FMT_NV12: sws_ctx = sws_getContext(frame->width, frame->height, AV_PIX_FMT_GRAY8, diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index 334243bd2b..76fd2e88db 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -113,6 +113,7 @@ static int query_formats(AVFilterContext *context) AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, +AV_PIX_FMT_NV12, AV_PIX_FMT_NONE }; AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); @@ -161,6 +162,7 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin case AV_PIX_FMT_YUV444P: case AV_PIX_FMT_YUV410P: case AV_PIX_FMT_YUV411P: +case AV_PIX_FMT_NV12: if (model_input->channels != 1) { LOG_FORMAT_CHANNEL_MISMATCH(); return AVERROR(EIO); @@ -212,15 +214,22 @@ static int prepare_uv_scale(AVFilterLink *outlink) if (isPlanarYUV(fmt)) { if (inlink->w != outlink->w || inlink->h != outlink->h) { -const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); -int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); -int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); -int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); -int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); -ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8, - sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, - SWS_BICUBIC, NULL, NULL, NULL); -ctx->sws_uv_height = sws_src_h; +if (fmt == AV_PIX_FMT_NV12) { +ctx->sws_uv_scale = sws_getContext(inlink->w >> 1, inlink->h >> 1, AV_PIX_FMT_YA8, + outlink->w >> 1, outlink->h >> 1, AV_PIX_FMT_YA8, + SWS_BICUBIC, NULL, NULL, NULL); +ctx->sws_uv_height = inlink->h >> 1; +} else { +const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); +int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); +int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); +int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); +int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); +ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8, + sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, + SWS_BICUBIC, NULL, NULL, NULL); +ctx->sws_uv_height = sws_src_h; +} } } @@ -262,6 +271,9 @@ static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame in->data[i], in->linesize[i], bytewidth, uv_height); } +} else if (in->format == AV_PIX_FMT_NV12) { +sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1, + 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1); } else { sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1, 0, ctx->sws_uv_height, out->data + 1, 
out->linesize + 1); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
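The NV12 path above rests on a small trick worth spelling out: NV12 keeps U and V interleaved in a single half-resolution plane, so that plane can be scaled as a two-byte-per-pixel gray+alpha (YA8) surface, with U landing in the gray byte and V in the alpha byte, resizing both chroma components in one pass. A minimal sketch of the scaler setup (assuming even frame dimensions, as in the patch):

#include "libswscale/swscale.h"
#include "libavutil/pixfmt.h"

/* Build a scaler for the interleaved UV plane of NV12 frames; the chroma
 * plane is subsampled by 2 in both directions. */
static struct SwsContext *nv12_uv_scaler(int in_w, int in_h,
                                         int out_w, int out_h)
{
    return sws_getContext(in_w >> 1, in_h >> 1, AV_PIX_FMT_YA8,
                          out_w >> 1, out_h >> 1, AV_PIX_FMT_YA8,
                          SWS_BICUBIC, NULL, NULL, NULL);
}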
[FFmpeg-devel] [PATCH 1/3] dnn/openvino: remove unnecessary code
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 8 1 file changed, 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index d27e451eea..050be97209 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -284,14 +284,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } -// The order of dims in the openvino is fixed and it is always NCHW for 4-D data. -// while we pass NHWC data from FFmpeg to openvino -status = ie_network_set_input_layout(ov_model->network, input_name, NHWC); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name); -return DNN_ERROR; -} - input->channels = dims.dims[1]; input->height = dims.dims[2]; input->width= dims.dims[3]; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/3] dnn/openvino: refine code for better model initialization
Move openvino model/inference request creation and initialization steps from ff_dnn_load_model_ov to new function init_model_ov, for later input resize support. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 153 +++-- 1 file changed, 93 insertions(+), 60 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 050be97209..d6e0593a0b 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -217,6 +217,78 @@ static void infer_completion_callback(void *args) task->done = 1; } +static DNNReturnType init_model_ov(OVModel *ov_model) +{ +OVContext *ctx = &ov_model->ctx; +IEStatusCode status; +ie_available_devices_t a_dev; +ie_config_t config = {NULL, NULL, NULL}; +char *all_dev_names = NULL; + +status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); +status = ie_core_get_available_devices(ov_model->core, &a_dev); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n"); +goto err; +} +for (int i = 0; i < a_dev.num_devices; i++) { +APPEND_STRING(all_dev_names, a_dev.devices[i]) +} +av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n", + ctx->options.device_type, all_dev_names); +goto err; +} + +// create infer_request for sync execution +status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request); +if (status != OK) +goto err; + +// create infer_requests for async execution +if (ctx->options.nireq <= 0) { +// the default value is a rough estimation +ctx->options.nireq = av_cpu_count() / 2 + 1; +} + +ov_model->request_queue = ff_safe_queue_create(); +if (!ov_model->request_queue) { +goto err; +} + +for (int i = 0; i < ctx->options.nireq; i++) { +ie_infer_request_t *request; +RequestItem *item = av_mallocz(sizeof(*item)); +if (!item) { +goto err; +} +status = ie_exec_network_create_infer_request(ov_model->exe_network, &request); +if (status != OK) { +av_freep(&item); +goto err; +} +item->infer_request = request; +item->callback.completeCallBackFunc = infer_completion_callback; +item->callback.args = item; +if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) { +av_freep(&item); +goto err; +} +} + +ov_model->task_queue = ff_queue_create(); +if (!ov_model->task_queue) { +goto err; +} + +return DNN_SUCCESS; + +err: +ff_dnn_free_model_ov(&ov_model->model); +return DNN_ERROR; +} + static DNNReturnType execute_model_ov(TaskItem *task, RequestItem *request) { IEStatusCode status; @@ -325,6 +397,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (!ov_model->exe_network) { +if (init_model_ov(ov_model) != DNN_SUCCESS) { +av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); +return DNN_ERROR; +}; +} + task.done = 0; task.do_ioproc = 0; task.async = 0; @@ -347,13 +426,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx) { -char *all_dev_names = NULL; DNNModel *model = NULL; OVModel *ov_model = NULL; OVContext *ctx = NULL; IEStatusCode status; -ie_config_t config = {NULL, NULL, NULL}; -ie_available_devices_t a_dev; model = 
av_mallocz(sizeof(DNNModel)); if (!model){ @@ -385,63 +461,6 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, if (status != OK) goto err; -status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n"); -status = ie_core_get_available_devices(ov_model->core, &a_dev); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n"); -goto err; -} -for (int i = 0; i &
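The net effect of this refactor is a lazy-initialization pattern: ff_dnn_load_model_ov() now only reads the network, and the executable network plus its inference requests are created on first use inside get_output_ov(). A condensed sketch of the guard this enables (names from the patch, error handling reduced; the helper name is hypothetical):

/* Create the executable network on demand, so the network can still be
 * reshaped between model loading and the first inference. */
static DNNReturnType ensure_exe_network(OVModel *ov_model)
{
    if (ov_model->exe_network)      /* already initialized */
        return DNN_SUCCESS;
    return init_model_ov(ov_model); /* load network, create infer requests */
}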
[FFmpeg-devel] [PATCH 3/3] dnn/openvino: support model input resize
The OpenVINO API requires a fixed input size to run a model, while some OpenVINO models do accept different input sizes. To enable this feature, add an input_resizable option here for easier use. Set the bool variable input_resizable to specify whether the input is resizable: input_resizable = 1 means input resizing is supported, i.e. different input sizes are accepted; input_resizable = 0 (default) means input resizing is not supported. Please make sure the inference model does accept different input sizes before using this option, otherwise the inference engine may report errors. eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\ model=model_name.xml:input=input_name:output=output_name:\ options=device=CPU\&input_resizable=1 -y output_video_name.mp4 Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index d6e0593a0b..65d74702ff 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -37,6 +37,7 @@ typedef struct OVOptions{ char *device_type; int nireq; +int input_resizable; } OVOptions; typedef struct OVContext { @@ -83,6 +84,7 @@ typedef struct RequestItem { static const AVOption dnn_openvino_options[] = { { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, +{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, { NULL } }; @@ -334,6 +336,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input size_t model_input_count = 0; dimensions_t dims; precision_e precision; +int input_resizable = ctx->options.input_resizable; status = ie_network_get_inputs_number(ov_model->network, &model_input_count); if (status != OK) { @@ -357,8 +360,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input } input->channels = dims.dims[1]; -input->height = dims.dims[2]; -input->width= dims.dims[3]; +input->height = input_resizable ? -1 : dims.dims[2]; +input->width= input_resizable ?
-1 : dims.dims[3]; input->dt = precision_to_datatype(precision); return DNN_SUCCESS; } else { @@ -383,6 +386,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu RequestItem request; AVFrame *in_frame = av_frame_alloc(); AVFrame *out_frame = NULL; +IEStatusCode status; +input_shapes_t input_shapes; if (!in_frame) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n"); @@ -397,6 +402,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (ctx->options.input_resizable) { +status = ie_network_get_input_shapes(ov_model->network, &input_shapes); +input_shapes.shapes->shape.dims[2] = input_height; +input_shapes.shapes->shape.dims[3] = input_width; +status |= ie_network_reshape(ov_model->network, input_shapes); +ie_network_input_shapes_free(&input_shapes); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", input_name); +return DNN_ERROR; +} +} + if (!ov_model->exe_network) { if (init_model_ov(ov_model) != DNN_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
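The 2/3 refactor exists for the call order this patch depends on: an OpenVINO network can only be reshaped before it is loaded onto a device. A condensed sketch of that order (OpenVINO C API names as used in the patch; the helper name is hypothetical and error handling is shortened):

/* Resize the model input to the incoming frame size, then load the
 * reshaped network onto the target device. */
static IEStatusCode reshape_then_load(OVModel *ov_model, const char *device,
                                      int width, int height)
{
    ie_config_t config = {NULL, NULL, NULL};
    input_shapes_t shapes;
    IEStatusCode status = ie_network_get_input_shapes(ov_model->network, &shapes);
    if (status != OK)
        return status;
    shapes.shapes->shape.dims[2] = height; /* NCHW layout: dims[2] = H */
    shapes.shapes->shape.dims[3] = width;  /* NCHW layout: dims[3] = W */
    status = ie_network_reshape(ov_model->network, shapes);
    ie_network_input_shapes_free(&shapes);
    if (status != OK)
        return status;
    return ie_core_load_network(ov_model->core, ov_model->network, device,
                                &config, &ov_model->exe_network);
}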
[FFmpeg-devel] [PATCH V2 3/3] dnn/openvino: support model input resize
The OpenVINO API requires a fixed input size to run a model, while some OpenVINO models do accept different input sizes. To enable this feature, add an input_resizable option here for easier use. Set the bool variable input_resizable to specify whether the input is resizable: input_resizable = 1 means input resizing is supported, i.e. different input sizes are accepted; input_resizable = 0 (default) means input resizing is not supported. Please make sure the inference model does accept different input sizes before using this option, otherwise the inference engine may report errors. eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\ model=model_name.xml:input=input_name:output=output_name:\ options=device=CPU\&input_resizable=1 -y output_video_name.mp4 Signed-off-by: Ting Fu --- V2: rebase to latest code libavfilter/dnn/dnn_backend_openvino.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 0b125eef65..1664ff5268 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -38,6 +38,7 @@ typedef struct OVOptions{ char *device_type; int nireq; int batch_size; +int input_resizable; } OVOptions; typedef struct OVContext { @@ -86,6 +87,7 @@ static const AVOption dnn_openvino_options[] = { { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, +{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, { NULL } }; @@ -393,6 +395,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input size_t model_input_count = 0; dimensions_t dims; precision_e precision; +int input_resizable = ctx->options.input_resizable; status = ie_network_get_inputs_number(ov_model->network, &model_input_count); if (status != OK) { @@ -416,8 +419,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input } input->channels = dims.dims[1]; -input->height = dims.dims[2]; -input->width= dims.dims[3]; +input->height = input_resizable ? -1 : dims.dims[2]; +input->width= input_resizable ?
-1 : dims.dims[3]; input->dt = precision_to_datatype(precision); return DNN_SUCCESS; } else { @@ -443,6 +446,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu AVFrame *in_frame = av_frame_alloc(); AVFrame *out_frame = NULL; TaskItem *ptask = &task; +IEStatusCode status; +input_shapes_t input_shapes; if (!in_frame) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n"); @@ -457,6 +462,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (ctx->options.input_resizable) { +status = ie_network_get_input_shapes(ov_model->network, &input_shapes); +input_shapes.shapes->shape.dims[2] = input_height; +input_shapes.shapes->shape.dims[3] = input_width; +status |= ie_network_reshape(ov_model->network, input_shapes); +ie_network_input_shapes_free(&input_shapes); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", input_name); +return DNN_ERROR; +} +} + if (!ov_model->exe_network) { if (init_model_ov(ov_model) != DNN_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 1/3] dnn/openvino: remove unnecessary code
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 8 1 file changed, 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 5271d1caa5..8476f4fb38 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -325,14 +325,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } -// The order of dims in the openvino is fixed and it is always NCHW for 4-D data. -// while we pass NHWC data from FFmpeg to openvino -status = ie_network_set_input_layout(ov_model->network, input_name, NHWC); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name); -return DNN_ERROR; -} - input->channels = dims.dims[1]; input->height = dims.dims[2]; input->width= dims.dims[3]; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 2/3] dnn/openvino: refine code for better model initialization
Move openvino model/inference request creation and initialization steps from ff_dnn_load_model_ov to new function init_model_ov, for later input resize support. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 196 ++--- 1 file changed, 111 insertions(+), 85 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 8476f4fb38..0b125eef65 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -248,6 +248,96 @@ static void infer_completion_callback(void *args) } } +static DNNReturnType init_model_ov(OVModel *ov_model) +{ +OVContext *ctx = &ov_model->ctx; +IEStatusCode status; +ie_available_devices_t a_dev; +ie_config_t config = {NULL, NULL, NULL}; +char *all_dev_names = NULL; + +// batch size +if (ctx->options.batch_size <= 0) { +ctx->options.batch_size = 1; +} + +if (ctx->options.batch_size > 1) { +input_shapes_t input_shapes; +status = ie_network_get_input_shapes(ov_model->network, &input_shapes); +if (status != OK) +goto err; +for (int i = 0; i < input_shapes.shape_num; i++) +input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size; +status = ie_network_reshape(ov_model->network, input_shapes); +ie_network_input_shapes_free(&input_shapes); +if (status != OK) +goto err; +} + +status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); +status = ie_core_get_available_devices(ov_model->core, &a_dev); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n"); +goto err; +} +for (int i = 0; i < a_dev.num_devices; i++) { +APPEND_STRING(all_dev_names, a_dev.devices[i]) +} +av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n", + ctx->options.device_type, all_dev_names); +goto err; +} + +// create infer_request for sync execution +status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request); +if (status != OK) +goto err; + +// create infer_requests for async execution +if (ctx->options.nireq <= 0) { +// the default value is a rough estimation +ctx->options.nireq = av_cpu_count() / 2 + 1; +} + +ov_model->request_queue = ff_safe_queue_create(); +if (!ov_model->request_queue) { +goto err; +} + +for (int i = 0; i < ctx->options.nireq; i++) { +ie_infer_request_t *request; +RequestItem *item = av_mallocz(sizeof(*item)); +if (!item) { +goto err; +} +status = ie_exec_network_create_infer_request(ov_model->exe_network, &request); +if (status != OK) { +av_freep(&item); +goto err; +} +item->infer_request = request; +item->callback.completeCallBackFunc = infer_completion_callback; +item->callback.args = item; +if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) { +av_freep(&item); +goto err; +} +} + +ov_model->task_queue = ff_queue_create(); +if (!ov_model->task_queue) { +goto err; +} + +return DNN_SUCCESS; + +err: +ff_dnn_free_model_ov(&ov_model->model); +return DNN_ERROR; +} + static DNNReturnType execute_model_ov(RequestItem *request) { IEStatusCode status; @@ -367,6 +457,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (!ov_model->exe_network) { +if (init_model_ov(ov_model) != DNN_SUCCESS) { +av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); +return DNN_ERROR; +}; +} + 
task.done = 0; task.do_ioproc = 0; task.async = 0; @@ -391,13 +488,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx) { -char *all_dev_names = NULL; DNNModel *model = NULL; OVModel *ov_model = NULL; OVContext *ctx = NULL; IEStatusCode status; -ie_config_t config = {NULL, NULL, NULL}; -ie_available_devices_t a_dev; model = av_mallocz(sizeof(DNNModel)); if (!model){ @@ -429,88
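One detail new in V2 is the batch_size handling: before the network is loaded, the first (N) dimension of every input shape is rewritten to the requested batch size. A standalone sketch of that step (OpenVINO C API names as used in the patch; the helper name is hypothetical):

/* Apply the batch_size option by reshaping the N dimension of all inputs. */
static IEStatusCode apply_batch_size(ie_network_t *network, int batch_size)
{
    input_shapes_t input_shapes;
    IEStatusCode status = ie_network_get_input_shapes(network, &input_shapes);
    if (status != OK)
        return status;
    for (int i = 0; i < input_shapes.shape_num; i++)
        input_shapes.shapes[i].shape.dims[0] = batch_size; /* NCHW: dims[0] = N */
    status = ie_network_reshape(network, input_shapes);
    ie_network_input_shapes_free(&input_shapes);
    return status;
}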
[FFmpeg-devel] [PATCH V3 1/3] dnn/openvino: remove unnecessary code
Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 8 1 file changed, 8 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 5271d1caa5..8476f4fb38 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -325,14 +325,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } -// The order of dims in the openvino is fixed and it is always NCHW for 4-D data. -// while we pass NHWC data from FFmpeg to openvino -status = ie_network_set_input_layout(ov_model->network, input_name, NHWC); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Input \"%s\" does not match layout NHWC\n", input_name); -return DNN_ERROR; -} - input->channels = dims.dims[1]; input->height = dims.dims[2]; input->width= dims.dims[3]; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V3 2/3] dnn/openvino: refine code for better model initialization
Move openvino model/inference request creation and initialization steps from ff_dnn_load_model_ov to new function init_model_ov, for later input resize support. Signed-off-by: Ting Fu --- libavfilter/dnn/dnn_backend_openvino.c | 203 ++--- 1 file changed, 118 insertions(+), 85 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 8476f4fb38..ecfd2b3f36 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -248,6 +248,103 @@ static void infer_completion_callback(void *args) } } +static DNNReturnType init_model_ov(OVModel *ov_model) +{ +OVContext *ctx = &ov_model->ctx; +IEStatusCode status; +ie_available_devices_t a_dev; +ie_config_t config = {NULL, NULL, NULL}; +char *all_dev_names = NULL; + +// batch size +if (ctx->options.batch_size <= 0) { +ctx->options.batch_size = 1; +} + +if (ctx->options.batch_size > 1) { +input_shapes_t input_shapes; +status = ie_network_get_input_shapes(ov_model->network, &input_shapes); +if (status != OK) +goto err; +for (int i = 0; i < input_shapes.shape_num; i++) +input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size; +status = ie_network_reshape(ov_model->network, input_shapes); +ie_network_input_shapes_free(&input_shapes); +if (status != OK) +goto err; +} + +status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); +status = ie_core_get_available_devices(ov_model->core, &a_dev); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get available devices\n"); +goto err; +} +for (int i = 0; i < a_dev.num_devices; i++) { +APPEND_STRING(all_dev_names, a_dev.devices[i]) +} +av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n", + ctx->options.device_type, all_dev_names); +goto err; +} + +// create infer_request for sync execution +status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request); +if (status != OK) +goto err; + +// create infer_requests for async execution +if (ctx->options.nireq <= 0) { +// the default value is a rough estimation +ctx->options.nireq = av_cpu_count() / 2 + 1; +} + +ov_model->request_queue = ff_safe_queue_create(); +if (!ov_model->request_queue) { +goto err; +} + +for (int i = 0; i < ctx->options.nireq; i++) { +RequestItem *item = av_mallocz(sizeof(*item)); +if (!item) { +goto err; +} + +status = ie_exec_network_create_infer_request(ov_model->exe_network, &item->infer_request); +if (status != OK) { +av_freep(&item); +goto err; +} + +item->tasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->tasks)); +if (!item->tasks) { +av_freep(&item); +goto err; +} +item->task_count = 0; + +item->callback.completeCallBackFunc = infer_completion_callback; +item->callback.args = item; +if (ff_safe_queue_push_back(ov_model->request_queue, item) < 0) { +av_freep(&item); +goto err; +} +} + +ov_model->task_queue = ff_queue_create(); +if (!ov_model->task_queue) { +goto err; +} + +return DNN_SUCCESS; + +err: +ff_dnn_free_model_ov(&ov_model->model); +return DNN_ERROR; +} + static DNNReturnType execute_model_ov(RequestItem *request) { IEStatusCode status; @@ -367,6 +464,13 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (!ov_model->exe_network) { +if (init_model_ov(ov_model) != DNN_SUCCESS) { +av_log(ctx, 
AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); +return DNN_ERROR; +}; +} + task.done = 0; task.do_ioproc = 0; task.async = 0; @@ -391,13 +495,10 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx) { -char *all_dev_names = NULL; DNNModel *model = NULL; OVModel *ov_model = NULL; OVContext *ctx = NULL; IEStatusCode sta
[FFmpeg-devel] [PATCH V3 3/3] dnn/openvino: support model input resize
The OpenVINO API requires a fixed input size to run a model, while some OpenVINO models do accept different input sizes. To enable this feature, add an input_resizable option here for easier use. Set the bool variable input_resizable to specify whether the input is resizable: input_resizable = 1 means input resizing is supported, i.e. different input sizes are accepted; input_resizable = 0 (default) means input resizing is not supported. Please make sure the inference model does accept different input sizes before using this option, otherwise the inference engine may report errors. eg: ./ffmpeg -i video_name.mp4 -vf dnn_processing=dnn_backend=openvino:\ model=model_name.xml:input=input_name:output=output_name:\ options=device=CPU\&input_resizable=1 -y output_video_name.mp4 Signed-off-by: Ting Fu --- V3: rebase to latest code and add missing code libavfilter/dnn/dnn_backend_openvino.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index ecfd2b3f36..8a7abb33f0 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -38,6 +38,7 @@ typedef struct OVOptions{ char *device_type; int nireq; int batch_size; +int input_resizable; } OVOptions; typedef struct OVContext { @@ -86,6 +87,7 @@ static const AVOption dnn_openvino_options[] = { { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, +{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, { NULL } }; @@ -400,6 +402,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input size_t model_input_count = 0; dimensions_t dims; precision_e precision; +int input_resizable = ctx->options.input_resizable; status = ie_network_get_inputs_number(ov_model->network, &model_input_count); if (status != OK) { @@ -423,8 +426,8 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input } input->channels = dims.dims[1]; -input->height = dims.dims[2]; -input->width= dims.dims[3]; +input->height = input_resizable ? -1 : dims.dims[2]; +input->width= input_resizable ?
-1 : dims.dims[3]; input->dt = precision_to_datatype(precision); return DNN_SUCCESS; } else { @@ -450,6 +453,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu AVFrame *in_frame = av_frame_alloc(); AVFrame *out_frame = NULL; TaskItem *ptask = &task; +IEStatusCode status; +input_shapes_t input_shapes; if (!in_frame) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input frame\n"); @@ -464,6 +469,18 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu in_frame->width = input_width; in_frame->height = input_height; +if (ctx->options.input_resizable) { +status = ie_network_get_input_shapes(ov_model->network, &input_shapes); +input_shapes.shapes->shape.dims[2] = input_height; +input_shapes.shapes->shape.dims[3] = input_width; +status |= ie_network_reshape(ov_model->network, input_shapes); +ie_network_input_shapes_free(&input_shapes); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to reshape input size for %s\n", input_name); +return DNN_ERROR; +} +} + if (!ov_model->exe_network) { if (init_model_ov(ov_model) != DNN_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n"); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] tests/dnn/mathunary: fix the issue of NAN
When one of output[i] and expected_output is NaN, the unit test will always pass, because every ordered comparison involving NaN evaluates to false, so fabs(output[i] - expected_output) > EPS never triggers. Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index bf77c44bbe..f251447771 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -74,7 +74,8 @@ static int test(DNNMathUnaryOperation op) output = operands[1].data; for (int i = 0; i < sizeof(input) / sizeof(float); ++i) { float expected_output = get_expected(input[i], op); -if(fabs(output[i] - expected_output) > EPS) { +if ((isnan(output[i]) ^ isnan(expected_output)) || +fabs(output[i] - expected_output) > EPS) { printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output); av_freep(&output); return 1; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2] tests/dnn/mathunary: fix the issue of NAN
When one of output[i] and expected_output is NaN, the unit test will always pass, because every ordered comparison involving NaN evaluates to false, so fabs(output[i] - expected_output) > EPS never triggers. Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 683e623d95..70c6a43f95 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -86,7 +86,8 @@ static int test(DNNMathUnaryOperation op) output = operands[1].data; for (int i = 0; i < sizeof(input) / sizeof(float); ++i) { float expected_output = get_expected(input[i], op); -if(fabs(output[i] - expected_output) > EPS) { +if ((!isnan(output[i]) && !isnan(expected_output) && fabs(output[i] - expected_output) > EPS) || +(isnan(output[i]) && !isnan(expected_output)) || (!isnan(output[i]) && isnan(expected_output))) { printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output); av_freep(&output); return 1; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V3] tests/dnn/mathunary: fix the issue of NAN
When one of output[i] and expected_output is NaN, the unit test will always pass, because every ordered comparison involving NaN evaluates to false, so fabs(output[i] - expected_output) > EPS never triggers. Signed-off-by: Ting Fu --- tests/dnn/dnn-layer-mathunary-test.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/dnn/dnn-layer-mathunary-test.c b/tests/dnn/dnn-layer-mathunary-test.c index 683e623d95..5afc5c157e 100644 --- a/tests/dnn/dnn-layer-mathunary-test.c +++ b/tests/dnn/dnn-layer-mathunary-test.c @@ -86,7 +86,10 @@ static int test(DNNMathUnaryOperation op) output = operands[1].data; for (int i = 0; i < sizeof(input) / sizeof(float); ++i) { float expected_output = get_expected(input[i], op); -if(fabs(output[i] - expected_output) > EPS) { +int output_nan = isnan(output[i]); +int expected_nan = isnan(expected_output); +if ((!output_nan && !expected_nan && fabs(output[i] - expected_output) > EPS) || +(output_nan && !expected_nan) || (!output_nan && expected_nan)) { printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output); av_freep(&output); return 1; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
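For reference, a standalone program (not part of the patch) showing the failure mode all three revisions address: in C, every ordered comparison involving NaN evaluates to false, so the old fabs()-based check can never flag a NaN output, while the isnan()-based check does:

#include <math.h>
#include <stdio.h>

int main(void)
{
    float output = NAN, expected = 0.0f;
    const float EPS = 1e-5f;

    /* old check: prints 0, NaN slips through because NaN > EPS is false */
    printf("old check catches it: %d\n", fabs(output - expected) > EPS);
    /* new check: prints 1, the NaN mismatch is detected */
    printf("new check catches it: %d\n", isnan(output) != isnan(expected));
    return 0;
}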