On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
Note that optimised implementations of these functions will be taken
into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
are *not* overloaded by existing optimisations.
---
This adds the plus ones back, saving two branch instructions in C and
one in assembler (at the cost of two unconditional adds).
See my reply to the previous version of this patch. I am not sure whether it will help here.
---
libavcodec/h263dsp.c | 26 ++++++++++++++++++++++++++
libavcodec/h263dsp.h | 4 ++++
2 files changed, 30 insertions(+)
diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 6a13353499..f4523a68c1 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -19,10 +19,34 @@
#include <stdint.h>
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "config.h"
#include "h263dsp.h"
+static void h263_dct_unquantize_inter_c(int16_t *block, size_t len,
+ int qmul, int qadd) /* rescales block[0..len-1] in place */
+{
+ for (size_t i = 0; i < len; i++) {
+ int level = block[i];
+
+ if (level) { /* entries equal to zero are left unchanged */
+ if (level < 0)
+ level = level * qmul - qadd; /* negative input: qadd is subtracted */
+ else
+ level = level * qmul + qadd; /* positive input: qadd is added */
+ block[i] = level; /* int result is converted back to int16_t on store */
+ }
+ }
+}
+
+static void h263_dct_unquantize_intra_c(int16_t *block, size_t len,
+ int qmul, int qadd) /* like the inter variant, but block[0] is never touched */
+{
+ av_assert1(len >= 1); /* len is unsigned: len - 1 below would wrap around for len == 0 */
+ h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd); /* process block[1..len-1] */
+}
+
const uint8_t ff_h263_loop_filter_strength[32] = {
0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7,
7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
@@ -116,6 +140,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride,
int qscale)
av_cold void ff_h263dsp_init(H263DSPContext *ctx)
{
+ ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
+ ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
ctx->h263_h_loop_filter = h263_h_loop_filter_c;
ctx->h263_v_loop_filter = h263_v_loop_filter_c;
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..0ecbe83314 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -24,6 +24,10 @@
extern const uint8_t ff_h263_loop_filter_strength[32];
typedef struct H263DSPContext {
+ void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
+ size_t len, int mul, int add); /* C default skips block[0] and rescales block[1..len-1] in place */
+ void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
+ size_t len, int mul, int add); /* C default rescales block[0..len-1] in place */
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
} H263DSPContext;
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".