On Sun, Oct 18, 2015 at 10:57 AM, Hendrik Leppkes <h.lepp...@gmail.com> wrote: > On Sun, Oct 18, 2015 at 4:47 PM, Ganesh Ajjanagadde > <gajjanaga...@gmail.com> wrote: >> Commit e11e32686fdb21aded1ccf70202f1fffe87bb6a2 explains why replacing >> qsort with AV_QSORT yields performance improvements. >> >> This replaces all existing uses of libc's qsort with AV_QSORT. >> >> Benchmarks deemed unnecessary due to existing claims about AV_QSORT. >> Tested with FATE. > > Claims are nice, data is better. Some testing when doing performance > improvements shouldn't be too much to ask. :)
Just look at the asm: all cmp callbacks are inlined, which removes the function call overhead. I personally don't care beyond that. > >> >> Signed-off-by: Ganesh Ajjanagadde <gajjanaga...@gmail.com> >> --- >> cmdutils.c | 3 ++- >> cmdutils_opencl.c | 3 ++- >> ffmpeg.c | 3 ++- >> libavcodec/aacsbr_template.c | 14 ++++++++------ >> libavcodec/huffman.c | 3 ++- >> libavcodec/motion_est_template.c | 3 ++- >> libavcodec/utvideodec.c | 4 ++-- >> libavcodec/utvideoenc.c | 5 +++-- >> libavfilter/f_sendcmd.c | 3 ++- >> libavfilter/vf_deshake.c | 3 ++- >> libavfilter/vf_palettegen.c | 2 +- >> libavfilter/vf_paletteuse.c | 2 +- >> libavfilter/vf_removegrain.c | 7 ++++--- >> libavformat/subtitles.c | 10 +++++++--- >> libswresample/swresample-test.c | 3 ++- >> tests/checkasm/checkasm.c | 4 ++-- >> 16 files changed, 44 insertions(+), 28 deletions(-) >> >> diff --git a/cmdutils.c b/cmdutils.c >> index 9cdc801..c7e515e 100644 >> --- a/cmdutils.c >> +++ b/cmdutils.c >> @@ -47,6 +47,7 @@ >> #include "libavutil/libm.h" >> #include "libavutil/parseutils.h" >> #include "libavutil/pixdesc.h" >> +#include "libavutil/qsort.h" >> #include "libavutil/eval.h" >> #include "libavutil/dict.h" >> #include "libavutil/opt.h" >> @@ -1441,7 +1442,7 @@ static unsigned get_codecs_sorted(const >> AVCodecDescriptor ***rcodecs) >> while ((desc = avcodec_descriptor_next(desc))) >> codecs[i++] = desc; >> av_assert0(i == nb_codecs); >> - qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc); >> + AV_QSORT(codecs, nb_codecs, AVCodecDescriptor *, compare_codec_desc); >> *rcodecs = codecs; >> return nb_codecs; >> } >> diff --git a/cmdutils_opencl.c b/cmdutils_opencl.c >> index 61478e2..f6a4ddf 100644 >> --- a/cmdutils_opencl.c >> +++ b/cmdutils_opencl.c >> @@ -22,6 +22,7 @@ >> #include "libavutil/time.h" >> #include "libavutil/log.h" >> #include "libavutil/opencl.h" >> +#include "libavutil/qsort.h" >> #include "libavutil/avstring.h" >> #include "cmdutils.h" >> >> @@ -245,7 +246,7 @@ int opt_opencl_bench(void 
*optctx, const char *opt, >> const char *arg) >> } >> } >> } >> - qsort(devices, count, sizeof(OpenCLDeviceBenchmark), >> compare_ocl_device_desc); >> + AV_QSORT(devices, count, OpenCLDeviceBenchmark, >> compare_ocl_device_desc); >> fprintf(stderr, "platform_idx\tdevice_idx\tdevice_name\truntime\n"); >> for (i = 0; i < count; i++) >> fprintf(stdout, "%d\t%d\t%s\t%"PRId64"\n", >> diff --git a/ffmpeg.c b/ffmpeg.c >> index 36a68fb..fa6132b 100644 >> --- a/ffmpeg.c >> +++ b/ffmpeg.c >> @@ -43,6 +43,7 @@ >> #include "libavdevice/avdevice.h" >> #include "libswresample/swresample.h" >> #include "libavutil/opt.h" >> +#include "libavutil/qsort.h" >> #include "libavutil/channel_layout.h" >> #include "libavutil/parseutils.h" >> #include "libavutil/samplefmt.h" >> @@ -2705,7 +2706,7 @@ static void parse_forced_key_frames(char *kf, >> OutputStream *ost, >> } >> >> av_assert0(index == size); >> - qsort(pts, size, sizeof(*pts), compare_int64); >> + AV_QSORT(pts, size, int64_t, compare_int64); >> ost->forced_kf_count = size; >> ost->forced_kf_pts = pts; >> } >> diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c >> index d31b71e..9561ba0 100644 >> --- a/libavcodec/aacsbr_template.c >> +++ b/libavcodec/aacsbr_template.c >> @@ -32,6 +32,8 @@ >> * @author Zoran Basaric ( zoran.basa...@imgtec.com ) >> */ >> >> +#include "libavutil/qsort.h" >> + >> av_cold void AAC_RENAME(ff_aac_sbr_init)(void) >> { >> static const struct { >> @@ -138,8 +140,8 @@ static void sbr_make_f_tablelim(SpectralBandReplication >> *sbr) >> memcpy(sbr->f_tablelim + sbr->n[0] + 1, patch_borders + 1, >> (sbr->num_patches - 1) * sizeof(patch_borders[0])); >> >> - qsort(sbr->f_tablelim, sbr->num_patches + sbr->n[0], >> - sizeof(sbr->f_tablelim[0]), >> + AV_QSORT(sbr->f_tablelim, sbr->num_patches + sbr->n[0], >> + uint16_t, >> qsort_comparison_function_int16); >> >> sbr->n_lim = sbr->n[0] + sbr->num_patches - 1; >> @@ -296,7 +298,7 @@ static int sbr_make_f_master(AACContext *ac, >> 
SpectralBandReplication *sbr, >> if (spectrum->bs_stop_freq < 14) { >> sbr->k[2] = stop_min; >> make_bands(stop_dk, stop_min, 64, 13); >> - qsort(stop_dk, 13, sizeof(stop_dk[0]), >> qsort_comparison_function_int16); >> + AV_QSORT(stop_dk, 13, int16_t, qsort_comparison_function_int16); >> for (k = 0; k < spectrum->bs_stop_freq; k++) >> sbr->k[2] += stop_dk[k]; >> } else if (spectrum->bs_stop_freq == 14) { >> @@ -389,7 +391,7 @@ static int sbr_make_f_master(AACContext *ac, >> SpectralBandReplication *sbr, >> >> make_bands(vk0+1, sbr->k[0], sbr->k[1], num_bands_0); >> >> - qsort(vk0 + 1, num_bands_0, sizeof(vk0[1]), >> qsort_comparison_function_int16); >> + AV_QSORT(vk0 + 1, num_bands_0, int16_t, >> qsort_comparison_function_int16); >> vdk0_max = vk0[num_bands_0]; >> >> vk0[0] = sbr->k[0]; >> @@ -430,13 +432,13 @@ static int sbr_make_f_master(AACContext *ac, >> SpectralBandReplication *sbr, >> >> if (vdk1_min < vdk0_max) { >> int change; >> - qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), >> qsort_comparison_function_int16); >> + AV_QSORT(vk1 + 1, num_bands_1, int16_t, >> qsort_comparison_function_int16); >> change = FFMIN(vdk0_max - vk1[1], (vk1[num_bands_1] - >> vk1[1]) >> 1); >> vk1[1] += change; >> vk1[num_bands_1] -= change; >> } >> >> - qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), >> qsort_comparison_function_int16); >> + AV_QSORT(vk1 + 1, num_bands_1, int16_t, >> qsort_comparison_function_int16); >> >> vk1[0] = sbr->k[1]; >> for (k = 1; k <= num_bands_1; k++) { >> diff --git a/libavcodec/huffman.c b/libavcodec/huffman.c >> index c771bcf..d7403b8 100644 >> --- a/libavcodec/huffman.c >> +++ b/libavcodec/huffman.c >> @@ -26,6 +26,7 @@ >> >> #include <stdint.h> >> >> +#include "libavutil/qsort.h" >> #include "avcodec.h" >> #include "get_bits.h" >> #include "huffman.h" >> @@ -170,7 +171,7 @@ int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, >> int nb_codes, int nb_bit >> "Tree construction is not possible\n"); >> return -1; >> } >> - qsort(nodes, nb_codes, 
sizeof(Node), cmp); >> + AV_QSORT(nodes, nb_codes, Node, cmp); >> cur_node = nb_codes; >> nodes[nb_codes*2-1].count = 0; >> for (i = 0; i < nb_codes * 2 - 1; i += 2) { >> diff --git a/libavcodec/motion_est_template.c >> b/libavcodec/motion_est_template.c >> index 37ff110..327a24b 100644 >> --- a/libavcodec/motion_est_template.c >> +++ b/libavcodec/motion_est_template.c >> @@ -24,6 +24,7 @@ >> * Motion estimation template. >> */ >> >> +#include "libavutil/qsort.h" >> #include "mpegvideo.h" >> >> //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it >> ...) >> @@ -723,7 +724,7 @@ static int sab_diamond_search(MpegEncContext * s, int >> *best, int dmin, >> j++; >> } >> >> - qsort(minima, j, sizeof(Minima), minima_cmp); >> + AV_QSORT(minima, j, Minima, minima_cmp); >> >> for(; j<minima_count; j++){ >> minima[j].height=256*256*256*64; >> diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c >> index 760d9e5..5efc008 100644 >> --- a/libavcodec/utvideodec.c >> +++ b/libavcodec/utvideodec.c >> @@ -25,9 +25,9 @@ >> */ >> >> #include <inttypes.h> >> -#include <stdlib.h> >> >> #include "libavutil/intreadwrite.h" >> +#include "libavutil/qsort.h" >> #include "avcodec.h" >> #include "bswapdsp.h" >> #include "bytestream.h" >> @@ -50,7 +50,7 @@ static int build_huff(const uint8_t *src, VLC *vlc, int >> *fsym) >> he[i].sym = i; >> he[i].len = *src++; >> } >> - qsort(he, 256, sizeof(*he), ff_ut_huff_cmp_len); >> + AV_QSORT(he, 256, HuffEntry, ff_ut_huff_cmp_len); >> >> if (!he[0].len) { >> *fsym = he[0].sym; >> diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c >> index b8e1cc3..dc7d019 100644 >> --- a/libavcodec/utvideoenc.c >> +++ b/libavcodec/utvideoenc.c >> @@ -26,6 +26,7 @@ >> >> #include "libavutil/imgutils.h" >> #include "libavutil/intreadwrite.h" >> +#include "libavutil/qsort.h" >> #include "avcodec.h" >> #include "internal.h" >> #include "bswapdsp.h" >> @@ -331,7 +332,7 @@ static void calculate_codes(HuffEntry *he) >> int last, i; 
>> uint32_t code; >> >> - qsort(he, 256, sizeof(*he), ff_ut_huff_cmp_len); >> + AV_QSORT(he, 256, HuffEntry, ff_ut_huff_cmp_len); >> >> last = 255; >> while (he[last].len == 255 && last) >> @@ -343,7 +344,7 @@ static void calculate_codes(HuffEntry *he) >> code += 0x80000000u >> (he[i].len - 1); >> } >> >> - qsort(he, 256, sizeof(*he), huff_cmp_sym); >> + AV_QSORT(he, 256, HuffEntry, huff_cmp_sym); >> } >> >> /* Write huffman bit codes to a memory block */ >> diff --git a/libavfilter/f_sendcmd.c b/libavfilter/f_sendcmd.c >> index 37aedc5..a480684 100644 >> --- a/libavfilter/f_sendcmd.c >> +++ b/libavfilter/f_sendcmd.c >> @@ -28,6 +28,7 @@ >> #include "libavutil/file.h" >> #include "libavutil/opt.h" >> #include "libavutil/parseutils.h" >> +#include "libavutil/qsort.h" >> #include "avfilter.h" >> #include "internal.h" >> #include "avfiltergraph.h" >> @@ -411,7 +412,7 @@ static av_cold int init(AVFilterContext *ctx) >> return AVERROR(EINVAL); >> } >> >> - qsort(s->intervals, s->nb_intervals, sizeof(Interval), cmp_intervals); >> + AV_QSORT(s->intervals, s->nb_intervals, Interval, cmp_intervals); >> >> av_log(ctx, AV_LOG_DEBUG, "Parsed commands:\n"); >> for (i = 0; i < s->nb_intervals; i++) { >> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c >> index ac13ecd..4252631 100644 >> --- a/libavfilter/vf_deshake.c >> +++ b/libavfilter/vf_deshake.c >> @@ -57,6 +57,7 @@ >> #include "libavutil/mem.h" >> #include "libavutil/opt.h" >> #include "libavutil/pixdesc.h" >> +#include "libavutil/qsort.h" >> >> #include "deshake.h" >> #include "deshake_opencl.h" >> @@ -105,7 +106,7 @@ static double clean_mean(double *values, int count) >> int cut = count / 5; >> int x; >> >> - qsort(values, count, sizeof(double), (void*)cmp); >> + AV_QSORT(values, count, double, cmp); >> >> for (x = cut; x < count - cut; x++) { >> mean += values[x]; >> diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c >> index df57c10..243df2d 100644 >> --- 
a/libavfilter/vf_palettegen.c >> +++ b/libavfilter/vf_palettegen.c >> @@ -382,7 +382,7 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx) >> av_log(ctx, AV_LOG_INFO, "%d%s colors generated out of %d colors; >> ratio=%f\n", >> s->nb_boxes, s->reserve_transparent ? "(+1)" : "", s->nb_refs, >> ratio); >> >> - qsort(s->boxes, s->nb_boxes, sizeof(*s->boxes), cmp_color); >> + AV_QSORT(s->boxes, s->nb_boxes, struct range_box, cmp_color); >> >> write_palette(ctx, out); >> >> diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c >> index 1225a66..890bb46 100644 >> --- a/libavfilter/vf_paletteuse.c >> +++ b/libavfilter/vf_paletteuse.c >> @@ -704,7 +704,7 @@ static void load_colormap(PaletteUseContext *s) >> struct color_rect box; >> >> /* disable transparent colors and dups */ >> - qsort(s->palette, AVPALETTE_COUNT, sizeof(*s->palette), cmp_pal_entry); >> + AV_QSORT(s->palette, AVPALETTE_COUNT, uint32_t, cmp_pal_entry); >> for (i = 0; i < AVPALETTE_COUNT; i++) { >> const uint32_t c = s->palette[i]; >> if (i != 0 && c == last_color) { >> diff --git a/libavfilter/vf_removegrain.c b/libavfilter/vf_removegrain.c >> index da17f6a..3a28b15 100644 >> --- a/libavfilter/vf_removegrain.c >> +++ b/libavfilter/vf_removegrain.c >> @@ -24,6 +24,7 @@ >> #include "libavutil/imgutils.h" >> #include "libavutil/opt.h" >> #include "libavutil/pixdesc.h" >> +#include "libavutil/qsort.h" >> #include "avfilter.h" >> #include "formats.h" >> #include "internal.h" >> @@ -92,7 +93,7 @@ static int mode02(int c, int a1, int a2, int a3, int a4, >> int a5, int a6, int a7, >> { >> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 }; >> >> - qsort(&a, 8, sizeof(a[0]), cmp_int); >> + AV_QSORT(a, 8, int, cmp_int); >> >> return av_clip(c, a[2 - 1 ], a[7 - 1]); >> } >> @@ -101,7 +102,7 @@ static int mode03(int c, int a1, int a2, int a3, int a4, >> int a5, int a6, int a7, >> { >> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 }; >> >> - qsort(&a, 8, sizeof(a[0]), cmp_int); >> + AV_QSORT(a, 8, int, 
cmp_int); >> >> return av_clip(c, a[3 - 1 ], a[6 - 1]); >> } >> @@ -110,7 +111,7 @@ static int mode04(int c, int a1, int a2, int a3, int a4, >> int a5, int a6, int a7, >> { >> int a[8] = { a1, a2, a3, a4, a5, a6, a7, a8 }; >> >> - qsort(&a, 8, sizeof(a[0]), cmp_int); >> + AV_QSORT(a, 8, int, cmp_int); >> >> return av_clip(c, a[4 - 1 ], a[5 - 1]); >> } >> diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c >> index bb89766..16ab245 100644 >> --- a/libavformat/subtitles.c >> +++ b/libavformat/subtitles.c >> @@ -23,6 +23,7 @@ >> #include "avio_internal.h" >> #include "libavutil/avassert.h" >> #include "libavutil/avstring.h" >> +#include "libavutil/qsort.h" >> >> void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb) >> { >> @@ -197,9 +198,12 @@ void ff_subtitles_queue_finalize(void *log_ctx, >> FFDemuxSubtitlesQueue *q) >> { >> int i; >> >> - qsort(q->subs, q->nb_subs, sizeof(*q->subs), >> - q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos >> - : cmp_pkt_sub_pos_ts); >> + if (q->sort == SUB_SORT_TS_POS) { >> + AV_QSORT(q->subs, q->nb_subs, AVPacket, cmp_pkt_sub_ts_pos); >> + } >> + else >> + AV_QSORT(q->subs, q->nb_subs, AVPacket, cmp_pkt_sub_pos_ts); >> + >> for (i = 0; i < q->nb_subs; i++) >> if (q->subs[i].duration == -1 && i < q->nb_subs - 1) >> q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts; >> diff --git a/libswresample/swresample-test.c >> b/libswresample/swresample-test.c >> index 9caa750..351ec24 100644 >> --- a/libswresample/swresample-test.c >> +++ b/libswresample/swresample-test.c >> @@ -23,6 +23,7 @@ >> #include "libavutil/channel_layout.h" >> #include "libavutil/common.h" >> #include "libavutil/opt.h" >> +#include "libavutil/qsort.h" >> #include "swresample.h" >> >> #undef time >> @@ -271,7 +272,7 @@ int main(int argc, char **argv){ >> r = (seed * (uint64_t)(max_tests - test)) >>32; >> FFSWAP(int, remaining_tests[r], remaining_tests[max_tests - test - >> 1]); >> } >> - qsort(remaining_tests + max_tests - num_tests, 
num_tests, >> sizeof(remaining_tests[0]), (void*)cmp); >> + AV_QSORT(remaining_tests + max_tests - num_tests, num_tests, int, cmp); >> in_sample_rate=16000; >> for(test=0; test<num_tests; test++){ >> char in_layout_string[256]; >> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c >> index bba2dbc..7edaa31 100644 >> --- a/tests/checkasm/checkasm.c >> +++ b/tests/checkasm/checkasm.c >> @@ -22,12 +22,12 @@ >> >> #include <stdarg.h> >> #include <stdio.h> >> -#include <stdlib.h> >> #include <string.h> >> #include "checkasm.h" >> #include "libavutil/common.h" >> #include "libavutil/cpu.h" >> #include "libavutil/random_seed.h" >> +#include "libavutil/qsort.h" >> >> #if HAVE_IO_H >> #include <io.h> >> @@ -262,7 +262,7 @@ static int measure_nop_time(void) >> nops[i] = AV_READ_TIME() - t; >> } >> >> - qsort(nops, 10000, sizeof(uint16_t), cmp_nop); >> + AV_QSORT(nops, 10000, uint16_t, cmp_nop); >> for (i = 2500; i < 7500; i++) >> nop_sum += nops[i]; >> >> -- >> 2.6.1 >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel