AVL Tree based set compared to AVDIctionary this has * clone is O(n) instead of O(n²) * copy is O(n*log n) instead of O(n²) * malloc() calls are O(log n) instead of O(n) * get/add/delete is O(log n) * * You can add and remove entries while a iterator stays valid * copy is atomic, a failure means the dst is unchanged * * one cannot randomly change the compare function because the set is ordered by it, for example a set is either case sensitive or not
This may become the base for AVDictionary2 ... maybe Still somewhat work in progress, help more welcome than complaints ;) Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc> --- libavutil/Makefile | 3 + libavutil/set.c | 269 ++++++++++++++++++++++++++++++++++++++ libavutil/set.h | 173 ++++++++++++++++++++++++ libavutil/tests/set.c | 109 +++++++++++++++ libavutil/tree.c | 6 +- libavutil/tree_internal.h | 28 ++++ tests/fate/libavutil.mak | 4 + tests/ref/fate/set | 8 ++ 8 files changed, 595 insertions(+), 5 deletions(-) create mode 100644 libavutil/set.c create mode 100644 libavutil/set.h create mode 100644 libavutil/tests/set.c create mode 100644 libavutil/tree_internal.h create mode 100644 tests/ref/fate/set diff --git a/libavutil/Makefile b/libavutil/Makefile index 9ef118016bb..3ba3b908bc2 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -81,6 +81,7 @@ HEADERS = adler32.h \ replaygain.h \ ripemd.h \ samplefmt.h \ + set.h \ sha.h \ sha512.h \ spherical.h \ @@ -173,6 +174,7 @@ OBJS = adler32.o \ rc4.o \ ripemd.o \ samplefmt.o \ + set.o \ side_data.o \ sha.o \ sha512.o \ @@ -290,6 +292,7 @@ TESTPROGS = adler32 \ random_seed \ rational \ ripemd \ + set \ sha \ sha512 \ side_data_array \ diff --git a/libavutil/set.c b/libavutil/set.c new file mode 100644 index 00000000000..f9f7437b3de --- /dev/null +++ b/libavutil/set.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2025 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <inttypes.h> +#include <string.h> + +#include "avassert.h" +#include "error.h" +#include "mem.h" +#include "set.h" + +#include "tree_internal.h" // For improved readability with AVTreeNode, do NOT touch AVTreeNode internals + +typedef struct{ + AVTreeNode node; // tree node, NOTE this may not correspond to this set entry due to rotations + AVSetEntry set_entry; + uint8_t value[0]; +} AVSetInternalEntry; + +struct AVSet{ + AVSetCompareFunc cmp; + AVSetCopyFunc copy; + AVSetFreeFunc freef; + int count; + int deleted; + int next; ///< index of entry in root array after all used entries + unsigned array_len; + AVTreeNode *tree_root; + AVSetInternalEntry *root; +}; + +static uint8_t deleted_entry; + +static inline int internal_entry_len(AVSetInternalEntry *I) { + return (I->set_entry.len + sizeof (*I) - 1) / sizeof (*I) + 1; +} + +AVSet *av_set_new(AVSetCompareFunc cmp, AVSetCopyFunc copy, AVSetFreeFunc freef) +{ + AVSet *s = av_mallocz(sizeof(*s)); + if (!s) + return NULL; + + s->cmp = cmp; + s->copy = copy; + s->freef = freef; + + return s; +} + +AVSetEntry *av_set_get(const AVSet *s, const AVSetEntry *prev, const char *value, int (*cmp)(const void *key, const void *b)) +{ + if (cmp) { + if (prev && cmp) { + void *next_node[2] = { NULL, NULL }; + void *prev_value = av_tree_find2(s->tree_root, prev->value, s->cmp, next_node, 2); + av_assert2(prev_value); + if (!next_node[1] || cmp(next_node[1], value)) + return NULL; + + value = next_node[1]; + } else { + void *next_node[4] = { NULL, NULL, NULL, NULL }; + value = av_tree_find2(s->tree_root, value, cmp, next_node, 4); + if (next_node[2]) // If we have a leftmost equal value, use it instead + value = next_node[2]; + } + } else { + //we cannot have multiple matches with the cmp used for building a set as we could not have added these + value = av_tree_find2(s->tree_root, value, s->cmp, NULL, 0); + } + + if (!value) + return NULL; + + return &((AVSetInternalEntry*)(value - offsetof(AVSetInternalEntry, value)))->set_entry; +} + +int av_set_add(AVSet *s, const char *value, int len, int flags) +{ + int advance = 1 + (len + sizeof(*s->root) - 1) / sizeof(*s->root); + AVSetInternalEntry *new_root = av_fast_realloc(s->root, &s->array_len, (s->next + advance) * sizeof(AVSetInternalEntry)); + + av_assert2(value && len); // patch welcome but how would the compare function compare a len=0 element without knowing it is a len 0 element + + if (!new_root) + return AVERROR(ENOMEM); + + if (new_root != s->root) { + if (s->tree_root) { + AVTreeNode *new_tree_root = s->tree_root - &s->root->node + &new_root->node; + av_tree_move(new_tree_root, s->tree_root, new_root, s->root); + s->tree_root = new_tree_root; + } + + for(int i = 0; i<s->next; i++) { + if (new_root[i].set_entry.value != &deleted_entry) + new_root[i].set_entry.value = new_root[i].value; + i += internal_entry_len(new_root + i) - 1; + } + s->root = new_root; + } + + AVSetEntry *entry = &new_root[s->next].set_entry; + memset(&new_root[s->next].node, 0, sizeof(new_root[s->next].node)); + entry->value = new_root[s->next].value; + entry->len = len; + memcpy(entry->value, value, len); + + AVTreeNode *next = &new_root[s->next].node; + void *elem = av_tree_insert(&s->tree_root, entry->value, s->cmp, &next); + if (elem != entry->value && elem) { + av_assert2(next); + return 0; //entry already in the set + } + av_assert2(!next); + av_assert2(s->tree_root); + s->next += advance; + s->count++; + + return 1; +} + +int av_set_del(AVSet *s, const char *value, int len, int flags) +{ + uint8_t *old_value = av_tree_find2(s->tree_root, value, s->cmp, NULL, 0); + if (!old_value) + return 0; + + AVTreeNode *next = NULL; + void *elem = av_tree_insert(&s->tree_root, value, s->cmp, &next); + av_assert2(next); + + AVSetInternalEntry *internal_entry = (AVSetInternalEntry *)((uint8_t*)old_value - offsetof(AVSetInternalEntry, value)); + internal_entry->set_entry.value = &deleted_entry; + + s->count--; + s->deleted++; + + if ((flags & AV_SET_ALLOW_REBUILD) && s->deleted > s->count) { + AVSet *news = av_set_new(s->cmp, s->copy, s->freef); + if(news) { + int ret = av_set_copy(news, s); + if (ret < 0) { + av_set_free(&news); + } else { + if (s->freef) + for (int i=0; i<s->count; i++) + s->freef(&s->root[i].set_entry); + av_freep(&s->root); + memcpy(s, news, sizeof(*s)); + } + } + } + + return 1; +} + +const AVSetEntry *av_set_iterate(const AVSet *s, + const AVSetEntry *prev) +{ + AVSetInternalEntry *I; + if (prev) { + I = (AVSetInternalEntry*)((uint8_t*)prev - offsetof(AVSetInternalEntry, set_entry)); + I += internal_entry_len(I); + } else { + I = s->root; + } + while (I < s->root + s->next && I->set_entry.value == &deleted_entry) + I += internal_entry_len(I); + + if (I == s->root + s->next) + return NULL; + + return &I->set_entry; +} + +int av_set_count(const AVSet *s) +{ + return s->count; +} + +void av_set_free(AVSet **sp) +{ + AVSet *s = *sp; + + for (int i=0; i<s->count; i++) { + if (s->freef) + s->freef(&s->root[i].set_entry); + } + av_freep(&s->root); + s->next = + s->array_len = + s->count = 0; + av_freep(sp); +} + +int av_set_copy(AVSet *dst, const AVSet *src) +{ + const AVSetEntry *t = NULL; + AVSet *bak = av_memdup(dst, sizeof(*dst)); + if (!bak) + return AVERROR(ENOMEM); + bak->root = av_memdup(bak->root, bak->array_len); + + while ((t = av_set_iterate(src, t))) { + int ret = av_set_add(dst, t->value, t->len, 0); + + if (ret < 0) { + av_free(dst->root); + memcpy(dst, bak, sizeof(*dst)); + return ret; + } + } + av_freep(&bak->root); + av_free(bak); + + return 0; +} + +AVSet *av_set_clone(AVSet *s) +{ + AVSet *dst = av_memdup(s, sizeof(AVSet)); + + if (!dst) + return NULL; + + dst->root = av_memdup(s->root, s->array_len); + + if (!dst->root) + goto err; + + if (s->tree_root) { + dst->tree_root = s->tree_root - &s->root->node + &dst->root->node; + av_tree_move(dst->tree_root, s->tree_root, dst->root, s->root); + } + + //TODO We could attempt to compact free space + for(int i = 0; i<s->next; i++) { + if (dst->root[i].set_entry.value != deleted_entry) + dst->root[i].set_entry.value = dst->root[i].value; + i += internal_entry_len(dst->root + i) - 1; + } + + return dst; +err: + if (dst) { + av_freep(&dst->root); + } + av_free(dst); + return NULL; +} diff --git a/libavutil/set.h b/libavutil/set.h new file mode 100644 index 00000000000..193499a4ab1 --- /dev/null +++ b/libavutil/set.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2025 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Public set API. + */ + +#ifndef AVUTIL_SET_H +#define AVUTIL_SET_H + +#include <stdint.h> + +#include "tree.h" + +/** + * Compared to AVDictionary + * clone is O(n) instead of O(n²) + * copy is O(n*log n) instead of O(n²) + * malloc() calls are O(log n) instead of O(n) + * get/add/delete is O(log n) + * + * You can add and remove entries while a iterator stays valid + * copy is atomic, a failure means the dst is unchanged + * + * one cannot randomly change the compare function because the set is ordered by it, for example a set is either case sensitive or not + */ + + +enum { + AV_SET_ALLOW_REBUILD = 1 ///< when removing entries rebuild the set to reduce memory consumption, note, this invalidates previously retrieved elements and iterate state. +}; + + +typedef struct AVSetEntry { + int len; + uint8_t *value; +} AVSetEntry; + +typedef struct AVSet AVSet; +typedef void (* AVSetFreeFunc)(AVSetEntry *c); +typedef void (* AVSetCopyFunc)(AVSetEntry *dst, const AVSetEntry *src, size_t len); +typedef int (* AVSetCompareFunc)(const void *key, const void *b); + +/** + * + * @param freef receives a AVSetEntry and should free any resources except the AVSetEntry->value pointer itself + * for flat structs like strings, this is simply NULL + * + */ +AVSet *av_set_new(AVSetCompareFunc compare, AVSetCopyFunc clone, AVSetFreeFunc freef); + +/** + * Add the given entry into a AVSet. + * + * @param s Pointer AVSet struct. + * @param value Entry value to add to *s + * @param valuelen length of value + * + * @return 1 if the entry was added, 0 if it was already in the set + * otherwise an error code <0 + */ +int av_set_add(AVSet *s, const char *value, int valuelen, int flags); + +/** + * Delete the given entry from a AVSet. + * + * @param s Pointer AVSet struct. + * @param value Entry value to delete from *s + * @param valuelen length of value + * @param flags can be set to AV_SET_ALLOW_REBUILD or 0 + * + * @return 1 if the entry was deleted, 0 if it was not found in the set + * otherwise an error code <0 + */ +int av_set_del(AVSet *s, const char *value, int valuelen, int flags); + +/** + * Get a matching set entry. + * + * The returned entry must not be changed, or it will + * cause undefined behavior. + * + * @param prev Set to the previous matching element to find the next. + * If set to NULL the first matching element is returned. + * @param value Matching value + * @param cmp A compatible comparission function. That is, this function can return 0 + * for a consecutive subset of elements where the cmp function used for the set + * did not return 0. + * An example of such a function is one comparing only a prefix of string. + * An anti-example: would be a case insensitive compare when the set was setup + * with case sensitive one. + * + * @return Found entry or NULL in case no matching entry was found in the dictionary + */ + +AVSetEntry *av_set_get(const AVSet *s, const AVSetEntry *prev, const char *value, int (*cmp)(const void *key, const void *b)); + +/** + * Iterate over a set + * + * Iterates through all entries in the set. + * + * @warning If you call any function with AV_SET_ALLOW_REBUILD set, then the iterator is + * invalidated, and must not be used anymore. Otherwise av_set_add() and av_set_del() + * can saftely be called during iteration. + * + * Typical usage: + * @code + * const AVSetEntry *e = NULL; + * while ((e = av_set_iterate(m, e))) { + * // ... + * } + * @endcode + * + * @param s The set to iterate over + * @param prev Pointer to the previous AVSetEntry, NULL initially + * + * @retval AVSetEntry* The next element in the set + * @retval NULL No more elements in the set + */ +const AVSetEntry *av_set_iterate(const AVSet *s, + const AVSetEntry *prev); + +/** + * Get number of entries in set. + * + * @param s set + * @return number of entries in set + */ +int av_set_count(const AVSet *s); + +/** + * Free all the memory allocated for an AVSet struct + * and all values. + */ +void av_set_free(AVSet **s); + +AVSet *av_set_clone(AVSet *s); + +/** + * Copy entries from one AVSet struct into another. + * + * @param dst Pointer to a pointer to a AVSet struct to copy into. If *dst is NULL, + * this function will allocate a struct for you and put it in *dst + * @param src Pointer to the source AVSet struct to copy items from. + * @param flags Flags to use when setting entries in *dst + * + * @see when the initial dst set is empty use av_set_clone() as its faster + * + * @return 0 on success, negative AVERROR code on failure. + */ + +int av_set_copy(AVSet *dst, const AVSet *src); + +#endif /* AVUTIL_SET_H */ diff --git a/libavutil/tests/set.c b/libavutil/tests/set.c new file mode 100644 index 00000000000..fbe68512bc7 --- /dev/null +++ b/libavutil/tests/set.c @@ -0,0 +1,109 @@ +/* + * copyright (c) 2025 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <string.h> +#include <stdio.h> + +#include "libavutil/avassert.h" +#include "libavutil/mem.h" + +#include "libavutil/set.h" + +static void print_set(const AVSet *s) +{ + const AVSetEntry *t = NULL; + while ((t = av_set_iterate(s, t))) + printf("%s %d ", t->value, t->len); + printf("\n"); +} + +int main(void) +{ + AVSet *set = av_set_new(strcmp, NULL, NULL); + + printf("testing empty set\n"); + + AVSetEntry *e = av_set_get(set, NULL, "foo", NULL); + av_assert0(e == NULL); + + e = av_set_get(set, NULL, "foo", strcmp); + av_assert0(e == NULL); + + int ret = av_set_del(set, "foo", 4, 0); + av_assert0(ret == 0); + + print_set(set); + + printf("testing 1-set\n"); + + ret = av_set_add(set, "foo", 4, 0); + av_assert0(ret == 1); + + ret = av_set_add(set, "foo", 4, 0); + av_assert0(ret == 0); + + e = av_set_get(set, NULL, "foo", NULL); + av_assert0(!strcmp(e->value, "foo")); + + e = av_set_get(set, NULL, "foo", strcmp); + av_assert0(!strcmp(e->value, "foo")); + + ret = av_set_del(set, "foo", 4, 0); + av_assert0(ret == 1); + + print_set(set); + + printf("testing n-set\n"); + unsigned r = 5; + for(int i=0; i<1000; i++) { + r = r*123 + 7; + char str[3] = {0}; + str[0] = r; + ret = av_set_add(set, str , 2, 0); + printf("%d", ret); + } + printf("\n"); + + r = 5; + for(int i=0; i<1000; i++) { + r = r*123 + 7; + char str[3] = {0}; + str[0] = r; + e = av_set_get(set, NULL, str, NULL); + av_assert0(!strcmp(e->value, str)); + e = av_set_get(set, NULL, str, strcmp); + av_assert0(!strcmp(e->value, str)); + ret = av_set_add(set, str , 2, 0); + av_assert0(ret == 0); + + str[1]="x"; + + e = av_set_get(set, NULL, str, NULL); + av_assert0(e == NULL); + e = av_set_get(set, NULL, str, strcmp); + av_assert0(e == NULL); + } + print_set(set); + + av_set_free(&set); + av_assert0(!set); + + return 0; +} diff --git a/libavutil/tree.c b/libavutil/tree.c index 4aaaa4f7af3..fbfb87e3561 100644 --- a/libavutil/tree.c +++ b/libavutil/tree.c @@ -24,11 +24,7 @@ #include "mem.h" #include "tree.h" -typedef struct AVTreeNode { - struct AVTreeNode *child[2]; - void *elem; - int state; -} AVTreeNode; +#include "tree_internal.h" const int av_tree_node_size = sizeof(AVTreeNode); diff --git a/libavutil/tree_internal.h b/libavutil/tree_internal.h new file mode 100644 index 00000000000..6207c321a8e --- /dev/null +++ b/libavutil/tree_internal.h @@ -0,0 +1,28 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_TREE_INTERNAL_H +#define AVUTIL_TREE_INTERNAL_H + +typedef struct AVTreeNode { + struct AVTreeNode *child[2]; + void *elem; + int state; +} AVTreeNode; + +#endif /* AVUTIL_TREE_INTERNAL_H */ diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak index 6bf03b24386..3583492054f 100644 --- a/tests/fate/libavutil.mak +++ b/tests/fate/libavutil.mak @@ -74,6 +74,10 @@ FATE_LIBAVUTIL += fate-dict fate-dict: libavutil/tests/dict$(EXESUF) fate-dict: CMD = run libavutil/tests/dict$(EXESUF) +FATE_LIBAVUTIL += fate-set +fate-set: libavutil/tests/set$(EXESUF) +fate-set: CMD = run libavutil/tests/set$(EXESUF) + FATE_LIBAVUTIL += fate-encryption-info fate-encryption-info: libavutil/tests/encryption_info$(EXESUF) fate-encryption-info: CMD = run libavutil/tests/encryption_info$(EXESUF) diff --git a/tests/ref/fate/set b/tests/ref/fate/set new file mode 100644 index 00000000000..76f9dbbbbed --- /dev/null +++ b/tests/ref/fate/set @@ -0,0 +1,8 @@ +testing empty set + +testing 1-set + +testing n-set +1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +n 2 � 2 " 2 ] 2 � 2 y 2 * 2 5 2 ~ 2 � 2 � 2 � 2 � 2 ) 2 � 2 e 2 � 2 A 2 B 2 � 2 � 2 � 2 J 2 � 2 � 2 � 2 � 2 � 2 � 2 � 2 � 2 � 2 � 2 � 2 b 2 2 � 2 9 2 j 2 � 2 � 2 Q 2 � 2 M 2 2 � 2 � 2 % 2 � 2 2 � 2 } 2 2 � 2 � 2 U 2 � 2 � 2 2 � 2 & 2 I 2 2 � 2 � 2 a 2 � 2 � 2 6 2 � 2 � 2 � 2 � 2 2 2 2 2 F 2 � 2 : 2 � 2 2 � 2 � 2 = 2 V 2 Y 2 � 2 2 2 q 2 R 2 m 2 f 2 2 Z 2 E 2 . 2 ! 2 � 2 � 2 v 2 � 2 � 2 u 2 > 2 � 2 r 2 � 2 � 2 i 2 z 2 � 2 N 2 � 2 2 � 2 � 2 2 + 2 � 2 ^ 2 1 2 � 2 - 2 � 2 � 2 � 2 2 -- 2.49.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".