On Sat, Jan 20, 2018 at 6:32 PM, Rostislav Pehlivanov <atomnu...@gmail.com> wrote: > On 20 January 2018 at 11:13, Rostislav Pehlivanov <atomnu...@gmail.com> > wrote: > >> >> >> On 20 January 2018 at 10:22, wm4 <nfx...@googlemail.com> wrote: >> >>> On Sat, 20 Jan 2018 11:29:13 +0700 >>> Muhammad Faiz <mfc...@gmail.com> wrote: >>> >>> > Help avoiding malloc-free cycles when allocating-freeing common >>> > structures. >>> > >>> > Signed-off-by: Muhammad Faiz <mfc...@gmail.com> >>> > --- >>> > libavutil/staticpool.h | 117 ++++++++++++++++++++++++++++++ >>> +++++++++++++++++++ >>> > 1 file changed, 117 insertions(+) >>> > create mode 100644 libavutil/staticpool.h >>> > >>> > diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h >>> > new file mode 100644 >>> > index 0000000000..9c9b2784bc >>> > --- /dev/null >>> > +++ b/libavutil/staticpool.h >>> > @@ -0,0 +1,117 @@ >>> > +/* >>> > + * This file is part of FFmpeg. >>> > + * >>> > + * FFmpeg is free software; you can redistribute it and/or >>> > + * modify it under the terms of the GNU Lesser General Public >>> > + * License as published by the Free Software Foundation; either >>> > + * version 2.1 of the License, or (at your option) any later version. >>> > + * >>> > + * FFmpeg is distributed in the hope that it will be useful, >>> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> > + * Lesser General Public License for more details. >>> > + * >>> > + * You should have received a copy of the GNU Lesser General Public >>> > + * License along with FFmpeg; if not, write to the Free Software >>> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >>> 02110-1301 USA >>> > + */ >>> > + >>> > +#ifndef AVUTIL_STATICPOOL_H >>> > +#define AVUTIL_STATICPOOL_H >>> > + >>> > +#include <stdatomic.h> >>> > +#include "avassert.h" >>> > +#include "mem.h" >>> > + >>> > +/** >>> > + * FF_STATICPOOL allocate memory without av_malloc if possible >>> > + * @param size must be 2^n between 64 and 4096 >>> > + */ >>> > +#define FF_STATICPOOL_DECLARE(type, size) >>> \ >>> > +typedef struct type##_StaticPoolWrapper { >>> \ >>> > + type buf; >>> \ >>> > + unsigned index; >>> \ >>> > + atomic_uint next; >>> \ >>> > +} type##_StaticPoolWrapper; >>> \ >>> > + >>> \ >>> > +static atomic_uint type##_staticpool_next; >>> \ >>> > +static atomic_uint type##_staticpool_last; >>> \ >>> > +static type##_StaticPoolWrapper type##_staticpool_table[size]; >>> \ >>> > + >>> \ >>> > +static type *type##_staticpool_malloc(void) >>> \ >>> > +{ >>> \ >>> > + unsigned val, index, serial, new_val; >>> \ >>> > + >>> \ >>> > + av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - >>> 1))); \ >>> > + >>> \ >>> > + /* use serial, avoid spinlock */ >>> \ >>> > + /* acquire, so we don't get stalled table[index].next */ >>> \ >>> > + val = atomic_load_explicit(&type##_staticpool_next, >>> memory_order_acquire); \ >>> > + do { >>> \ >>> > + index = val & ((size) - 1); >>> \ >>> > + serial = val & ~((size) - 1); >>> \ >>> > + new_val = >>> > atomic_load_explicit(&type##_staticpool_table[index].next, >>> memory_order_relaxed) | (serial + (size)); \ >>> > + } while (!atomic_compare_exchange_stro >>> ng_explicit(&type##_staticpool_next, &val, new_val, \ >>> > + >>> memory_order_acquire, memory_order_acquire)); \ >>> > + >>> \ >>> > + index = val & ((size) - 1); >>> \ >>> > + if (index) >>> \ >>> > + return &type##_staticpool_table[index].buf; >>> \ >>> > + >>> \ >>> > + index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, >>> memory_order_relaxed) + 1; \ >>> > + if (index < (size)) { >>> \ >>> > + type##_staticpool_table[index].index = index; >>> \ >>> > + return &type##_staticpool_table[index].buf; >>> \ >>> > + } >>> \ >>> > + >>> \ >>> > + atomic_fetch_add_explicit(&type##_staticpool_last, -1, >>> memory_order_relaxed); \ >>> > + return av_malloc(sizeof(type)); >>> \ >>> > +} >>> \ >>> > + >>> \ >>> > +static inline type *type##_staticpool_mallocz(void) >>> \ >>> > +{ >>> \ >>> > + type *ptr = type##_staticpool_malloc(); >>> \ >>> > + if (ptr) >>> \ >>> > + memset(ptr, 0, sizeof(*ptr)); >>> \ >>> > + return ptr; >>> \ >>> > +} >>> \ >>> > + >>> \ >>> > +static void type##_staticpool_free(type *ptr) >>> \ >>> > +{ >>> \ >>> > + type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) >>> ptr; \ >>> > + unsigned val, serial, index, new_val; >>> \ >>> > + >>> \ >>> > + if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) || >>> \ >>> > + (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + >>> size)) { \ >>> > + av_free(ptr); >>> \ >>> > + return; >>> \ >>> > + } >>> \ >>> > + >>> \ >>> > + if (CONFIG_MEMORY_POISONING) >>> \ >>> > + memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf)); >>> \ >>> > + >>> \ >>> > + val = atomic_load_explicit(&type##_staticpool_next, >>> memory_order_relaxed); \ >>> > + do { >>> \ >>> > + index = val & ((size) - 1); >>> \ >>> > + serial = val & ~((size) - 1); >>> \ >>> > + atomic_store_explicit(&entry->next, index, >>> memory_order_relaxed); \ >>> > + new_val = entry->index | (serial + (size)); >>> \ >>> > + } while (!atomic_compare_exchange_stro >>> ng_explicit(&type##_staticpool_next, &val, new_val, \ >>> > + >>> memory_order_release, memory_order_relaxed)); \ >>> > +} >>> \ >>> > + >>> \ >>> > +static inline void type##_staticpool_freep(type **ptr) >>> \ >>> > +{ >>> \ >>> > + if (ptr) { >>> \ >>> > + type##_staticpool_free(*ptr); >>> \ >>> > + *ptr = NULL; >>> \ >>> > + } >>> \ >>> > +} >>> \ >>> > +/* FF_STATICPOOL_DECLARE */ >>> > + >>> > +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc() >>> > +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz() >>> > +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v) >>> > +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v) >>> > + >>> > +#endif >>> >>> I'm against this. Global mutable data is always bad. If you have 2 >>> uses ffmpeg in the same process, their efficiency might get halved >>> because the available pool size is shared by both. The macro mess is >>> very ugly. Can we please not define pages of code as macros. Last but >>> not least this is the job of the memory manager, which already does >>> fancy things like per thread pools. I don't trust the atomics use >>> either, I'm don't want to have to debug that ever. >>> >>> _______________________________________________ >>> ffmpeg-devel mailing list >>> ffmpeg-devel@ffmpeg.org >>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>> >> >> >> I agree. >> Also the last GCC version has multithreaded malloc cache already >> implemented so it makes this patch only useful on non-glibc systems (or >> systems which don't implement malloc caching). >> > > Not gcc, sorry, glibc.
Interesting. Can you provide a benchmark for new version of glibc? My glibc is outdated. Thank's. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel