> On Jun 8, 2015, at 8:40 AM, sor...@apache.org wrote: > > Repository: trafficserver > Updated Branches: > refs/heads/master 1a0832b3b -> bba557870 > > > TS-3122: Add support for hugepages on Linux > > > Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo > Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787 > Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787 > Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787 > > Branch: refs/heads/master > Commit: bba557870c05222d302a05ec948871cdde8bf63b > Parents: 1a0832b > Author: Phil Sorber <sor...@apache.org> > Authored: Thu Oct 16 19:58:08 2014 -0600 > Committer: Phil Sorber <sor...@apache.org> > Committed: Mon Jun 8 09:28:56 2015 -0600 > > ---------------------------------------------------------------------- > .../configuration/records.config.en.rst | 12 ++ > iocore/cache/Cache.cc | 10 +- > iocore/cache/CacheDir.cc | 53 +++++-- > iocore/cache/P_CacheDir.h | 4 +- > lib/ts/Makefile.am | 2 + > lib/ts/hugepages.cc | 144 +++++++++++++++++++ > lib/ts/hugepages.h | 32 +++++ > lib/ts/ink_queue.cc | 21 ++- > lib/ts/libts.h | 1 + > mgmt/RecordsConfig.cc | 2 + > proxy/Main.cc | 7 + > 11 files changed, 267 insertions(+), 21 deletions(-) > ---------------------------------------------------------------------- > > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst > ---------------------------------------------------------------------- > diff --git a/doc/reference/configuration/records.config.en.rst > b/doc/reference/configuration/records.config.en.rst > index 694d338..ccced34 100644 > --- a/doc/reference/configuration/records.config.en.rst > +++ b/doc/reference/configuration/records.config.en.rst > @@ -2849,6 +2849,18 @@ Sockets > Sets the minimum number of items a ProxyAllocator (per-thread) will > guarantee to be > holding at any one time. > > +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0 > + > + Enable (1) the use of huge pages on supported platforms. (Currently only > Linux) > + > + You must also enable hugepages at the OS level. In a modern linux Kernel > + this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a > + sufficiently large value. It is reasonable to use (system > + memory/hugepage size) because these pages are only created on demand. > + > + For more information on the implications of enabling huge pages, see > + `Wikipedia > <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>_`. > + > .. ts:cv:: CONFIG proxy.config.http.enabled INT 1
I think it would be very helpful to hear about your experiences with this feature ... how about starting a thread on dev@? > > Turn on or off support for HTTP proxying. This is rarely used, the one > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc > ---------------------------------------------------------------------- > diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc > index becf713..370c516 100644 > --- a/iocore/cache/Cache.cc > +++ b/iocore/cache/Cache.cc > @@ -38,6 +38,8 @@ > #include "P_CacheBC.h" > #endif > > +#include "hugepages.h" > + > // Compilation Options > #define USELESS_REENABLES // allow them for now > // #define VERIFY_JTEST_DATA > @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool > clear) > > Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume > (%lf%%)", vol_dirlen(this), (long long)this->len, > (double)vol_dirlen(this) / (double)this->len * 100.0); > - raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this)); > + > + raw_dir = NULL; > + if (ats_hugepage_enabled()) > + raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this)); > + if (raw_dir == NULL) > + raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this)); > + > dir = (Dir *)(raw_dir + vol_headerlen(this)); > header = (VolHeaderFooter *)raw_dir; > footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - > ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter))); > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc > ---------------------------------------------------------------------- > diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc > index 3a7b9c4..e0f20d3 100644 > --- a/iocore/cache/CacheDir.cc > +++ b/iocore/cache/CacheDir.cc > @@ -24,6 +24,8 @@ > > #include "P_Cache.h" > > +#include "hugepages.h" > + > // #define LOOP_CHECK_MODE 1 > #ifdef LOOP_CHECK_MODE > #define DIR_LOOP_THRESHOLD 1000 > @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void) > Debug("cache_dir_sync", "sync started"); > char *buf = NULL; > size_t buflen = 0; > + bool buf_huge = false; > > EThread *t = (EThread *)0xdeadbeef; > for (int i = 0; i < gnvol; i++) { > @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void) > #endif > > if (buflen < dirlen) { > - if (buf) > - ats_memalign_free(buf); > - buf = (char *)ats_memalign(ats_pagesize(), dirlen); > + if (buf) { > + if (buf_huge) > + ats_free_hugepage(buf, buflen); > + else > + ats_memalign_free(buf); > + } > buflen = dirlen; > + if (ats_hugepage_enabled()) { > + buf = (char *)ats_alloc_hugepage(buflen); > + buf_huge = true; > + } > + if (buf == NULL) { > + buf = (char *)ats_memalign(ats_pagesize(), buflen); > + buf_huge = false; > + } > } > > if (!d->dir_sync_in_progress) { > @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void) > Debug("cache_dir_sync", "done syncing dir for vol %s", > d->hash_text.get()); > } > Debug("cache_dir_sync", "sync done"); > - if (buf) > - ats_memalign_free(buf); > + if (buf) { > + if (buf_huge) > + ats_free_hugepage(buf, buflen); > + else > + ats_memalign_free(buf); > + buflen = 0; > + buf = NULL; > + buf_huge = false; > + } > } > > > @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e) > Lrestart: > if (vol_idx >= gnvol) { > vol_idx = 0; > - if (buf) { > - ats_memalign_free(buf); > - buf = 0; > - buflen = 0; > - } > Debug("cache_dir_sync", "sync done"); > if (event == EVENT_INTERVAL) > trigger = e->ethread->schedule_in(this, > HRTIME_SECONDS(cache_config_dir_sync_frequency)); > @@ -1196,10 +1212,21 @@ Lrestart: > Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to > disk", vol->header->write_pos, vol->hash_text.get()); > vol->header->dirty = 0; > if (buflen < dirlen) { > - if (buf) > - ats_memalign_free(buf); > - buf = (char *)ats_memalign(ats_pagesize(), dirlen); > + if (buf) { > + if (buf_huge) > + ats_free_hugepage(buf, buflen); > + else > + ats_memalign_free(buf); > + } > buflen = dirlen; > + if (ats_hugepage_enabled()) { > + buf = (char *)ats_alloc_hugepage(buflen); > + buf_huge = true; > + } > + if (buf == NULL) { > + buf = (char *)ats_memalign(ats_pagesize(), buflen); > + buf_huge = false; > + } > } > vol->header->sync_serial++; > vol->footer->sync_serial = vol->header->sync_serial; > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h > ---------------------------------------------------------------------- > diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h > index 268ecfb..881d6be 100644 > --- a/iocore/cache/P_CacheDir.h > +++ b/iocore/cache/P_CacheDir.h > @@ -295,6 +295,7 @@ struct CacheSync : public Continuation { > int vol_idx; > char *buf; > size_t buflen; > + bool buf_huge; > off_t writepos; > AIOCallbackInternal io; > Event *trigger; > @@ -302,7 +303,8 @@ struct CacheSync : public Continuation { > int mainEvent(int event, Event *e); > void aio_write(int fd, char *b, int n, off_t o); > > - CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), > buflen(0), writepos(0), trigger(0), start_time(0) > + CacheSync() > + : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), > buf_huge(false), writepos(0), trigger(0), start_time(0) > { > SET_HANDLER(&CacheSync::mainEvent); > } > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am > ---------------------------------------------------------------------- > diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am > index 7e04222..18c18cf 100644 > --- a/lib/ts/Makefile.am > +++ b/lib/ts/Makefile.am > @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \ > defalloc.h \ > fastlz.c \ > fastlz.h \ > + hugepages.cc \ > + hugepages.h \ > ink_aiocb.h \ > ink_align.h \ > ink_apidefs.h \ > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc > ---------------------------------------------------------------------- > diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc > new file mode 100644 > index 0000000..216bf0b > --- /dev/null > +++ b/lib/ts/hugepages.cc > @@ -0,0 +1,144 @@ > +/** @file > + > + @section license License > + > + Licensed to the Apache Software Foundation (ASF) under one > + or more contributor license agreements. See the NOTICE file > + distributed with this work for additional information > + regarding copyright ownership. The ASF licenses this file > + to you under the Apache License, Version 2.0 (the > + "License"); you may not use this file except in compliance > + with the License. You may obtain a copy of the License at > + > + http://www.apache.org/licenses/LICENSE-2.0 > + > + Unless required by applicable law or agreed to in writing, software > + distributed under the License is distributed on an "AS IS" BASIS, > + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + See the License for the specific language governing permissions and > + limitations under the License. > + */ > + > +#include <cstdio> > +#include <sys/mman.h> > +#include "Diags.h" > +#include "ink_align.h" > + > +#define DEBUG_TAG "hugepages" > +#define MEMINFO_PATH "/proc/meminfo" > +#define LINE_SIZE 256 > +#define TOKEN "Hugepagesize:" > +#define TOKEN_SIZE (strlen(TOKEN)) > + > +static int hugepage_size = -1; > +static bool hugepage_enabled; > + > +size_t > +ats_hugepage_size(void) > +{ > +#ifdef MAP_HUGETLB > + return hugepage_size; > +#else > + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); > + return 0; > +#endif > +} > + > +bool > +ats_hugepage_enabled(void) > +{ > +#ifdef MAP_HUGETLB > + return hugepage_enabled; > +#else > + return false; > +#endif > +} > + > +void > +ats_hugepage_init(int enabled) > +{ > +#ifdef MAP_HUGETLB > + FILE *fp; > + char line[LINE_SIZE]; > + char *p, *ep; > + > + hugepage_size = 0; > + > + if (!enabled) { > + Debug(DEBUG_TAG, "hugepages not enabled"); > + return; > + } > + > + fp = fopen(MEMINFO_PATH, "r"); > + > + if (fp == NULL) { > + Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH); > + return; > + } > + > + while (fgets(line, sizeof(line), fp)) { > + if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) { > + p = line + TOKEN_SIZE; > + while (*p == ' ') { > + p++; > + } > + hugepage_size = strtol(p, &ep, 10); > + // What other values can this be? > + if (strncmp(ep, " kB", 4)) { > + hugepage_size *= 1024; > + } > + break; > + } > + } > + > + fclose(fp); > + > + if (hugepage_size) { > + hugepage_enabled = true; > + } > + > + Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size); > +#else > + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); > +#endif > +} > + > +void * > +ats_alloc_hugepage(size_t s) > +{ > +#ifdef MAP_HUGETLB > + size_t size; > + void *mem; > + > + size = INK_ALIGN(s, ats_hugepage_size()); > + > + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS > | MAP_HUGETLB, -1, 0); > + > + if (mem == MAP_FAILED) { > + Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size); > + return NULL; > + } > + > + return mem; > +#else > + (void)s; > + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); > + return NULL; > +#endif > +} > + > +bool > +ats_free_hugepage(void *ptr, size_t s) > +{ > +#ifdef MAP_HUGETLB > + size_t size; > + > + size = INK_ALIGN(s, ats_hugepage_size()); > + return (munmap(ptr, size) == 0); > +#else > + (void)ptr; > + (void)s; > + Debug(DEBUG_TAG, "MAP_HUGETLB not defined"); > + return false; > +#endif > +} > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h > ---------------------------------------------------------------------- > diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h > new file mode 100644 > index 0000000..812542b > --- /dev/null > +++ b/lib/ts/hugepages.h > @@ -0,0 +1,32 @@ > +/** @file > + > + @section license License > + > + Licensed to the Apache Software Foundation (ASF) under one > + or more contributor license agreements. See the NOTICE file > + distributed with this work for additional information > + regarding copyright ownership. The ASF licenses this file > + to you under the Apache License, Version 2.0 (the > + "License"); you may not use this file except in compliance > + with the License. You may obtain a copy of the License at > + > + http://www.apache.org/licenses/LICENSE-2.0 > + > + Unless required by applicable law or agreed to in writing, software > + distributed under the License is distributed on an "AS IS" BASIS, > + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + See the License for the specific language governing permissions and > + limitations under the License. > + */ > +#ifndef _hugepages_h_ > +#define _hugepages_h_ > + > +#include <cstring> Why use the c* versions of these headers? We don't do that anywhere else and you are not actually using the names from teh std namespace in this patch either ... > + > +size_t ats_hugepage_size(void); > +bool ats_hugepage_enabled(void); > +void ats_hugepage_init(int); > +void *ats_alloc_hugepage(size_t); > +bool ats_free_hugepage(void *, size_t); > + > +#endif > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc > ---------------------------------------------------------------------- > diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc > index e718b3f..0f14b68 100644 > --- a/lib/ts/ink_queue.cc > +++ b/lib/ts/ink_queue.cc > @@ -50,6 +50,7 @@ > #include "ink_assert.h" > #include "ink_queue_ext.h" > #include "ink_align.h" > +#include "hugepages.h" > > inkcoreapi volatile int64_t fastalloc_mem_in_use = 0; > inkcoreapi volatile int64_t fastalloc_mem_total = 0; > @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, > uint32_t type_size, uint32 > /* quick test for power of 2 */ > ink_assert(!(alignment & (alignment - 1))); > f->alignment = alignment; > - f->chunk_size = chunk_size; > // Make sure we align *all* the objects in the allocation, not just the > first one > f->type_size = INK_ALIGN(type_size, alignment); > + if (ats_hugepage_enabled()) { > + f->chunk_size = INK_ALIGN(chunk_size * f->type_size, > ats_hugepage_size()) / f->type_size; > + } else { > + f->chunk_size = chunk_size; > + } > SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0); > > f->used = 0; > @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f) > #ifdef DEBUG > char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL; > #endif > - if (f->alignment) > - newp = ats_memalign(f->alignment, f->chunk_size * type_size); > - else > - newp = ats_malloc(f->chunk_size * type_size); > + if (ats_hugepage_enabled()) > + newp = ats_alloc_hugepage(f->chunk_size * type_size); > + > + if (newp == NULL) { > + if (f->alignment) > + newp = ats_memalign(f->alignment, f->chunk_size * type_size); > + else > + newp = ats_malloc(f->chunk_size * type_size); > + } > ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice); > - > fl_memadd(f->chunk_size * type_size); > #ifdef DEBUG > newsbrk = (char *)sbrk(0); > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h > ---------------------------------------------------------------------- > diff --git a/lib/ts/libts.h b/lib/ts/libts.h > index f136d74..a99e67f 100644 > --- a/lib/ts/libts.h > +++ b/lib/ts/libts.h > @@ -41,6 +41,7 @@ > #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ *** > */ > > +#include "hugepages.h" > #include "ink_config.h" > #include "ink_platform.h" > #include "ink_align.h" > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc > ---------------------------------------------------------------------- > diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc > index ebcb8fd..ec3387a 100644 > --- a/mgmt/RecordsConfig.cc > +++ b/mgmt/RecordsConfig.cc > @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] = > , > {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", > RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL} > , > + {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", > RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL} > + , I would expect bad things to happen if you change this setting at runtime, so the RECU value should be RECU_RESTART_TS. > > //############ > //# > > http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc > ---------------------------------------------------------------------- > diff --git a/proxy/Main.cc b/proxy/Main.cc > index 202da33..4684945 100644 > --- a/proxy/Main.cc > +++ b/proxy/Main.cc > @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv) > // Restart syslog now that we have configuration info > syslog_log_configure(); > > + // init huge pages > + int enabled; > + REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages"); > + ats_hugepage_init(enabled); > + Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize()); > + Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size()); Could you imagine there being a need to set this separately from RAM cache and for allocators? J