Burakov, Anatoly <anatoly.bura...@intel.com> wrote on Mon, May 22, 2023 at 21:28:
>
> On 5/22/2023 1:41 PM, Fengnan Chang wrote:
> > Under legacy mode, if the number of contiguous memsegs is greater
> > than RTE_MAX_MEMSEG_PER_LIST, EAL init will fail even though
> > another memseg list is empty, because only one memseg list is
> > checked in remap_needed_hugepages.
> > Fix this by adding an argument that indicates how many pages were
> > mapped in remap_segment; remap_segment maps as many pages as it
> > can, and if the request exceeds its capacity,
> > remap_needed_hugepages will continue to map the remaining pages.
> >
> > For example:
> > hugepage configuration:
> > cat /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages
> > 10241
> > 10239
> >
> > startup log:
> > EAL: Detected memory type: socket_id:0 hugepage_sz:2097152
> > EAL: Detected memory type: socket_id:1 hugepage_sz:2097152
> > EAL: Creating 4 segment lists: n_segs:8192 socket_id:0 hugepage_sz:2097152
> > EAL: Creating 4 segment lists: n_segs:8192 socket_id:1 hugepage_sz:2097152
> > EAL: Requesting 13370 pages of size 2MB from socket 0
> > EAL: Requesting 7110 pages of size 2MB from socket 1
> > EAL: Attempting to map 14220M on socket 1
> > EAL: Allocated 14220M on socket 1
> > EAL: Attempting to map 26740M on socket 0
> > EAL: Could not find space for memseg. Please increase 32768 and/or 65536 in
> > configuration.
> > EAL: Couldn't remap hugepage files into memseg lists
> > EAL: FATAL: Cannot init memory
> > EAL: Cannot init memory
> >
> > Signed-off-by: Fengnan Chang <changfeng...@bytedance.com>
> > Signed-off-by: Lin Li <lilint...@bytedance.com>
> > ---
> >  lib/eal/linux/eal_memory.c | 33 +++++++++++++++++++++------------
> >  1 file changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> > index 60fc8cc6ca..b2e6453fbe 100644
> > --- a/lib/eal/linux/eal_memory.c
> > +++ b/lib/eal/linux/eal_memory.c
> > @@ -657,12 +657,12 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
> >  }
> >
> >  static int
> > -remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> > +remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end, int *mapped_seg_len)
> >  {
> >          struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
> >          struct rte_memseg_list *msl;
> >          struct rte_fbarray *arr;
> > -        int cur_page, seg_len;
> > +        int cur_page, seg_len, cur_len;
> >          unsigned int msl_idx;
> >          int ms_idx;
> >          uint64_t page_sz;
> > @@ -692,8 +692,9 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >
> >                  /* leave space for a hole if array is not empty */
> >                  empty = arr->count == 0;
> > +                cur_len = RTE_MIN((unsigned int)seg_len, arr->len - arr->count - (empty ? 0 : 1));
> >                  ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> > -                                seg_len + (empty ? 0 : 1));
> > +                                cur_len + (empty ? 0 : 1));
> >
> >                  /* memseg list is full? */
> >                  if (ms_idx < 0)
> >                          continue;
> > @@ -704,12 +705,12 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >                   */
> >                  if (!empty)
> >                          ms_idx++;
> > +                *mapped_seg_len = cur_len;
> >                  break;
> >          }
> >          if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> > -                RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
> > -                        RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
> > -                        RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
> > +                RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase RTE_MAX_MEMSEG_PER_LIST "
> > +                        "RTE_MAX_MEMSEG_PER_TYPE and/or RTE_MAX_MEM_MB_PER_TYPE in configuration.\n");
> >                  return -1;
> >          }
> >
> > @@ -725,6 +726,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
> >                          void *addr;
> >                          int fd;
> >
> > +                        if (cur_page - seg_start == *mapped_seg_len)
> > +                                break;
> >                          fd = open(hfile->filepath, O_RDWR);
> >                          if (fd < 0) {
> >                                  RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
> > @@ -986,7 +989,7 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
> >  static int
> >  remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
> >  {
> > -        int cur_page, seg_start_page, new_memseg, ret;
> > +        int cur_page, seg_start_page, new_memseg, ret, mapped_seg_len = 0;
> >
> >          seg_start_page = 0;
> >          for (cur_page = 0; cur_page < n_pages; cur_page++) {
> > @@ -1023,21 +1026,27 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
> >                          /* if this isn't the first time, remap segment */
> >                          if (cur_page != 0) {
> >                                  ret = remap_segment(hugepages, seg_start_page,
> > -                                                cur_page);
> > +                                                cur_page, &mapped_seg_len);
> >                                  if (ret != 0)
> >                                          return -1;
> >                          }
> > +                        cur_page = seg_start_page + mapped_seg_len;
> >                          /* remember where we started */
> >                          seg_start_page = cur_page;
> > +                        mapped_seg_len = 0;
> >                  }
> >                  /* continuation of previous memseg */
> >          }
> >          /* we were stopped, but we didn't remap the last segment, do it now */
> >          if (cur_page != 0) {
> > -                ret = remap_segment(hugepages, seg_start_page,
> > -                                cur_page);
> > -                if (ret != 0)
> > -                        return -1;
> > +                while (seg_start_page < n_pages) {
> > +                        ret = remap_segment(hugepages, seg_start_page,
> > +                                        cur_page, &mapped_seg_len);
> > +                        if (ret != 0)
> > +                                return -1;
> > +                        seg_start_page = seg_start_page + mapped_seg_len;
> > +                        mapped_seg_len = 0;
> > +                }
> >          }
> >          return 0;
> >  }
>
> This works, but I feel like it's overcomplicated - the same logic you're
> trying to use can just be implemented using `find_biggest_free()` +
> `find_contig_free()` and returning `seg_len` from `remap_segment()`?
>
> Something like the following:
>
> ---
>
> diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c
> index 60fc8cc6ca..08acc787fc 100644
> --- a/lib/eal/linux/eal_memory.c
> +++ b/lib/eal/linux/eal_memory.c
> @@ -681,6 +681,7 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>
>          /* find free space in memseg lists */
>          for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
> +                int free_len;
>                  bool empty;
>                  msl = &mcfg->memsegs[msl_idx];
>                  arr = &msl->memseg_arr;
> @@ -692,18 +693,27 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>
>                  /* leave space for a hole if array is not empty */
>                  empty = arr->count == 0;
> -                ms_idx = rte_fbarray_find_next_n_free(arr, 0,
> -                                seg_len + (empty ? 0 : 1));
>
> -                /* memseg list is full? */
> -                if (ms_idx < 0)
> -                        continue;
> +                /* find start of the biggest contiguous block and its size */
> +                ms_idx = rte_fbarray_find_biggest_free(arr, 0);
> +                free_len = rte_fbarray_find_contig_free(arr, ms_idx);
>
>                  /* leave some space between memsegs, they are not IOVA
>                   * contiguous, so they shouldn't be VA contiguous either.
>                   */
> -                if (!empty)
> +                if (!empty) {
>                          ms_idx++;
> +                        free_len--;
> +                }
> +
> +                /* memseg list is full? */
> +                if (free_len < 1)
> +                        continue;
> +
> +                /* we might not get all of the space we wanted */
> +                free_len = RTE_MIN(seg_len, free_len);
> +                seg_end = seg_start + free_len;
> +                seg_len = seg_end - seg_start;
>                  break;
>          }
>          if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
> @@ -787,7 +797,7 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
>          }
>          RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
>                          (seg_len * page_sz) >> 20, socket_id);
> -        return 0;
> +        return seg_len;
>  }
>
>  static uint64_t
> @@ -1022,10 +1032,17 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>                  if (new_memseg) {
>                          /* if this isn't the first time, remap segment */
>                          if (cur_page != 0) {
> -                                ret = remap_segment(hugepages, seg_start_page,
> -                                                cur_page);
> -                                if (ret != 0)
> -                                        return -1;
> +                                int n_remapped = 0;
> +                                int n_needed = cur_page - seg_start_page;
> +
> +                                while (n_remapped < n_needed) {
> +                                        ret = remap_segment(hugepages,
> +                                                        seg_start_page,
> +                                                        cur_page);
> +                                        if (ret < 0)
> +                                                return -1;
> +                                        n_remapped += ret;
> +                                }
>                          }
>                          /* remember where we started */
>                          seg_start_page = cur_page;
> @@ -1034,10 +1051,16 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
>          }
>          /* we were stopped, but we didn't remap the last segment, do it now */
>          if (cur_page != 0) {
> -                ret = remap_segment(hugepages, seg_start_page,
> -                                cur_page);
> -                if (ret != 0)
> -                        return -1;
> +                int n_remapped = 0;
> +                int n_needed = cur_page - seg_start_page;
> +
> +                while (n_remapped < n_needed) {
> +                        ret = remap_segment(
> +                                hugepages, seg_start_page, cur_page);
> +                        if (ret < 0)
> +                                return -1;
> +                        n_remapped += ret;
> +                }
>          }
>          return 0;
>  }
>
> ---
>
> This should do the trick? Also, this probably needs to be duplicated for
> Windows and FreeBSD init as well, because AFAIK they follow the legacy
> mem init logic.

I took a quick look at the FreeBSD and Windows code. FreeBSD doesn't have
this problem, because it maps hugepages one by one rather than mapping
multiple pages at once, and Windows calls eal_dynmem_hugepage_init when
hugepages are present. So it seems modifying only Linux is enough.
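For what it's worth, below is a minimal standalone sketch of the retry-loop
idea that both versions above share. It is not the actual EAL code:
place_run(), N_LISTS and LIST_CAP are made-up stand-ins, with LIST_CAP playing
the role of RTE_MAX_MEMSEG_PER_LIST. The callee places at most what fits into
one list and reports how much it actually placed, and the caller keeps calling
it until the whole contiguous run is placed, so a run larger than one list
spills into the next list instead of failing init.

---

#include <stdio.h>

#define N_LISTS  4
#define LIST_CAP 8192   /* stand-in for RTE_MAX_MEMSEG_PER_LIST */

static int used[N_LISTS];       /* entries already taken in each list */

/* Place up to n_pages into the first list that still has room;
 * return how many pages were actually placed, or -1 if no list has room.
 */
static int place_run(int n_pages)
{
        int i;

        for (i = 0; i < N_LISTS; i++) {
                int free_len = LIST_CAP - used[i];

                if (free_len < 1)
                        continue;
                if (n_pages < free_len)
                        free_len = n_pages;
                used[i] += free_len;
                return free_len;
        }
        return -1;
}

int main(void)
{
        int n_needed = 13370;   /* one contiguous run, larger than one list */
        int n_placed = 0;

        while (n_placed < n_needed) {
                int ret = place_run(n_needed - n_placed);

                if (ret < 0) {
                        fprintf(stderr, "no space left for the run\n");
                        return 1;
                }
                n_placed += ret;
        }
        printf("placed %d pages: %d + %d + %d + %d\n",
               n_placed, used[0], used[1], used[2], used[3]);
        return 0;
}

---

With four lists of 8192 entries each, as in the startup log above, the
13370-page run ends up split across two lists (8192 + 5178) instead of
aborting init.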
>
> --
> Thanks,
> Anatoly
>