On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote:
Introduce OS-independent wrappers for memory management operations used
across DPDK and specifically in common code of EAL:
* rte_mem_map()
* rte_mem_unmap()
* rte_get_page_size()
* rte_mem_lock()
Windows uses different APIs for memory mapping and reservation, while
Unices reserve memory by mapping it. Introduce EAL private functions to
support memory reservation in common code:
* eal_mem_reserve()
* eal_mem_free()
* eal_mem_set_dump()
Wrappers follow POSIX semantics limited to DPDK tasks, but their
signatures deliberately differ from POSIX ones to be more safe and
expressive.
Signed-off-by: Dmitry Kozlyuk <dmitry.kozl...@gmail.com>
---
<snip>
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
@@ -105,24 +94,24 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
return NULL;
}
- mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
- mmap_flags, -1, 0);
- if (mapped_addr == MAP_FAILED && allow_shrink)
- *size -= page_sz;
+ mapped_addr = eal_mem_reserve(
+ requested_addr, (size_t)map_sz, reserve_flags);
+ if ((mapped_addr == NULL) && allow_shrink)
+ size -= page_sz;
Should be *size -= page_sz; size is a pointer here, so "size -= page_sz" moves the pointer rather than shrinking the requested size.
- if (mapped_addr != MAP_FAILED && addr_is_hint &&
- mapped_addr != requested_addr) {
+ if ((mapped_addr != NULL) && addr_is_hint &&
+ (mapped_addr != requested_addr)) {
try++;
next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
/* hint was not used. Try with another offset */
- munmap(mapped_addr, map_sz);
- mapped_addr = MAP_FAILED;
+ eal_mem_free(mapped_addr, *size);
Why change map_sz to *size? The region was reserved with map_sz bytes (see the eal_mem_reserve() call above), so the two values may differ and freeing *size could be wrong.
+ mapped_addr = NULL;
requested_addr = next_baseaddr;
}
}
} while ((allow_shrink || addr_is_hint) &&
- mapped_addr == MAP_FAILED && *size > 0);
+ (mapped_addr == NULL) && (*size > 0));
<snip>
@@ -547,10 +531,10 @@ rte_eal_memdevice_init(void)
int
rte_mem_lock_page(const void *virt)
{
- unsigned long virtual = (unsigned long)virt;
- int page_size = getpagesize();
- unsigned long aligned = (virtual & ~(page_size - 1));
- return mlock((void *)aligned, page_size);
+ uintptr_t virtual = (uintptr_t)virt;
+ int page_size = rte_get_page_size();
+ uintptr_t aligned = (virtual & ~(page_size - 1));
Might as well fix to use macros? e.g.
size_t pagesz = rte_get_page_size();
return rte_mem_lock(RTE_PTR_ALIGN(virt, pagesz), pagesz);
(also, note that rte_get_page_size() returns size_t, not int)
+ return rte_mem_lock((void *)aligned, page_size);
}
int
diff --git a/lib/librte_eal/common/eal_private.h
b/lib/librte_eal/common/eal_private.h
index 3aafd892f..67ca83e47 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -11,6 +11,7 @@
<snip>
+ * Reservation size. Must be a multiple of system page size.
+ * @param flags
+ * Reservation options, a combination of eal_mem_reserve_flags.
+ * @returns
+ * Starting address of the reserved area on success, NULL on failure.
+ * Callers must not access this memory until remapping it.
+ */
+void *eal_mem_reserve(void *requested_addr, size_t size, int flags);
Should we also require requested_addr to be page-aligned?
Also, here and in the other added APIs — a nitpick, but our coding style
guide (and the code style in this file) suggests that the return type
should be on a separate line, e.g.
void *
eal_mem_reserve(...);
+
+/**
+ * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ *
+ * If *virt* and *size* describe a part of the reserved region,
+ * only this part of the region is freed (accurately up to the system
+ * page size). If *virt* points to allocated memory, *size* must match
+ * the one specified on allocation. The behavior is undefined
+ * if the memory pointed by *virt* is obtained from another source
+ * than listed above.
+ *
<snip>
+}
+
+static int
+mem_rte_to_sys_prot(int prot)
+{
+ int sys_prot = 0;
Maybe set it to PROT_NONE to make it more obvious?
+
+ if (prot & RTE_PROT_READ)
+ sys_prot |= PROT_READ;
+ if (prot & RTE_PROT_WRITE)
+ sys_prot |= PROT_WRITE;
+ if (prot & RTE_PROT_EXECUTE)
+ sys_prot |= PROT_EXEC;
+
+ return sys_prot;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+ int fd, size_t offset)
+{
+ int sys_prot = 0;
Not necessary to initialize sys_prot (and it's counter-productive, as a
compiler warning about uninitialized usage is a *good thing*!).
+ int sys_flags = 0;
+
+ sys_prot = mem_rte_to_sys_prot(prot);
+
+ if (flags & RTE_MAP_SHARED)
+ sys_flags |= MAP_SHARED;
+ if (flags & RTE_MAP_ANONYMOUS)
+ sys_flags |= MAP_ANONYMOUS;
+ if (flags & RTE_MAP_PRIVATE)
+ sys_flags |= MAP_PRIVATE;
+ if (flags & RTE_MAP_FORCE_ADDRESS)
+ sys_flags |= MAP_FIXED;
+
+ return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+ return mem_unmap(virt, size);
+}
+
+size_t
+rte_get_page_size(void)
+{
+ return sysconf(_SC_PAGESIZE);
Can we perhaps cache this value?
+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+ return mlock(virt, size);
This call can fail. It should pass errno as rte_errno as well, just like
all other calls from this family.
Also, if the implementation "may require" page alignment, how about
requiring it unconditionally?
+}
diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
index cfa1b4ef9..5734f26ad 100644
--- a/lib/librte_eal/unix/meson.build
+++ b/lib/librte_eal/unix/meson.build
@@ -3,4 +3,5 @@
sources += files(
'eal_unix.c',
+ 'eal_unix_memory.c',
)
--
Thanks,
Anatoly