On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote:
Introduce OS-independent wrappers for memory management operations used
across DPDK and specifically in common code of EAL:

* rte_mem_map()
* rte_mem_unmap()
* rte_get_page_size()
* rte_mem_lock()

Windows uses different APIs for memory mapping and reservation, while
Unices reserve memory by mapping it. Introduce EAL private functions to
support memory reservation in common code:

* eal_mem_reserve()
* eal_mem_free()
* eal_mem_set_dump()

Wrappers follow POSIX semantics limited to DPDK tasks, but their
signatures deliberately differ from POSIX ones to be more safe and
expressive.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozl...@gmail.com>
---

<snip>

        RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
@@ -105,24 +94,24 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
                        return NULL;
                }
- mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
-                               mmap_flags, -1, 0);
-               if (mapped_addr == MAP_FAILED && allow_shrink)
-                       *size -= page_sz;
+               mapped_addr = eal_mem_reserve(
+                       requested_addr, (size_t)map_sz, reserve_flags);
+               if ((mapped_addr == NULL) && allow_shrink)
+                       size -= page_sz;

Should be *size -= page_sz, size is a pointer in this case.

- if (mapped_addr != MAP_FAILED && addr_is_hint &&
-                   mapped_addr != requested_addr) {
+               if ((mapped_addr != NULL) && addr_is_hint &&
+                               (mapped_addr != requested_addr)) {
                        try++;
                        next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
                        if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
                                /* hint was not used. Try with another offset */
-                               munmap(mapped_addr, map_sz);
-                               mapped_addr = MAP_FAILED;
+                               eal_mem_free(mapped_addr, *size);

Why change map_sz to *size?

+                               mapped_addr = NULL;
                                requested_addr = next_baseaddr;
                        }
                }
        } while ((allow_shrink || addr_is_hint) &&
-                mapped_addr == MAP_FAILED && *size > 0);
+               (mapped_addr == NULL) && (*size > 0));

<snip>

@@ -547,10 +531,10 @@ rte_eal_memdevice_init(void)
  int
  rte_mem_lock_page(const void *virt)
  {
-       unsigned long virtual = (unsigned long)virt;
-       int page_size = getpagesize();
-       unsigned long aligned = (virtual & ~(page_size - 1));
-       return mlock((void *)aligned, page_size);
+       uintptr_t virtual = (uintptr_t)virt;
+       int page_size = rte_get_page_size();
+       uintptr_t aligned = (virtual & ~(page_size - 1));

Might as well fix to use macros? e.g.

size_t pagesz = rte_get_page_size();
return rte_mem_lock(RTE_PTR_ALIGN(virt, pagesz), pagesz);

(also, note that rte_get_page_size() returns size_t, not int)

+       return rte_mem_lock((void *)aligned, page_size);
  }
int
diff --git a/lib/librte_eal/common/eal_private.h 
b/lib/librte_eal/common/eal_private.h
index 3aafd892f..67ca83e47 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -11,6 +11,7 @@

<snip>

+ *  Reservation size. Must be a multiple of system page size.
+ * @param flags
+ *  Reservation options, a combination of eal_mem_reserve_flags.
+ * @returns
+ *  Starting address of the reserved area on success, NULL on failure.
+ *  Callers must not access this memory until remapping it.
+ */
+void *eal_mem_reserve(void *requested_addr, size_t size, int flags);

Should we also require requested_addr to be page-aligned?

Also, here and in other added API's, nitpick but our coding style guide (and the code style in this file) suggests that return value should be on a separate line, e.g.

void *
eal_mem_reserve(...);

+
+/**
+ * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ *
+ * If *virt* and *size* describe a part of the reserved region,
+ * only this part of the region is freed (accurately up to the system
+ * page size). If *virt* points to allocated memory, *size* must match
+ * the one specified on allocation. The behavior is undefined
+ * if the memory pointed by *virt* is obtained from another source
+ * than listed above.
+ *

<snip>

+}
+
+static int
+mem_rte_to_sys_prot(int prot)
+{
+       int sys_prot = 0;

Maybe set it to PROT_NONE to make it more obvious?

+
+       if (prot & RTE_PROT_READ)
+               sys_prot |= PROT_READ;
+       if (prot & RTE_PROT_WRITE)
+               sys_prot |= PROT_WRITE;
+       if (prot & RTE_PROT_EXECUTE)
+               sys_prot |= PROT_EXEC;
+
+       return sys_prot;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+       int fd, size_t offset)
+{
+       int sys_prot = 0;

Not necessary to initialize sys_prot (and it's counter-productive as compiler warning about uninitialized usage is a *good thing*!).

+       int sys_flags = 0;
+
+       sys_prot = mem_rte_to_sys_prot(prot);
+
+       if (flags & RTE_MAP_SHARED)
+               sys_flags |= MAP_SHARED;
+       if (flags & RTE_MAP_ANONYMOUS)
+               sys_flags |= MAP_ANONYMOUS;
+       if (flags & RTE_MAP_PRIVATE)
+               sys_flags |= MAP_PRIVATE;
+       if (flags & RTE_MAP_FORCE_ADDRESS)
+               sys_flags |= MAP_FIXED;
+
+       return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+       return mem_unmap(virt, size);
+}
+
+size_t
+rte_get_page_size(void)
+{
+       return sysconf(_SC_PAGESIZE);

Can we perhaps cache this value?

+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+       return mlock(virt, size);

This call can fail. It should pass errno as rte_errno as well, just like all other calls from this family.

Also, if the implementation "may require" page alignment, how about requiring it unconditionally?

+}
diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
index cfa1b4ef9..5734f26ad 100644
--- a/lib/librte_eal/unix/meson.build
+++ b/lib/librte_eal/unix/meson.build
@@ -3,4 +3,5 @@
sources += files(
        'eal_unix.c',
+       'eal_unix_memory.c',
  )



--
Thanks,
Anatoly

Reply via email to