From: Diego Nieto Cid <dnie...@gmail.com> This is the first iteration of the patch. I incorporated the suggestions made by Sergey in his review. I also used the implementation of vm_set_size_limit based on the host port being the receiver of the RPC (the one that Sergey sent to the other thread).
There remains a TODO regarding how to get swap size to set the initial limit parameters to the value suggested by Samuel here[1]. [1] https://lists.gnu.org/archive/html/bug-hurd/2024-12/msg00217.html --- * include/mach/gnumach.defs: (vm_set_size_limit) new routine (vm_get_size_limit) likewise * kern/task.c: (task_create_kernel) if parent_task is not null copy virtual memory limit * tests/test-vm.c: (test_vm_limit) add test for the new routines * vm/vm_map.h: (struct vm_map) new fields size_none, size_cur_limit and size_max_limit * vm/vm_map.c: (vm_map_setup) initialize new fields (vm_map_enforce_limit) new function (vm_map_copy_limits) new function (vm_map_find_entry) call limit enforcer function (vm_map_enter) likewise (vm_map_copyout) likewise (vm_map_copyout_page_list) likewise (vm_map_fork) copy parent limit to the new map and compute and set size_none of the new map * vm/vm_user.c: (vm_set_size_limit) new function (vm_get_size_limit) likewise --- include/mach/gnumach.defs | 17 +++++++++ kern/task.c | 5 +++ tests/test-vm.c | 44 ++++++++++++++++++++++ vm/vm_map.c | 79 +++++++++++++++++++++++++++++++++++++++ vm/vm_map.h | 13 +++++++ vm/vm_user.c | 66 ++++++++++++++++++++++++++++++++ 6 files changed, 224 insertions(+) diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs index f13e866b..a09256b8 100644 --- a/include/mach/gnumach.defs +++ b/include/mach/gnumach.defs @@ -223,3 +223,20 @@ simpleroutine thread_set_name( routine thread_get_name( thread : thread_t; out name : kernel_debug_name_t); + +/* + * Set a task virtual memory limit parameters + */ +routine vm_set_size_limit( + host_port : mach_port_t; + map : vm_task_t; + current_limit : vm_size_t; + max_limit : vm_size_t); + +/* + * Get a task virtual memory limit parameters + */ +routine vm_get_size_limit( + map : vm_task_t; + out current_limit : vm_size_t; + out max_limit : vm_size_t); diff --git a/kern/task.c b/kern/task.c index bd57ca2a..255f4388 100644 --- a/kern/task.c +++ b/kern/task.c @@ 
-126,6 +126,11 @@ task_create_kernel( trunc_page(VM_MAX_USER_ADDRESS)); if (new_task->map == VM_MAP_NULL) pmap_destroy(new_pmap); + else { + vm_map_lock(parent_task->map); + vm_map_copy_limits(parent_task->map, new_task->map); + vm_map_unlock(parent_task->map); + } } } if (new_task->map == VM_MAP_NULL) { diff --git a/tests/test-vm.c b/tests/test-vm.c index 4ece792e..74fcc309 100644 --- a/tests/test-vm.c +++ b/tests/test-vm.c @@ -75,11 +75,55 @@ static void test_wire() // TODO check that all memory is actually wired or unwired } +void test_vm_limit() +{ + kern_return_t err; + vm_address_t mem; + const size_t M_128M = 128l * 1024l * 1024l; + const size_t M_512M = 512l * 1024l * 1024l; + vm_size_t cur; + vm_size_t max; + + /* set VM memory limitations */ + err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M); + ASSERT(err == KERN_SUCCESS, "cannot set VM limits"); + + /* check limits are actually saved */ + err = vm_get_size_limit(mach_task_self(), &cur, &max); + ASSERT(err == KERN_SUCCESS, "getting the VM limit failed"); + ASSERT(cur == M_128M, "cur limit was not expected"); + ASSERT(max == M_512M, "max limit was not expected"); + + /* check we can no longer increase the hard limit */ + err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M * 2); + ASSERT(err == KERN_INVALID_HOST, "raising VM hard limit shall fail"); + + /* alloc some memory below the limit */ + err = vm_allocate(mach_task_self(), &mem, (128l * 1024l), TRUE); + ASSERT(err == KERN_SUCCESS, "allocating memory below the limit must succeed"); + vm_deallocate(mach_task_self(), mem, (128l * 1024l)); + + /* alloc a bigger chunk to make it hit the limit */ + err = vm_allocate(mach_task_self(), &mem, (M_512M * 2), TRUE); + ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE"); + + /* check that "root" can increase the hard limit */ + err = vm_set_size_limit(host_priv(), mach_task_self(), M_128M, M_512M * 2); + ASSERT(err == KERN_SUCCESS, "privileged 
tasks shall be allowed to increase the max limit"); + + /* check limits are actually saved */ + err = vm_get_size_limit(mach_task_self(), &cur, &max); + ASSERT(err == KERN_SUCCESS, "getting the VM limit failed"); + ASSERT(cur == M_128M, "cur limit was not expected"); + ASSERT(max == (M_512M * 2), "max limit was not expected"); +} + int main(int argc, char *argv[], int envc, char *envp[]) { printf("VM_MIN_ADDRESS=0x%p\n", VM_MIN_ADDRESS); printf("VM_MAX_ADDRESS=0x%p\n", VM_MAX_ADDRESS); test_wire(); test_memobj(); + test_vm_limit(); return 0; } diff --git a/vm/vm_map.c b/vm/vm_map.c index 03d22ea1..eded31a0 100644 --- a/vm/vm_map.c +++ b/vm/vm_map.c @@ -189,6 +189,7 @@ void vm_map_setup( map->size = 0; map->size_wired = 0; + map->size_none = 0; map->ref_count = 1; map->pmap = pmap; map->min_offset = min; @@ -198,6 +199,9 @@ void vm_map_setup( map->first_free = vm_map_to_entry(map); map->hint = vm_map_to_entry(map); map->name = NULL; + /* TODO add to default limit the swap size */ + map->size_cur_limit = vm_page_mem_size() / 2; + map->size_max_limit = vm_page_mem_size() / 2; vm_map_lock_init(map); simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); @@ -268,6 +272,49 @@ void vm_map_unlock(struct vm_map *map) lock_write_done(&map->lock); } +/* + * Enforces the VM limit of a target map. 
+ */ +static kern_return_t +vm_map_enforce_limit( + vm_map_t map, + vm_size_t size, + const char *fn_name) +{ + /* Limit is ignored for the kernel map */ + if (vm_map_pmap(map) == kernel_pmap) { + return KERN_SUCCESS; + } + + /* Avoid taking into account the total VM_PROT_NONE virtual memory */ + vm_size_t usable_size = map->size - map->size_none; + vm_size_t new_size = size + usable_size; + /* Check for integer overflow */ + if (new_size < size) { + return KERN_INVALID_ARGUMENT; + } + + if (new_size > map->size_cur_limit) { + task_t task = current_task(); + printf("[%s] [task %s] map size: %lu, none: %lu, requested: %lu, limit: %lu\n", + fn_name, task->name, map->size, map->size_none, size, map->size_cur_limit); + return KERN_NO_SPACE; + } + + return KERN_SUCCESS; +} + +/* + * Copies the limits from source to destination map. + * Called by task_create_kernel with the src_map locked. + */ +void +vm_map_copy_limits(vm_map_t src_map, vm_map_t dst_map) +{ + dst_map->size_cur_limit = src_map->size_cur_limit; + dst_map->size_max_limit = src_map->size_max_limit; +} + /* * vm_map_entry_create: [ internal use only ] * @@ -789,6 +836,10 @@ kern_return_t vm_map_find_entry( vm_map_entry_t entry, new_entry; vm_offset_t start; vm_offset_t end; + kern_return_t err; + + if ((err = vm_map_enforce_limit(map, size, "vm_map_find_entry")) != KERN_SUCCESS) + return err; entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start); @@ -1037,6 +1088,16 @@ kern_return_t vm_map_enter( RETURN(KERN_NO_SPACE); } + /* + * If the allocation has protection equal to VM_PROT_NONE, + * don't check for limits as the map's size_none field is + * not yet incremented. 
+ */ + if (max_protection != VM_PROT_NONE) { + if ((result = vm_map_enforce_limit(map, size, "vm_map_enter")) != KERN_SUCCESS) + RETURN(result); + } + /* * At this point, * "start" and "end" should define the endpoints of the @@ -1160,6 +1221,7 @@ kern_return_t vm_map_enter( vm_map_entry_link(map, entry, new_entry); map->size += size; + map->size_none += ((max_protection == VM_PROT_NONE) ? size : 0); /* * Update the free space hint and the lookup hint @@ -2042,6 +2104,7 @@ void vm_map_entry_delete( vm_map_entry_unlink(map, entry); map->size -= size; + map->size_none -= ((entry->max_protection == VM_PROT_NONE) ? size : 0); vm_map_entry_dispose(map, entry); } @@ -2882,6 +2945,11 @@ kern_return_t vm_map_copyout( return KERN_NO_SPACE; } + if ((kr = vm_map_enforce_limit(dst_map, size, "vm_map_copyout")) != KERN_SUCCESS) { + vm_map_unlock(dst_map); + return kr; + } + /* * Adjust the addresses in the copy chain, and * reset the region attributes. @@ -3055,6 +3123,11 @@ kern_return_t vm_map_copyout_page_list( vm_map_lock(dst_map); + if ((result = vm_map_enforce_limit(dst_map, size, "vm_map_copyout_page_lists")) != KERN_SUCCESS) { + vm_map_unlock(dst_map); + return result; + } + last = vm_map_find_entry_anywhere(dst_map, size, 0, TRUE, &start); if (last == NULL) { @@ -4390,6 +4463,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) vm_map_entry_t new_entry; pmap_t new_pmap = pmap_create((vm_size_t) 0); vm_size_t new_size = 0; + vm_size_t new_size_none = 0; vm_size_t entry_size; vm_object_t object; @@ -4524,6 +4598,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) old_entry->vme_start); new_size += entry_size; + new_size_none += ((old_entry->max_protection == VM_PROT_NONE) ? entry_size : 0); break; case VM_INHERIT_COPY: @@ -4572,6 +4647,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) new_size += entry_size; + new_size_none += ((old_entry->max_protection == VM_PROT_NONE) ? 
entry_size : 0); break; } @@ -4609,6 +4685,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) vm_map_copy_insert(new_map, last, copy); new_size += entry_size; + new_size_none += ((old_entry->max_protection == VM_PROT_NONE) ? entry_size : 0); /* * Pick up the traversal at the end of @@ -4630,6 +4707,8 @@ vm_map_t vm_map_fork(vm_map_t old_map) } new_map->size = new_size; + new_map->size_none = new_size_none; + vm_map_copy_limits(old_map, new_map); vm_map_unlock(old_map); return(new_map); diff --git a/vm/vm_map.h b/vm/vm_map.h index 900f1218..0bdd985f 100644 --- a/vm/vm_map.h +++ b/vm/vm_map.h @@ -184,6 +184,7 @@ struct vm_map { pmap_t pmap; /* Physical map */ vm_size_t size; /* virtual size */ vm_size_t size_wired; /* wired size */ + vm_size_t size_none; /* none protection size */ int ref_count; /* Reference count */ decl_simple_lock_data(, ref_lock) /* Lock for ref_count field */ vm_map_entry_t hint; /* hint for quick lookups */ @@ -198,6 +199,10 @@ struct vm_map { unsigned int timestamp; /* Version number */ const char *name; /* Associated name */ + + vm_size_t size_cur_limit; /* current limit on virtual memory size */ + vm_size_t size_max_limit; /* maximum size an unprivileged user can + change current limit to */ }; #define vm_map_to_entry(map) ((struct vm_map_entry *) &(map)->hdr.links) @@ -582,4 +587,12 @@ void _vm_map_clip_end( vm_offset_t end, boolean_t link_gap); +/* + * This function is called to inherit the virtual memory limits + * from one vm_map_t to another. + */ +void vm_map_copy_limits( + vm_map_t src, + vm_map_t dst); + #endif /* _VM_VM_MAP_H_ */ diff --git a/vm/vm_user.c b/vm/vm_user.c index 62aedad3..517fb8d2 100644 --- a/vm/vm_user.c +++ b/vm/vm_user.c @@ -804,3 +804,69 @@ kern_return_t vm_pages_phys( return KERN_SUCCESS; } + +/* + * vm_set_size_limit + * + * Sets the current/maximum virtual adress space limits + * of the `target_task`. + * + * The host privileged port must be provided to increase + * the max limit. 
+ */ +kern_return_t +vm_set_size_limit( + const ipc_port_t host_port, + vm_map_t map, + vm_size_t current_limit, + vm_size_t max_limit) +{ + ipc_kobject_type_t ikot_host = IKOT_NONE; + + if (current_limit > max_limit) + return KERN_INVALID_ARGUMENT; + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; + + if (!IP_VALID(host_port)) + return KERN_INVALID_HOST; + ip_lock(host_port); + if (ip_active(host_port)) + ikot_host = ip_kotype(host_port); + ip_unlock(host_port); + + if (ikot_host != IKOT_HOST && ikot_host != IKOT_HOST_PRIV) + return KERN_INVALID_HOST; + + vm_map_lock(map); + if (max_limit > map->size_max_limit && ikot_host != IKOT_HOST_PRIV) { + vm_map_unlock(map); + return KERN_INVALID_HOST; + } + + map->size_cur_limit = current_limit; + map->size_max_limit = max_limit; + vm_map_unlock(map); + + return KERN_SUCCESS; +} + +/* + * vm_get_size_limit + * + * Gets the current/maximum virtual adress space limits + * of the provided `map`. + */ +kern_return_t +vm_get_size_limit( + vm_map_t map, + vm_size_t *current_limit, + vm_size_t *max_limit) +{ + vm_map_lock_read(map); + *current_limit = map->size_cur_limit; + *max_limit = map->size_max_limit; + vm_map_unlock_read(map); + + return KERN_SUCCESS; +} -- 2.45.2