Getting really close :) On Mon, Dec 23, 2024 at 10:07 PM <dnie...@gmail.com> wrote: > + > +/* > + * Set a task virtual memory limit parameters > + */ > +routine vm_set_size_limit( > + host_port : mach_port_t; > + map : vm_task_t; > + current_limit : vm_size_t; > + max_limit : vm_size_t);
Would be good to document the host priv port usage. For example: "To increase the max limit, a privileged host control port must be supplied." > + > +/* > + * Get a task virtual memory limit parameters > + */ > +routine vm_get_size_limit( > + map : vm_task_t; > + out current_limit : vm_size_t; > + out max_limit : vm_size_t); Ok. > diff --git a/kern/task.c b/kern/task.c > index bd57ca2a..255f4388 100644 > --- a/kern/task.c > +++ b/kern/task.c > @@ -126,6 +126,11 @@ task_create_kernel( > trunc_page(VM_MAX_USER_ADDRESS)); > if (new_task->map == VM_MAP_NULL) > pmap_destroy(new_pmap); > + else { > + vm_map_lock(parent_task->map); > + vm_map_copy_limits(parent_task->map, > new_task->map); > + vm_map_unlock(parent_task->map); > + } Here too, vm_map_{lock,unlock}_read should suffice. > } > } > if (new_task->map == VM_MAP_NULL) { > diff --git a/tests/test-vm.c b/tests/test-vm.c > index 4ece792e..74fcc309 100644 > --- a/tests/test-vm.c > +++ b/tests/test-vm.c > @@ -75,11 +75,55 @@ static void test_wire() > // TODO check that all memory is actually wired or unwired > } > > +void test_vm_limit() > +{ > + kern_return_t err; > + vm_address_t mem; > + const size_t M_128M = 128l * 1024l * 1024l; > + const size_t M_512M = 512l * 1024l * 1024l; > + vm_size_t cur; > + vm_size_t max; > + > + /* set VM memory limitations */ > + err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, > M_512M); > + ASSERT(err == KERN_SUCCESS, "cannot set VM limits"); There's ASSERT_RET, which also stringifies the error code. > + /* check limits are actually saved */ > + err = vm_get_size_limit(mach_task_self(), &cur, &max); > + ASSERT(err == KERN_SUCCESS, "getting the VM limit failed"); > + ASSERT(cur == M_128M, "cur limit was not expected"); > + ASSERT(max == M_512M, "max limit was not expected"); > + > + /* check we can no longer increase the hard limit */ > + err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M > * 2); > + ASSERT(err == KERN_INVALID_HOST, "raising VM hard limit shall fail"); Well, uh. Looking at it like this, I wonder if this should return KERN_INVALID_ARGUMENT or something in this case, not KERN_INVALID_HOST. "Invalid host" makes sense if the way you think of it is "I want to increase the size limit, would this host port suffice"; but when written out like this it reads the other way around: here's a host port, I'm attempting to set the limit ("no sorry, increasing the limit is not allowed" => KERN_INVALID_ARGUMENT). > + /* alloc some memory below the limit */ > + err = vm_allocate(mach_task_self(), &mem, (128l * 1024l), TRUE); > + ASSERT(err == KERN_SUCCESS, "allocating memory below the limit must > succeed"); > + vm_deallocate(mach_task_self(), mem, (128l * 1024l)); Could check for the error here as well :) > + /* alloc a bigger chunk to make it hit the limit */ > + err = vm_allocate(mach_task_self(), &mem, (M_512M * 2), TRUE); > + ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE"); > + > + /* check that "root" can increase the hard limit */ > + err = vm_set_size_limit(host_priv(), mach_task_self(), M_128M, M_512M * 2); > + ASSERT(err == KERN_SUCCESS, "privileged tasks shall be allowed to increase > the max limit"); > + > + /* check limits are actually saved */ > + err = vm_get_size_limit(mach_task_self(), &cur, &max); > + ASSERT(err == KERN_SUCCESS, "getting the VM limit failed"); > + ASSERT(cur == M_128M, "cur limit was not expected"); > + ASSERT(max == (M_512M * 2), "max limit was not expected"); And perhaps check that you can alloc that bigger chunk now? > diff --git a/vm/vm_map.c b/vm/vm_map.c > index 03d22ea1..eded31a0 100644 > --- a/vm/vm_map.c > +++ b/vm/vm_map.c > @@ -189,6 +189,7 @@ void vm_map_setup( > > map->size = 0; > map->size_wired = 0; > + map->size_none = 0; > map->ref_count = 1; > map->pmap = pmap; > map->min_offset = min; > @@ -198,6 +199,9 @@ void vm_map_setup( > map->first_free = vm_map_to_entry(map); > map->hint = vm_map_to_entry(map); > map->name = NULL; > + /* TODO add to default limit the swap size */ > + map->size_cur_limit = vm_page_mem_size() / 2; > + map->size_max_limit = vm_page_mem_size() / 2; Perhaps also skip this for kernel maps? > vm_map_lock_init(map); > simple_lock_init(&map->ref_lock); > simple_lock_init(&map->hint_lock); > @@ -268,6 +272,49 @@ void vm_map_unlock(struct vm_map *map) > lock_write_done(&map->lock); > } > @@ -789,6 +836,10 @@ kern_return_t vm_map_find_entry( > vm_map_entry_t entry, new_entry; > vm_offset_t start; > vm_offset_t end; > + kern_return_t err; > + > + if ((err = vm_map_enforce_limit(map, size, "vm_map_find_entry")) != > KERN_SUCCESS) > + return err; Hm, so the map is said to be locked, good. But: we don't have max_protection here (?), so we cannot skip the check for max_prot == VM_PROT_NONE. Oh well. > > entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start); > > @@ -1037,6 +1088,16 @@ kern_return_t vm_map_enter( > RETURN(KERN_NO_SPACE); > } > > + /* > + * If the allocation has protection equal to VM_PROT_NONE, > + * don't check for limits as the map's size_none field is > + * not yet incremented. > + */ > + if (max_protection != VM_PROT_NONE) { > + if ((result = vm_map_enforce_limit(map, size, > "vm_map_enter")) != KERN_SUCCESS) > + RETURN(result); > + } Ok. > + > /* > * At this point, > * "start" and "end" should define the endpoints of the > @@ -1160,6 +1221,7 @@ kern_return_t vm_map_enter( > > vm_map_entry_link(map, entry, new_entry); > map->size += size; > + map->size_none += ((max_protection == VM_PROT_NONE) ? size : 0); I'd rather have written this as just if (max_protection == VM_PROT_NONE) map->size_none += size; but whatever. > > /* > * Update the free space hint and the lookup hint > @@ -2042,6 +2104,7 @@ void vm_map_entry_delete( > > vm_map_entry_unlink(map, entry); > map->size -= size; > + map->size_none -= ((entry->max_protection == VM_PROT_NONE) ? size : > 0); > > vm_map_entry_dispose(map, entry); > } > @@ -2882,6 +2945,11 @@ kern_return_t vm_map_copyout( > return KERN_NO_SPACE; > } > > + if ((kr = vm_map_enforce_limit(dst_map, size, "vm_map_copyout")) != > KERN_SUCCESS) { > + vm_map_unlock(dst_map); > + return kr; > + } Ok. Perhaps add a comment mentioning that the new entries are protected VM_PROT_DEFAULT / VM_PROT_ALL, hence no need to adjust map->size_none. > + > /* > * Adjust the addresses in the copy chain, and > * reset the region attributes. > @@ -3055,6 +3123,11 @@ kern_return_t vm_map_copyout_page_list( > > vm_map_lock(dst_map); > > + if ((result = vm_map_enforce_limit(dst_map, size, > "vm_map_copyout_page_lists")) != KERN_SUCCESS) { > + vm_map_unlock(dst_map); > + return result; > + } Ok. (Though in my understanding nothing does copyout of page lists. Page lists are primarily the format used for copying *in* memory supplied by pagers.) > + > last = vm_map_find_entry_anywhere(dst_map, size, 0, TRUE, &start); > > if (last == NULL) { > @@ -4390,6 +4463,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) > vm_map_entry_t new_entry; > pmap_t new_pmap = pmap_create((vm_size_t) 0); > vm_size_t new_size = 0; > + vm_size_t new_size_none = 0; > vm_size_t entry_size; > vm_object_t object; > > @@ -4524,6 +4598,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) > old_entry->vme_start); > > new_size += entry_size; > + new_size_none += ((old_entry->max_protection == > VM_PROT_NONE) ? entry_size : 0); > break; > > case VM_INHERIT_COPY: > @@ -4572,6 +4647,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) > > > new_size += entry_size; > + new_size_none += > ((old_entry->max_protection == VM_PROT_NONE) ? entry_size : 0); > break; > } > > @@ -4609,6 +4685,7 @@ vm_map_t vm_map_fork(vm_map_t old_map) > > vm_map_copy_insert(new_map, last, copy); > new_size += entry_size; > + new_size_none += ((old_entry->max_protection == > VM_PROT_NONE) ? entry_size : 0); > > /* > * Pick up the traversal at the end of > @@ -4630,6 +4707,8 @@ vm_map_t vm_map_fork(vm_map_t old_map) > } > > new_map->size = new_size; > + new_map->size_none = new_size_none; > + vm_map_copy_limits(old_map, new_map); > vm_map_unlock(old_map); > > return(new_map); > diff --git a/vm/vm_map.h b/vm/vm_map.h > index 900f1218..0bdd985f 100644 > --- a/vm/vm_map.h > +++ b/vm/vm_map.h > @@ -184,6 +184,7 @@ struct vm_map { > pmap_t pmap; /* Physical map */ > vm_size_t size; /* virtual size */ > vm_size_t size_wired; /* wired size */ > + vm_size_t size_none; /* none protection size */ > int ref_count; /* Reference count */ > decl_simple_lock_data(, ref_lock) /* Lock for ref_count field */ > vm_map_entry_t hint; /* hint for quick lookups */ > @@ -198,6 +199,10 @@ struct vm_map { > unsigned int timestamp; /* Version number */ > > const char *name; /* Associated name */ > + > + vm_size_t size_cur_limit; /* current limit on virtual > memory size */ > + vm_size_t size_max_limit; /* maximum size an > unprivileged user can > + change current limit to */ > }; > > #define vm_map_to_entry(map) ((struct vm_map_entry *) &(map)->hdr.links) > @@ -582,4 +587,12 @@ void _vm_map_clip_end( > vm_offset_t end, > boolean_t link_gap); > > +/* > + * This function is called to inherit the virtual memory limits > + * from one vm_map_t to another. > + */ > +void vm_map_copy_limits( > + vm_map_t src, > + vm_map_t dst); > + > #endif /* _VM_VM_MAP_H_ */ > diff --git a/vm/vm_user.c b/vm/vm_user.c > index 62aedad3..517fb8d2 100644 > --- a/vm/vm_user.c > +++ b/vm/vm_user.c > @@ -804,3 +804,69 @@ kern_return_t vm_pages_phys( > > return KERN_SUCCESS; > } > + > +/* > + * vm_set_size_limit > + * > + * Sets the current/maximum virtual adress space limits > + * of the `target_task`. > + * > + * The host privileged port must be provided to increase > + * the max limit. > + */ > +kern_return_t > +vm_set_size_limit( > + const ipc_port_t host_port, > + vm_map_t map, > + vm_size_t current_limit, > + vm_size_t max_limit) > +{ > + ipc_kobject_type_t ikot_host = IKOT_NONE; > + > + if (current_limit > max_limit) > + return KERN_INVALID_ARGUMENT; > + if (map == VM_MAP_NULL) > + return KERN_INVALID_ARGUMENT; > + > + if (!IP_VALID(host_port)) > + return KERN_INVALID_HOST; > + ip_lock(host_port); > + if (ip_active(host_port)) > + ikot_host = ip_kotype(host_port); > + ip_unlock(host_port); > + > + if (ikot_host != IKOT_HOST && ikot_host != IKOT_HOST_PRIV) > + return KERN_INVALID_HOST; > + > + vm_map_lock(map); > + if (max_limit > map->size_max_limit && ikot_host != IKOT_HOST_PRIV) { > + vm_map_unlock(map); > + return KERN_INVALID_HOST; > + } > + > + map->size_cur_limit = current_limit; > + map->size_max_limit = max_limit; > + vm_map_unlock(map); > + > + return KERN_SUCCESS; > +} Ok. My current understanding is you indeed don't need to vm_map_deallocate in the successful case, but it would be good to get a confirmation from Samuel or Flávio here. > + > +/* > + * vm_get_size_limit > + * > + * Gets the current/maximum virtual adress space limits > + * of the provided `map`. > + */ > +kern_return_t > +vm_get_size_limit( > + vm_map_t map, > + vm_size_t *current_limit, > + vm_size_t *max_limit) > +{ Missing the check for map == VM_MAP_NULL. > + vm_map_lock_read(map); > + *current_limit = map->size_cur_limit; > + *max_limit = map->size_max_limit; > + vm_map_unlock_read(map); > + > + return KERN_SUCCESS; > +} And remember, you still have to handle the cases of: - deallocating a VM_PROT_NONE mapping, - downgrading an existing mapping to VM_PROT_NONE. Sergey