Re: [PATCH gnumach v1] Implement per task virtual memory limit

Sergey Bugaev Tue, 24 Dec 2024 01:45:37 -0800

Getting really close :)

On Mon, Dec 23, 2024 at 10:07 PM <dnie...@gmail.com> wrote:
> +
> +/*
> + *     Set a task virtual memory limit parameters
> + */
> +routine vm_set_size_limit(
> +       host_port     : mach_port_t;
> +       map           : vm_task_t;
> +       current_limit : vm_size_t;
> +       max_limit     : vm_size_t);


Would be good to document the host priv port usage. For example: "To
increase the max limit, a privileged host control port must be
supplied."

> +
> +/*
> + *     Get a task virtual memory limit parameters
> + */
> +routine vm_get_size_limit(
> +               map           : vm_task_t;
> +       out     current_limit : vm_size_t;
> +       out     max_limit     : vm_size_t);

Ok.

> diff --git a/kern/task.c b/kern/task.c
> index bd57ca2a..255f4388 100644
> --- a/kern/task.c
> +++ b/kern/task.c
> @@ -126,6 +126,11 @@ task_create_kernel(
>                                         trunc_page(VM_MAX_USER_ADDRESS));
>                         if (new_task->map == VM_MAP_NULL)
>                                 pmap_destroy(new_pmap);
> +                       else {
> +                               vm_map_lock(parent_task->map);
> +                               vm_map_copy_limits(parent_task->map, 
> new_task->map);
> +                               vm_map_unlock(parent_task->map);
> +                       }

Here too, vm_map_{lock,unlock}_read should suffice.

>                 }
>         }
>         if (new_task->map == VM_MAP_NULL) {
> diff --git a/tests/test-vm.c b/tests/test-vm.c
> index 4ece792e..74fcc309 100644
> --- a/tests/test-vm.c
> +++ b/tests/test-vm.c
> @@ -75,11 +75,55 @@ static void test_wire()
>    // TODO check that all memory is actually wired or unwired
>  }
>
> +void test_vm_limit()
> +{
> +  kern_return_t err;
> +  vm_address_t mem;
> +  const size_t M_128M = 128l * 1024l * 1024l;
> +  const size_t M_512M = 512l * 1024l * 1024l;
> +  vm_size_t cur;
> +  vm_size_t max;
> +
> +  /* set VM memory limitations */
> +  err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, 
> M_512M);
> +  ASSERT(err == KERN_SUCCESS, "cannot set VM limits");

There's ASSERT_RET, which also stringifies the error code.

> +  /* check limits are actually saved */
> +  err = vm_get_size_limit(mach_task_self(), &cur, &max);
> +  ASSERT(err == KERN_SUCCESS, "getting the VM limit failed");
> +  ASSERT(cur == M_128M, "cur limit was not expected");
> +  ASSERT(max == M_512M, "max limit was not expected");
> +
> +  /* check we can no longer increase the hard limit */
> +  err = vm_set_size_limit(mach_host_self(), mach_task_self(), M_128M, M_512M 
> * 2);
> +  ASSERT(err == KERN_INVALID_HOST, "raising VM hard limit shall fail");

Well, uh. Looking at it like this, I wonder if this should return
KERN_INVALID_ARGUMENT or something in this case, not
KERN_INVALID_HOST. "Invalid host" makes sense if the way you think of
it is "I want to increase the size limit, would this host port
suffice"; but when written out like this it reads the other way
around: here's a host port, I'm attempting to set the limit ("no
sorry, increasing the limit is not allowed" => KERN_INVALID_ARGUMENT).

> +  /* alloc some memory below the limit */
> +  err = vm_allocate(mach_task_self(), &mem, (128l * 1024l), TRUE);
> +  ASSERT(err == KERN_SUCCESS, "allocating memory below the limit must 
> succeed");
> +  vm_deallocate(mach_task_self(), mem, (128l * 1024l));

Could check for the error here as well :)

> +  /* alloc a bigger chunk to make it hit the limit */
> +  err = vm_allocate(mach_task_self(), &mem, (M_512M * 2), TRUE);
> +  ASSERT(err == KERN_NO_SPACE, "allocation must fail with KERN_NO_SPACE");
> +
> +  /* check that "root" can increase the hard limit */
> +  err = vm_set_size_limit(host_priv(), mach_task_self(), M_128M, M_512M * 2);
> +  ASSERT(err == KERN_SUCCESS, "privileged tasks shall be allowed to increase 
> the max limit");
> +
> +  /* check limits are actually saved */
> +  err = vm_get_size_limit(mach_task_self(), &cur, &max);
> +  ASSERT(err == KERN_SUCCESS, "getting the VM limit failed");
> +  ASSERT(cur == M_128M, "cur limit was not expected");
> +  ASSERT(max == (M_512M * 2), "max limit was not expected");

And perhaps check that you can alloc that bigger chunk now?

> diff --git a/vm/vm_map.c b/vm/vm_map.c
> index 03d22ea1..eded31a0 100644
> --- a/vm/vm_map.c
> +++ b/vm/vm_map.c
> @@ -189,6 +189,7 @@ void vm_map_setup(
>
>         map->size = 0;
>         map->size_wired = 0;
> +       map->size_none = 0;
>         map->ref_count = 1;
>         map->pmap = pmap;
>         map->min_offset = min;
> @@ -198,6 +199,9 @@ void vm_map_setup(
>         map->first_free = vm_map_to_entry(map);
>         map->hint = vm_map_to_entry(map);
>         map->name = NULL;
> +       /* TODO add to default limit the swap size */
> +       map->size_cur_limit = vm_page_mem_size() / 2;
> +       map->size_max_limit = vm_page_mem_size() / 2;

Perhaps also skip this for kernel maps?

>         vm_map_lock_init(map);
>         simple_lock_init(&map->ref_lock);
>         simple_lock_init(&map->hint_lock);
> @@ -268,6 +272,49 @@ void vm_map_unlock(struct vm_map *map)
>         lock_write_done(&map->lock);
>  }
> @@ -789,6 +836,10 @@ kern_return_t vm_map_find_entry(
>         vm_map_entry_t  entry, new_entry;
>         vm_offset_t     start;
>         vm_offset_t     end;
> +       kern_return_t   err;
> +
> +       if ((err = vm_map_enforce_limit(map, size, "vm_map_find_entry")) != 
> KERN_SUCCESS)
> +               return err;

Hm, so the map is said to be locked, good. But: we don't have
max_protection here (?), so we cannot skip the check for max_prot ==
VM_PROT_NONE. Oh well.

>
>         entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start);
>
> @@ -1037,6 +1088,16 @@ kern_return_t vm_map_enter(
>                         RETURN(KERN_NO_SPACE);
>         }
>
> +       /*
> +        *      If the allocation has protection equal to VM_PROT_NONE,
> +        *      don't check for limits as the map's size_none field is
> +        *      not yet incremented.
> +        */
> +       if (max_protection != VM_PROT_NONE) {
> +               if ((result = vm_map_enforce_limit(map, size, 
> "vm_map_enter")) != KERN_SUCCESS)
> +                       RETURN(result);
> +       }

Ok.

> +
>         /*
>          *      At this point,
>          *              "start" and "end" should define the endpoints of the
> @@ -1160,6 +1221,7 @@ kern_return_t vm_map_enter(
>
>         vm_map_entry_link(map, entry, new_entry);
>         map->size += size;
> +       map->size_none += ((max_protection == VM_PROT_NONE) ? size : 0);

I'd rather have written this as just

if (max_protection == VM_PROT_NONE)
        map->size_none += size;

but whatever.

>
>         /*
>          *      Update the free space hint and the lookup hint
> @@ -2042,6 +2104,7 @@ void vm_map_entry_delete(
>
>         vm_map_entry_unlink(map, entry);
>         map->size -= size;
> +       map->size_none -= ((entry->max_protection == VM_PROT_NONE) ? size : 
> 0);
>
>         vm_map_entry_dispose(map, entry);
>  }
> @@ -2882,6 +2945,11 @@ kern_return_t vm_map_copyout(
>                 return KERN_NO_SPACE;
>         }
>
> +       if ((kr = vm_map_enforce_limit(dst_map, size, "vm_map_copyout")) != 
> KERN_SUCCESS) {
> +               vm_map_unlock(dst_map);
> +               return kr;
> +       }

Ok. Perhaps add a comment mentioning that the new entries are
protected VM_PROT_DEFAULT / VM_PROT_ALL, hence no need to adjust
map->size_none.

> +
>         /*
>          *      Adjust the addresses in the copy chain, and
>          *      reset the region attributes.
> @@ -3055,6 +3123,11 @@ kern_return_t vm_map_copyout_page_list(
>
>         vm_map_lock(dst_map);
>
> +       if ((result = vm_map_enforce_limit(dst_map, size, 
> "vm_map_copyout_page_lists")) != KERN_SUCCESS) {
> +               vm_map_unlock(dst_map);
> +               return result;
> +       }

Ok. (Though in my understanding nothing does copyout of page lists.
Page lists are primarily the format used for copying *in* memory
supplied by pagers.)

> +
>         last = vm_map_find_entry_anywhere(dst_map, size, 0, TRUE, &start);
>
>         if (last == NULL) {
> @@ -4390,6 +4463,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
>         vm_map_entry_t  new_entry;
>         pmap_t          new_pmap = pmap_create((vm_size_t) 0);
>         vm_size_t       new_size = 0;
> +       vm_size_t       new_size_none = 0;
>         vm_size_t       entry_size;
>         vm_object_t     object;
>
> @@ -4524,6 +4598,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
>                                 old_entry->vme_start);
>
>                         new_size += entry_size;
> +                       new_size_none += ((old_entry->max_protection == 
> VM_PROT_NONE) ? entry_size : 0);
>                         break;
>
>                 case VM_INHERIT_COPY:
> @@ -4572,6 +4647,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
>
>
>                                         new_size += entry_size;
> +                                       new_size_none += 
> ((old_entry->max_protection == VM_PROT_NONE) ? entry_size : 0);
>                                         break;
>                                 }
>
> @@ -4609,6 +4685,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
>
>                         vm_map_copy_insert(new_map, last, copy);
>                         new_size += entry_size;
> +                       new_size_none += ((old_entry->max_protection == 
> VM_PROT_NONE) ? entry_size : 0);
>
>                         /*
>                          *      Pick up the traversal at the end of
> @@ -4630,6 +4707,8 @@ vm_map_t vm_map_fork(vm_map_t old_map)
>         }
>
>         new_map->size = new_size;
> +       new_map->size_none = new_size_none;
> +       vm_map_copy_limits(old_map, new_map);
>         vm_map_unlock(old_map);
>
>         return(new_map);
> diff --git a/vm/vm_map.h b/vm/vm_map.h
> index 900f1218..0bdd985f 100644
> --- a/vm/vm_map.h
> +++ b/vm/vm_map.h
> @@ -184,6 +184,7 @@ struct vm_map {
>         pmap_t                  pmap;           /* Physical map */
>         vm_size_t               size;           /* virtual size */
>         vm_size_t               size_wired;     /* wired size */
> +       vm_size_t               size_none;      /* none protection size */
>         int                     ref_count;      /* Reference count */
>         decl_simple_lock_data(, ref_lock)       /* Lock for ref_count field */
>         vm_map_entry_t          hint;           /* hint for quick lookups */
> @@ -198,6 +199,10 @@ struct vm_map {
>         unsigned int            timestamp;      /* Version number */
>
>         const char              *name;          /* Associated name */
> +
> +       vm_size_t               size_cur_limit; /* current limit on virtual 
> memory size */
> +       vm_size_t               size_max_limit; /* maximum size an 
> unprivileged user can
> +                                                  change current limit to */
>  };
>
>  #define vm_map_to_entry(map)   ((struct vm_map_entry *) &(map)->hdr.links)
> @@ -582,4 +587,12 @@ void _vm_map_clip_end(
>         vm_offset_t             end,
>         boolean_t               link_gap);
>
> +/*
> + *      This function is called to inherit the virtual memory limits
> + *      from one vm_map_t to another.
> + */
> +void vm_map_copy_limits(
> +       vm_map_t src,
> +       vm_map_t dst);
> +
>  #endif /* _VM_VM_MAP_H_ */
> diff --git a/vm/vm_user.c b/vm/vm_user.c
> index 62aedad3..517fb8d2 100644
> --- a/vm/vm_user.c
> +++ b/vm/vm_user.c
> @@ -804,3 +804,69 @@ kern_return_t vm_pages_phys(
>
>         return KERN_SUCCESS;
>  }
> +
> +/*
> + *     vm_set_size_limit
> + *
> + *     Sets the current/maximum virtual adress space limits
> + *     of the `target_task`.
> + *
> + *     The host privileged port must be provided to increase
> + *      the max limit.
> + */
> +kern_return_t
> +vm_set_size_limit(
> +       const ipc_port_t host_port,
> +       vm_map_t         map,
> +       vm_size_t        current_limit,
> +       vm_size_t        max_limit)
> +{
> +       ipc_kobject_type_t ikot_host = IKOT_NONE;
> +
> +       if (current_limit > max_limit)
> +               return KERN_INVALID_ARGUMENT;
> +       if (map == VM_MAP_NULL)
> +               return KERN_INVALID_ARGUMENT;
> +
> +       if (!IP_VALID(host_port))
> +               return KERN_INVALID_HOST;
> +       ip_lock(host_port);
> +       if (ip_active(host_port))
> +               ikot_host = ip_kotype(host_port);
> +       ip_unlock(host_port);
> +
> +       if (ikot_host != IKOT_HOST && ikot_host != IKOT_HOST_PRIV)
> +               return KERN_INVALID_HOST;
> +
> +       vm_map_lock(map);
> +       if (max_limit > map->size_max_limit && ikot_host != IKOT_HOST_PRIV) {
> +               vm_map_unlock(map);
> +               return KERN_INVALID_HOST;
> +       }
> +
> +       map->size_cur_limit = current_limit;
> +       map->size_max_limit = max_limit;
> +       vm_map_unlock(map);
> +
> +       return KERN_SUCCESS;
> +}

Ok. My current understanding is you indeed don't need to
vm_map_deallocate in the successful case, but it would be good to get
a confirmation from Samuel or Flávio here.

> +
> +/*
> + *     vm_get_size_limit
> + *
> + *     Gets the current/maximum virtual adress space limits
> + *     of the provided `map`.
> + */
> +kern_return_t
> +vm_get_size_limit(
> +       vm_map_t        map,
> +       vm_size_t       *current_limit,
> +       vm_size_t       *max_limit)
> +{

Missing the check for map == VM_MAP_NULL.

> +       vm_map_lock_read(map);
> +       *current_limit = map->size_cur_limit;
> +       *max_limit = map->size_max_limit;
> +       vm_map_unlock_read(map);
> +
> +       return KERN_SUCCESS;
> +}

And remember, you still have to handle the cases of:
- deallocating a VM_PROT_NONE mapping,
- downgrading an existing mapping to VM_PROT_NONE.

Sergey

Re: [PATCH gnumach v1] Implement per task virtual memory limit

Reply via email to