On Thu, Aug 24, 2017 at 07:14:47PM +1000, Paul Mackerras wrote:
> Nixiaoming pointed out that there is a memory leak in
> kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd()
> fails; the memory allocated for the kvmppc_spapr_tce_table struct
> is not freed, and nor are the pages allocated for the iommu
> tables.  In addition, we have already incremented the process's
> count of locked memory pages, and this doesn't get restored on
> error.
> 
> David Hildenbrand pointed out that there is a race in that the
> function checks early on that there is not already an entry in the
> stt->iommu_tables list with the same LIOBN, but an entry with the
> same LIOBN could get added between then and when the new entry is
> added to the list.
> 
> This fixes all three problems.  To simplify things, we now call
> anon_inode_getfd() before placing the new entry in the list.  The
> check for an existing entry is done while holding the kvm->lock
> mutex, immediately before adding the new entry to the list.
> Finally, on failure we now call kvmppc_account_memlimit to
> decrement the process's count of locked memory pages.
> 
> Reported-by: Nixiaoming <nixiaom...@huawei.com>
> Reported-by: David Hildenbrand <da...@redhat.com>
> Signed-off-by: Paul Mackerras <pau...@ozlabs.org>

Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>

> ---
> v2: Don't overwrite stt in loop over spapr_tce_tables
> 
>  arch/powerpc/kvm/book3s_64_vio.c | 56 
> ++++++++++++++++++++++++----------------
>  1 file changed, 34 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c 
> b/arch/powerpc/kvm/book3s_64_vio.c
> index a160c14..53766e2 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>                                  struct kvm_create_spapr_tce_64 *args)
>  {
>       struct kvmppc_spapr_tce_table *stt = NULL;
> +     struct kvmppc_spapr_tce_table *siter;
>       unsigned long npages, size;
>       int ret = -ENOMEM;
>       int i;
> +     int fd = -1;
>  
>       if (!args->size)
>               return -EINVAL;
>  
> -     /* Check this LIOBN hasn't been previously allocated */
> -     list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
> -             if (stt->liobn == args->liobn)
> -                     return -EBUSY;
> -     }
> -
>       size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
>       npages = kvmppc_tce_pages(size);
>       ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
> -     if (ret) {
> -             stt = NULL;
> -             goto fail;
> -     }
> +     if (ret)
> +             return ret;
>  
>       ret = -ENOMEM;
>       stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
>                     GFP_KERNEL);
>       if (!stt)
> -             goto fail;
> +             goto fail_acct;
>  
>       stt->liobn = args->liobn;
>       stt->page_shift = args->page_shift;
> @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>                       goto fail;
>       }
>  
> -     kvm_get_kvm(kvm);
> +     ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> +                                 stt, O_RDWR | O_CLOEXEC);
> +     if (ret < 0)
> +             goto fail;
>  
>       mutex_lock(&kvm->lock);
> -     list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> +
> +     /* Check this LIOBN hasn't been previously allocated */
> +     ret = 0;
> +     list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
> +             if (siter->liobn == args->liobn) {
> +                     ret = -EBUSY;
> +                     break;
> +             }
> +     }
> +
> +     if (!ret) {
> +             list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
> +             kvm_get_kvm(kvm);
> +     }
>  
>       mutex_unlock(&kvm->lock);
>  
> -     return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> -                             stt, O_RDWR | O_CLOEXEC);
> +     if (!ret)
> +             return fd;
>  
> -fail:
> -     if (stt) {
> -             for (i = 0; i < npages; i++)
> -                     if (stt->pages[i])
> -                             __free_page(stt->pages[i]);
> +     put_unused_fd(fd);
>  
> -             kfree(stt);
> -     }
> + fail:
> +     for (i = 0; i < npages; i++)
> +             if (stt->pages[i])
> +                     __free_page(stt->pages[i]);
> +
> +     kfree(stt);
> + fail_acct:
> +     kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
>       return ret;
>  }
>  

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to