From: "Eric W. Biederman" <ebied...@xmission.com>

Implement a helper elf_load() that wraps elf_map() and performs all
of the necessary work to ensure that when "memsz > filesz" the bytes
described by "memsz > filesz" are zeroed.

An outstanding issue is if the first segment has filesz 0, and has a
randomized location. But that is the same as today.

In this change I replaced an open coded padzero() that did not clear
all of the way to the end of the page, with padzero() that does.

I also stopped checking the return of padzero() as there is at least
one known case where testing for failure is the wrong thing to do.
It looks like binfmt_elf_fdpic may have the proper set of tests
for when error handling can be safely completed.

I found a couple of commits in the old history
https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git,
that look very interesting in understanding this code.

commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail")
commit c6e2227e4a3e ("[SPARC64]: Missing user access return value checks in 
fs/binfmt_elf.c and fs/compat.c")
commit 5bf3be033f50 ("v2.4.10.1 -> v2.4.10.2")

Looking at commit 39b56d902bf3 ("[PATCH] binfmt_elf: clearing bss may fail"):
>  commit 39b56d902bf35241e7cba6cc30b828ed937175ad
>  Author: Pavel Machek <pa...@ucw.cz>
>  Date:   Wed Feb 9 22:40:30 2005 -0800
>
>     [PATCH] binfmt_elf: clearing bss may fail
>
>     So we discover that Borland's Kylix application builder emits weird elf
>     files which describe a non-writeable bss segment.
>
>     So remove the clear_user() check at the place where we zero out the bss.  
> I
>     don't _think_ there are any security implications here (plus we've never
>     checked that clear_user() return value, so whoops if it is a problem).
>
>     Signed-off-by: Pavel Machek <pa...@suse.cz>
>     Signed-off-by: Andrew Morton <a...@osdl.org>
>     Signed-off-by: Linus Torvalds <torva...@osdl.org>

It seems pretty clear that binfmt_elf_fdpic with skipping clear_user() for
non-writable segments and otherwise calling clear_user(), aka padzero(),
and checking it's return code is the right thing to do.

I just skipped the error checking as that avoids breaking things.

And notably, it looks like Borland's Kylix died in 2005 so it might be
safe to just consider read-only segments with memsz > filesz an error.

Reported-by: Sebastian Ott <seb...@redhat.com>
Reported-by: Thomas Weißschuh <li...@weissschuh.net>
Closes: 
https://lkml.kernel.org/r/20230914-bss-alloc-v1-1-78de67d2c...@weissschuh.net
Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
Link: https://lore.kernel.org/r/87sf71f123....@email.froward.int.ebiederm.org
Signed-off-by: Kees Cook <keesc...@chromium.org>
---
 fs/binfmt_elf.c | 111 +++++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 63 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7b3d2d491407..2a615f476e44 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -110,25 +110,6 @@ static struct linux_binfmt elf_format = {
 
 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
 
-static int set_brk(unsigned long start, unsigned long end, int prot)
-{
-       start = ELF_PAGEALIGN(start);
-       end = ELF_PAGEALIGN(end);
-       if (end > start) {
-               /*
-                * Map the last of the bss segment.
-                * If the header is requesting these pages to be
-                * executable, honour that (ppc32 needs this).
-                */
-               int error = vm_brk_flags(start, end - start,
-                               prot & PROT_EXEC ? VM_EXEC : 0);
-               if (error)
-                       return error;
-       }
-       current->mm->start_brk = current->mm->brk = end;
-       return 0;
-}
-
 /* We need to explicitly zero any fractional pages
    after the data section (i.e. bss).  This would
    contain the junk from the file that should not
@@ -406,6 +387,51 @@ static unsigned long elf_map(struct file *filep, unsigned 
long addr,
        return(map_addr);
 }
 
+static unsigned long elf_load(struct file *filep, unsigned long addr,
+               const struct elf_phdr *eppnt, int prot, int type,
+               unsigned long total_size)
+{
+       unsigned long zero_start, zero_end;
+       unsigned long map_addr;
+
+       if (eppnt->p_filesz) {
+               map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
+               if (BAD_ADDR(map_addr))
+                       return map_addr;
+               if (eppnt->p_memsz > eppnt->p_filesz) {
+                       zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+                               eppnt->p_filesz;
+                       zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+                               eppnt->p_memsz;
+
+                       /* Zero the end of the last mapped page */
+                       padzero(zero_start);
+               }
+       } else {
+               map_addr = zero_start = ELF_PAGESTART(addr);
+               zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+                       eppnt->p_memsz;
+       }
+       if (eppnt->p_memsz > eppnt->p_filesz) {
+               /*
+                * Map the last of the segment.
+                * If the header is requesting these pages to be
+                * executable, honour that (ppc32 needs this).
+                */
+               int error;
+
+               zero_start = ELF_PAGEALIGN(zero_start);
+               zero_end = ELF_PAGEALIGN(zero_end);
+
+               error = vm_brk_flags(zero_start, zero_end - zero_start,
+                                    prot & PROT_EXEC ? VM_EXEC : 0);
+               if (error)
+                       map_addr = error;
+       }
+       return map_addr;
+}
+
+
 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
 {
        elf_addr_t min_addr = -1;
@@ -829,7 +855,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        struct elf_phdr *elf_property_phdata = NULL;
        unsigned long elf_bss, elf_brk;
-       int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long e_entry;
@@ -1040,33 +1065,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
                if (elf_ppnt->p_type != PT_LOAD)
                        continue;
 
-               if (unlikely (elf_brk > elf_bss)) {
-                       unsigned long nbyte;
-
-                       /* There was a PT_LOAD segment with p_memsz > p_filesz
-                          before this one. Map anonymous pages, if needed,
-                          and clear the area.  */
-                       retval = set_brk(elf_bss + load_bias,
-                                        elf_brk + load_bias,
-                                        bss_prot);
-                       if (retval)
-                               goto out_free_dentry;
-                       nbyte = ELF_PAGEOFFSET(elf_bss);
-                       if (nbyte) {
-                               nbyte = ELF_MIN_ALIGN - nbyte;
-                               if (nbyte > elf_brk - elf_bss)
-                                       nbyte = elf_brk - elf_bss;
-                               if (clear_user((void __user *)elf_bss +
-                                                       load_bias, nbyte)) {
-                                       /*
-                                        * This bss-zeroing can fail if the ELF
-                                        * file specifies odd protections. So
-                                        * we don't check the return value
-                                        */
-                               }
-                       }
-               }
-
                elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
                                     !!interpreter, false);
 
@@ -1162,7 +1160,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                        }
                }
 
-               error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
+               error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR_VALUE(error) ?
@@ -1217,10 +1215,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
-               if (k > elf_brk) {
-                       bss_prot = elf_prot;
+               if (k > elf_brk)
                        elf_brk = k;
-               }
        }
 
        e_entry = elf_ex->e_entry + load_bias;
@@ -1232,18 +1228,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
        start_data += load_bias;
        end_data += load_bias;
 
-       /* Calling set_brk effectively mmaps the pages that we need
-        * for the bss and break sections.  We must do this before
-        * mapping in the interpreter, to make sure it doesn't wind
-        * up getting placed where the bss needs to go.
-        */
-       retval = set_brk(elf_bss, elf_brk, bss_prot);
-       if (retval)
-               goto out_free_dentry;
-       if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
-               retval = -EFAULT; /* Nobody gets to see this, but.. */
-               goto out_free_dentry;
-       }
+       current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);
 
        if (interpreter) {
                elf_entry = load_elf_interp(interp_elf_ex,
-- 
2.34.1


Reply via email to