Hi,

On Fri, 2024-10-11 at 13:38 +0800, Tiwei Bie wrote:
> When a PTE is updated in the page table, the _PAGE_NEWPAGE bit will
> always be set. And the corresponding page will always be mapped or
> unmapped depending on whether the PTE is present or not. The check
> on the _PAGE_NEWPROT bit is not really reachable. Abandoning it will
> allow us to simplify the code and remove the unreachable code.

Oh, nice cleanup! And I like that mprotect is gone, as I don't want it
in SECCOMP mode :-)
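Just to spell out my understanding of why the branch is dead, here is
a toy model of the old flush logic (only the bit values are copied
from pgtable.h; flush_action() and everything else is made up for
illustration):

    #include <stdio.h>

    #define _PAGE_PRESENT 0x001
    #define _PAGE_NEWPAGE 0x002
    #define _PAGE_NEWPROT 0x004

    /* Mirrors the if/else-if cascade in the old update_pte_range() */
    static const char *flush_action(unsigned long pte)
    {
            if (pte & _PAGE_NEWPAGE)        /* always true after set_pte() */
                    return (pte & _PAGE_PRESENT) ? "mmap" : "munmap";
            else if (pte & _PAGE_NEWPROT)   /* hence never reached */
                    return "mprotect";
            return "nothing";
    }

    int main(void)
    {
            /* set_pte() unconditionally goes through pte_mknewpage(),
             * so every PTE the flush walks has _PAGE_NEWPAGE set until
             * pte_mkuptodate() clears it: */
            printf("%s\n", flush_action(_PAGE_PRESENT | _PAGE_NEWPAGE |
                                        _PAGE_NEWPROT)); /* -> mmap */
            printf("%s\n", flush_action(_PAGE_NEWPAGE)); /* -> munmap */
            return 0;
    }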
Maybe we should rename _PAGE_NEWPAGE to something like _PAGE_NEEDSYNC?
I think that would make it clearer how everything ties together. I've
put a rough sketch of what I mean at the bottom of this mail.

Anyway, the change looks good to me.

Benjamin

Reviewed-by: Benjamin Berg <benjamin.b...@intel.com>

> Signed-off-by: Tiwei Bie <tiwei....@antgroup.com>
> ---
>  arch/um/include/asm/pgtable.h           | 40 ++++-----------
>  arch/um/include/shared/os.h             |  2 -
>  arch/um/include/shared/skas/stub-data.h |  1 -
>  arch/um/kernel/skas/stub.c              | 10 ----
>  arch/um/kernel/tlb.c                    | 66 +++++++++++--------------
>  arch/um/os-Linux/skas/mem.c             | 21 --------
>  6 files changed, 37 insertions(+), 103 deletions(-)
> 
> diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
> index bd7a9593705f..a32424cfe792 100644
> --- a/arch/um/include/asm/pgtable.h
> +++ b/arch/um/include/asm/pgtable.h
> @@ -12,7 +12,6 @@
>  
>  #define _PAGE_PRESENT	0x001
>  #define _PAGE_NEWPAGE	0x002
> -#define _PAGE_NEWPROT	0x004
>  #define _PAGE_RW	0x020
>  #define _PAGE_USER	0x040
>  #define _PAGE_ACCESSED	0x080
> @@ -151,23 +150,12 @@ static inline int pte_newpage(pte_t pte)
>  	return pte_get_bits(pte, _PAGE_NEWPAGE);
>  }
>  
> -static inline int pte_newprot(pte_t pte)
> -{
> -	return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
> -}
> -
>  /*
>   * =================================
>   * Flags setting section.
>   * =================================
>   */
>  
> -static inline pte_t pte_mknewprot(pte_t pte)
> -{
> -	pte_set_bits(pte, _PAGE_NEWPROT);
> -	return(pte);
> -}
> -
>  static inline pte_t pte_mkclean(pte_t pte)
>  {
>  	pte_clear_bits(pte, _PAGE_DIRTY);
> @@ -184,17 +172,14 @@ static inline pte_t pte_wrprotect(pte_t pte)
>  {
>  	if (likely(pte_get_bits(pte, _PAGE_RW)))
>  		pte_clear_bits(pte, _PAGE_RW);
> -	else
> -		return pte;
> -	return(pte_mknewprot(pte));
> +	return pte;
>  }
>  
>  static inline pte_t pte_mkread(pte_t pte)
>  {
> -	if (unlikely(pte_get_bits(pte, _PAGE_USER)))
> -		return pte;
> -	pte_set_bits(pte, _PAGE_USER);
> -	return(pte_mknewprot(pte));
> +	if (likely(!pte_get_bits(pte, _PAGE_USER)))
> +		pte_set_bits(pte, _PAGE_USER);
> +	return pte;
>  }
>  
>  static inline pte_t pte_mkdirty(pte_t pte)
> @@ -211,18 +196,15 @@ static inline pte_t pte_mkyoung(pte_t pte)
>  
>  static inline pte_t pte_mkwrite_novma(pte_t pte)
>  {
> -	if (unlikely(pte_get_bits(pte, _PAGE_RW)))
> -		return pte;
> -	pte_set_bits(pte, _PAGE_RW);
> -	return(pte_mknewprot(pte));
> +	if (likely(!pte_get_bits(pte, _PAGE_RW)))
> +		pte_set_bits(pte, _PAGE_RW);
> +	return pte;
>  }
>  
>  static inline pte_t pte_mkuptodate(pte_t pte)
>  {
>  	pte_clear_bits(pte, _PAGE_NEWPAGE);
> -	if(pte_present(pte))
> -		pte_clear_bits(pte, _PAGE_NEWPROT);
> -	return(pte);
> +	return pte;
>  }
>  
>  static inline pte_t pte_mknewpage(pte_t pte)
> @@ -236,12 +218,10 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
>  	pte_copy(*pteptr, pteval);
>  
>  	/* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so
> -	 * fix_range knows to unmap it. _PAGE_NEWPROT is specific to
> -	 * mapped pages.
> +	 * update_pte_range knows to unmap it.
>  	 */
>  
>  	*pteptr = pte_mknewpage(*pteptr);
> -	if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr);
>  }
>  
>  #define PFN_PTE_SHIFT		PAGE_SHIFT
> @@ -298,8 +278,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
>  	({ pte_t pte;					\
>  							\
>  	 pte_set_val(pte, page_to_phys(page), (pgprot));	\
> -	 if (pte_present(pte))				\
> -		 pte_mknewprot(pte_mknewpage(pte));	\
>  	 pte;})
>  
>  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index bf539fee7831..09f8201de5db 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -279,8 +279,6 @@ int map(struct mm_id *mm_idp, unsigned long virt,
>  	unsigned long len, int prot, int phys_fd,
>  	unsigned long long offset);
>  int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
> -int protect(struct mm_id *mm_idp, unsigned long addr,
> -	    unsigned long len, unsigned int prot);
>  
>  /* skas/process.c */
>  extern int is_skas_winch(int pid, int fd, void *data);
> diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
> index 3fbdda727373..81a4cace032c 100644
> --- a/arch/um/include/shared/skas/stub-data.h
> +++ b/arch/um/include/shared/skas/stub-data.h
> @@ -30,7 +30,6 @@ enum stub_syscall_type {
>  	STUB_SYSCALL_UNSET = 0,
>  	STUB_SYSCALL_MMAP,
>  	STUB_SYSCALL_MUNMAP,
> -	STUB_SYSCALL_MPROTECT,
>  };
>  
>  struct stub_syscall {
> diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
> index 5d52ffa682dc..796fc266d3bb 100644
> --- a/arch/um/kernel/skas/stub.c
> +++ b/arch/um/kernel/skas/stub.c
> @@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct stub_data *d)
>  				return -1;
>  			}
>  			break;
> -		case STUB_SYSCALL_MPROTECT:
> -			res = stub_syscall3(__NR_mprotect,
> -					    sc->mem.addr, sc->mem.length,
> -					    sc->mem.prot);
> -			if (res) {
> -				d->err = res;
> -				d->syscall_data_len = i;
> -				return -1;
> -			}
> -			break;
>  		default:
>  			d->err = -95; /* EOPNOTSUPP */
>  			d->syscall_data_len = i;
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 548af31d4111..23c1f550cd7c 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -23,9 +23,6 @@ struct vm_ops {
>  		    int phys_fd, unsigned long long offset);
>  	int (*unmap)(struct mm_id *mm_idp,
>  		     unsigned long virt, unsigned long len);
> -	int (*mprotect)(struct mm_id *mm_idp,
> -			unsigned long virt, unsigned long len,
> -			unsigned int prot);
>  };
>  
>  static int kern_map(struct mm_id *mm_idp,
> @@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp,
>  	return os_unmap_memory((void *)virt, len);
>  }
>  
> -static int kern_mprotect(struct mm_id *mm_idp,
> -			 unsigned long virt, unsigned long len,
> -			 unsigned int prot)
> -{
> -	return os_protect_memory((void *)virt, len,
> -				 prot & UM_PROT_READ, prot & UM_PROT_WRITE,
> -				 1);
> -}
> -
>  void report_enomem(void)
>  {
>  	printk(KERN_ERR "UML ran out of memory on the host side! "
> @@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
>  				   struct vm_ops *ops)
>  {
>  	pte_t *pte;
> -	int r, w, x, prot, ret = 0;
> +	int ret = 0;
>  
>  	pte = pte_offset_kernel(pmd, addr);
>  	do {
> -		r = pte_read(*pte);
> -		w = pte_write(*pte);
> -		x = pte_exec(*pte);
> -		if (!pte_young(*pte)) {
> -			r = 0;
> -			w = 0;
> -		} else if (!pte_dirty(*pte))
> -			w = 0;
> -
> -		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
> -			(x ? UM_PROT_EXEC : 0));
> -		if (pte_newpage(*pte)) {
> -			if (pte_present(*pte)) {
> -				__u64 offset;
> -				unsigned long phys = pte_val(*pte) & PAGE_MASK;
> -				int fd = phys_mapping(phys, &offset);
> -
> -				ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
> -						prot, fd, offset);
> -			} else
> -				ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
> -		} else if (pte_newprot(*pte))
> -			ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
> +		if (!pte_newpage(*pte))
> +			continue;
> +
> +		if (pte_present(*pte)) {
> +			__u64 offset;
> +			unsigned long phys = pte_val(*pte) & PAGE_MASK;
> +			int fd = phys_mapping(phys, &offset);
> +			int r, w, x, prot;
> +
> +			r = pte_read(*pte);
> +			w = pte_write(*pte);
> +			x = pte_exec(*pte);
> +			if (!pte_young(*pte)) {
> +				r = 0;
> +				w = 0;
> +			} else if (!pte_dirty(*pte))
> +				w = 0;
> +
> +			prot = (r ? UM_PROT_READ : 0) |
> +			       (w ? UM_PROT_WRITE : 0) |
> +			       (x ? UM_PROT_EXEC : 0);
> +
> +			ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
> +					prot, fd, offset);
> +		} else
> +			ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
> +
>  		*pte = pte_mkuptodate(*pte);
>  	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
>  	return ret;
> @@ -180,11 +172,9 @@ int um_tlb_sync(struct mm_struct *mm)
>  	if (mm == &init_mm) {
>  		ops.mmap = kern_map;
>  		ops.unmap = kern_unmap;
> -		ops.mprotect = kern_mprotect;
>  	} else {
>  		ops.mmap = map;
>  		ops.unmap = unmap;
> -		ops.mprotect = protect;
>  	}
>  
>  	pgd = pgd_offset(mm, addr);
> diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
> index 9a13ac23c606..d7f1814b0e5a 100644
> --- a/arch/um/os-Linux/skas/mem.c
> +++ b/arch/um/os-Linux/skas/mem.c
> @@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
>  
>  	return 0;
>  }
> -
> -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
> -	    unsigned int prot)
> -{
> -	struct stub_syscall *sc;
> -
> -	/* Compress with previous syscall if that is possible */
> -	sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
> -	if (sc && sc->mem.prot == prot) {
> -		sc->mem.length += len;
> -		return 0;
> -	}
> -
> -	sc = syscall_stub_alloc(mm_idp);
> -	sc->syscall = STUB_SYSCALL_MPROTECT;
> -	sc->mem.addr = addr;
> -	sc->mem.length = len;
> -	sc->mem.prot = prot;
> -
> -	return 0;
> -}
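As for the rename: completely untested, just to show the shape I have
in mind, reusing the pte_{get,set,clear}_bits() helpers that already
exist in arch/um/include/asm/pgtable.h (the new names are my
invention):

    #define _PAGE_NEEDSYNC	0x002	/* was _PAGE_NEWPAGE */

    /* "does this PTE still need syncing to the host?" rather than
     * "is this a new page?" */
    static inline int pte_needsync(pte_t pte)
    {
            return pte_get_bits(pte, _PAGE_NEEDSYNC);
    }

    static inline pte_t pte_mkneedsync(pte_t pte)
    {
            pte_set_bits(pte, _PAGE_NEEDSYNC);
            return pte;
    }

    static inline pte_t pte_mkuptodate(pte_t pte)
    {
            pte_clear_bits(pte, _PAGE_NEEDSYNC);
            return pte;
    }

update_pte_range() would then test pte_needsync() instead of
pte_newpage(), which I think reads much more naturally.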