[PATCH] android: binder: Disable preemption while holding the global binder lock
In Android systems, the display pipeline relies on low latency binder transactions and is therefore sensitive to delays caused by contention for the global binder lock. Jank is siginificantly reduced by disabling preemption while the global binder lock is held. Originally-from: Riley Andrews Signed-off-by: Todd Kjos --- drivers/android/binder.c | 194 +++ 1 file changed, 146 insertions(+), 48 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 16288e7..c36e420 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; if (files == NULL) return -ESRCH; @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + preempt_enable_no_resched(); + ret = __alloc_fd(files, 0, rlim_cur, flags); + preempt_disable(); + + return ret; } /* @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - if (proc->files) + if (proc->files) { + preempt_enable_no_resched(); __fd_install(proc->files, fd, file); + preempt_disable(); + } } /* @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) { trace_binder_lock(tag); mutex_lock(&binder_main_lock); + preempt_disable(); trace_binder_locked(tag); } @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) { trace_binder_unlock(tag); mutex_unlock(&binder_main_lock); + preempt_enable(); +} + +static inline void *kzalloc_nopreempt(size_t size) +{ + void *ptr; + + ptr = kzalloc(size, GFP_NOWAIT); + if (ptr) + return ptr; + + preempt_enable_no_resched(); + ptr = kzalloc(size, GFP_KERNEL); + preempt_disable(); + + return ptr; +} + +static inline long copy_to_user_nopreempt(void __user *to, + const void *from, long n) +{ + long ret; + + preempt_enable_no_resched(); + ret = copy_to_user(to, from, n); + preempt_disable(); + return ret; +} + +static inline long copy_from_user_nopreempt(void *to, +const void __user *from, +long n) +{ + long ret; + + preempt_enable_no_resched(); + ret = copy_from_user(to, from, n); + preempt_disable(); + return ret; } +#define get_user_nopreempt(x, ptr) \ +({ \ + int __ret; \ + preempt_enable_no_resched(); \ + __ret = get_user(x, ptr); \ + preempt_disable(); \ + __ret; \ +}) + +#define put_user_nopreempt(x, ptr) \ +({ \ + int __ret; \ + preempt_enable_no_resched(); \ + __ret = put_user(x, ptr); \ + preempt_disable(); \ + __ret; \ +}) + static void binder_set_nice(long nice) { long min_nice; @@ -568,6 +634,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, else mm = get_task_mm(proc->tsk); + preempt_enable_no_resched(); + if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; @@ -622,6 +690,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, up_write(&mm->mmap_sem); mmput(mm); } + + preempt_disable(); + return 0; free_range: @@ -644,6 +715,9 @@ err_no_vma: up_write(&mm->mmap_sem); mmput(mm); } + + preempt_disable(); + return -ENOMEM; } @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc, return NULL; } - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = kzalloc_nopreempt(sizeof(*node)); if (node == NULL) return NULL; binder_stats_created(BINDER_STAT_NODE); @@ -1040,7 +1114,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, else return ref; } - new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); + new_ref = kzalloc_nopreempt(sizeof(*ref)); if (new_ref == NULL) return NULL; binder_stats_created(BINDER_STAT_REF); @@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc *proc, e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ - t = kzalloc(sizeof(*t), GFP_KERNEL); + t = kzalloc_nopreempt(sizeof(*t)); if (t == NULL) { return_error = BR_FAILED_REPLY; goto err_alloc_t_failed; } binder_stats_created(BINDER_STAT_TRANSACTION); - tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); + tcomplete = kzalloc_nopreempt(sizeof(*tcomplete)); if (tcomplete == NULL) { return_error = BR_FAILED_REPLY; goto err_alloc_tcomplete_failed; @@ -1502,15 +1576,16 @@ static void binder_transaction(struct binder_proc *proc, offp = (binder_size_t *)(t->buffer->data + ALIGN(tr->data_size, sizeof(void *))); - if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) - tr->data.ptr.buffer, tr-&g
Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
This was introduced in the 2015 Nexus devices and should have been submitted to the kernel then since we keep forward porting it to each new device. On Thu, Sep 8, 2016 at 9:12 AM, Todd Kjos wrote: > In Android systems, the display pipeline relies on low > latency binder transactions and is therefore sensitive to > delays caused by contention for the global binder lock. > Jank is siginificantly reduced by disabling preemption > while the global binder lock is held. > > Originally-from: Riley Andrews > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 194 > +++ > 1 file changed, 146 insertions(+), 48 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 16288e7..c36e420 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > struct files_struct *files = proc->files; > unsigned long rlim_cur; > unsigned long irqs; > + int ret; > > if (files == NULL) > return -ESRCH; > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); > unlock_task_sighand(proc->tsk, &irqs); > > - return __alloc_fd(files, 0, rlim_cur, flags); > + preempt_enable_no_resched(); > + ret = __alloc_fd(files, 0, rlim_cur, flags); > + preempt_disable(); > + > + return ret; > } > > /* > @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct > binder_proc *proc, int flags) > static void task_fd_install( > struct binder_proc *proc, unsigned int fd, struct file *file) > { > - if (proc->files) > + if (proc->files) { > + preempt_enable_no_resched(); > __fd_install(proc->files, fd, file); > + preempt_disable(); > + } > } > > /* > @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag) > { > trace_binder_lock(tag); > mutex_lock(&binder_main_lock); > + preempt_disable(); > trace_binder_locked(tag); > } > > @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag) > { > trace_binder_unlock(tag); > mutex_unlock(&binder_main_lock); > + preempt_enable(); > +} > + > +static inline void *kzalloc_nopreempt(size_t size) > +{ > + void *ptr; > + > + ptr = kzalloc(size, GFP_NOWAIT); > + if (ptr) > + return ptr; > + > + preempt_enable_no_resched(); > + ptr = kzalloc(size, GFP_KERNEL); > + preempt_disable(); > + > + return ptr; > +} > + > +static inline long copy_to_user_nopreempt(void __user *to, > + const void *from, long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_to_user(to, from, n); > + preempt_disable(); > + return ret; > +} > + > +static inline long copy_from_user_nopreempt(void *to, > +const void __user *from, > +long n) > +{ > + long ret; > + > + preempt_enable_no_resched(); > + ret = copy_from_user(to, from, n); > + preempt_disable(); > + return ret; > } > > +#define get_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = get_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > +#define put_user_nopreempt(x, ptr) \ > +({ \ > + int __ret; \ > + preempt_enable_no_resched(); \ > + __ret = put_user(x, ptr); \ > + preempt_disable(); \ > + __ret; \ > +}) > + > static void binder_set_nice(long nice) > { > long min_nice; > @@ -568,6 +634,8 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > else > mm = get_task_mm(proc->tsk); > > + preempt_enable_no_resched(); > + > if (mm) { > down_write(&mm->mmap_sem); > vma = proc->vma; > @@ -622,6 +690,9 @@ static int binder_update_page_range(struct > binder_proc *proc, int allocate, > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return 0; > > free_range: > @@ -644,6 +715,9 @@ err_no_vma: > up_write(&mm->mmap_sem); > mmput(mm); > } > + > + preempt_disable(); > + > return -ENOMEM; > } > > @@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct > binder_proc *proc, > return NULL; > } > > - node = kzalloc(sizeof(*node), GFP_KERNEL); > + node = kzalloc_nopreempt(sizeof(*node)); > if (node == NULL) > return NULL; > binder_stats_created(BINDER_STAT_NODE); > @@ -1040,7 +1114,7 @@ static struct binder_ref > *binder_get_ref_for_node(struct binder_proc *proc, > else &g
[PATCH] binder: fix proc->files use-after-free
proc->files cleanup is initiated by binder_vma_close. Therefore a reference on the binder_proc is not enough to prevent the files_struct from being released while the binder_proc still has a reference. This can lead to an attempt to dereference the stale pointer obtained from proc->files prior to proc->files cleanup. This has been seen once in task_get_unused_fd_flags() when __alloc_fd() is called with a stale "files". The fix is to always use get_files_struct() to obtain struct_files so that the refcount on the files_struct is used to prevent a premature free. proc->files is removed since we get it every time. Signed-off-by: Todd Kjos --- drivers/android/binder.c | 63 +++- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index fddf76ef5bd6..794e58c14b15 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -458,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -481,8 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(invariant after initialized) * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -529,7 +526,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -875,22 +871,34 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); +struct files_struct *binder_get_files_struct(struct binder_proc *proc) +{ + return get_files_struct(proc->tsk); +} + static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { - struct files_struct *files = proc->files; + struct files_struct *files; unsigned long rlim_cur; unsigned long irqs; + int ret; + files = binder_get_files_struct(proc); if (files == NULL) return -ESRCH; - if (!lock_task_sighand(proc->tsk, &irqs)) - return -EMFILE; + if (!lock_task_sighand(proc->tsk, &irqs)) { + ret = -EMFILE; + goto err; + } rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + ret = __alloc_fd(files, 0, rlim_cur, flags); +err: + put_files_struct(files); + return ret; } /* @@ -899,8 +907,12 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - if (proc->files) - __fd_install(proc->files, fd, file); + struct files_struct *files = binder_get_files_struct(proc); + + if (files) { + __fd_install(files, fd, file); + put_files_struct(files); + } } /* @@ -908,18 +920,20 @@ static void task_fd_install( */ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { + struct files_struct *files = binder_get_files_struct(proc); int retval; - if (proc->files == NULL) + if (files == NULL) return -ESRCH; - retval = __close_fd(proc->files, fd); + retval = __close_fd(files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; + put_files_struct(files); return retval; } @@ -4561,7 +4575,6 @@ static void binder_vma_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); binder_alloc_vma_close(&proc->alloc); - binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } static int binder_vm_fault(struct vm_fault *vmf) @@ -4603,10 +4616,8 @@ static int binder_mmap(struct fil
[RFC] vruntime updated incorrectly when rt_mutex boots prio?
This issue was discovered on a 4.9-based android device, but the relevant mainline code appears to be the same. The symptom is that over time the some workloads become sluggish resulting in missed frames or sluggishness. It appears to be the same issue described in http://lists.infradead.org/pipermail/linux-arm-kernel/2018-March/567836.html. Here is the scenario: A task is deactivated while still in the fair class. The task is then boosted to RT, so rt_mutex_setprio() is called. This changes the task to RT and calls check_class_changed(), which eventually calls detach_task_cfs_rq(), which is where vruntime_normalized() sees that the task's state is TASK_WAKING, which results in skipping the subtraction of the rq's min_vruntime from the task's vruntime. Later, when the prio is deboosted and the task is moved back to the fair class, the fair rq's min_vruntime is added to the task's vruntime, resulting in vruntime inflation. When investigating the problem, it was found that the change below fixes the problem by forcing vruntime_normalized() to return false if the sched_class is not CFS (though we're concerned that it might introduce other issues): diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 91f7b3322a15..267056f2e2ca 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11125,7 +11125,7 @@ static inline bool vruntime_normalized(struct task_struct *p) * - A task which has been woken up by try_to_wake_up() and * waiting for actually being woken up by sched_ttwu_pending(). */ - if (!se->sum_exec_runtime || p->state == TASK_WAKING) + if (!se->sum_exec_runtime || (p->state == TASK_WAKING && p->sched_class == &fair_sched_class)) return true; return false; Do folks agree that this is incorrect behavior? Does this fix look appropriate and safe? Other ideas? -Todd
[PATCH] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(&proc->files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, &irqs)) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); - unlock_task_sighand(proc->tsk, &am
[PATCH v2] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- v2: use "%zu" printk format for size_t drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(&proc->files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, &irqs)) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_
[PATCH] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- v2: use "%zu" printk format for size_t drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(&proc->files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, &irqs)) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_
Re: [PATCH v2] binder: use standard functions to allocate fds
Sorry, forgot to bump the version. Ignore this one. On Tue, Aug 28, 2018 at 1:43 PM Todd Kjos wrote: > > Binder uses internal fs interfaces to allocate and install fds: > > __alloc_fd > __fd_install > __close_fd > get_files_struct > put_files_struct > > These were used to support the passing of fds between processes > as part of a transaction. The actual allocation and installation > of the fds in the target process was handled by the sending > process so the standard functions, alloc_fd() and fd_install() > which assume task==current couldn't be used. > > This patch refactors this mechanism so that the fds are > allocated and installed by the target process allowing the > standard functions to be used. > > The sender now creates a list of fd fixups that contains the > struct *file and the address to fixup with the new fd once > it is allocated. This list is processed by the target process > when the transaction is dequeued. > > A new error case is introduced by this change. If an async > transaction with file descriptors cannot allocate new > fds in the target (probably due to out of file descriptors), > the transaction is discarded with a log message. In the old > implementation this would have been detected in the sender > context and failed prior to sending. > > Signed-off-by: Todd Kjos > --- > v2: use "%zu" printk format for size_t > > drivers/android/Kconfig| 2 +- > drivers/android/binder.c | 387 - > drivers/android/binder_trace.h | 36 ++- > 3 files changed, 260 insertions(+), 165 deletions(-) > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig > index 432e9ad770703..51e8250d113fa 100644 > --- a/drivers/android/Kconfig > +++ b/drivers/android/Kconfig > @@ -10,7 +10,7 @@ if ANDROID > > config ANDROID_BINDER_IPC > bool "Android Binder IPC Driver" > - depends on MMU > + depends on MMU && !CPU_CACHE_VIVT > default n > ---help--- > Binder is used in Android for both communication between processes, > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index d58763b6b0090..50856319bc7da 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -71,6 +71,7 @@ > #include > #include > #include > +#include > > #include > > @@ -457,9 +458,8 @@ struct binder_ref { > }; > > enum binder_deferred_state { > - BINDER_DEFERRED_PUT_FILES= 0x01, > - BINDER_DEFERRED_FLUSH= 0x02, > - BINDER_DEFERRED_RELEASE = 0x04, > + BINDER_DEFERRED_FLUSH= 0x01, > + BINDER_DEFERRED_RELEASE = 0x02, > }; > > /** > @@ -480,9 +480,6 @@ enum binder_deferred_state { > *(invariant after initialized) > * @tsk task_struct for group_leader of process > *(invariant after initialized) > - * @files files_struct for process > - *(protected by @files_lock) > - * @files_lockmutex to protect @files > * @deferred_work_node: element for binder_deferred_list > *(protected by binder_deferred_lock) > * @deferred_work:bitmap of deferred work to perform > @@ -527,8 +524,6 @@ struct binder_proc { > struct list_head waiting_threads; > int pid; > struct task_struct *tsk; > - struct files_struct *files; > - struct mutex files_lock; > struct hlist_node deferred_work_node; > int deferred_work; > bool is_dead; > @@ -611,6 +606,23 @@ struct binder_thread { > bool is_dead; > }; > > +/** > + * struct binder_txn_fd_fixup - transaction fd fixup list element > + * @fixup_entry: list entry > + * @file: struct file to be associated with new fd > + * @offset: offset in buffer data to this fixup > + * > + * List element for fd fixups in a transaction. Since file > + * descriptors need to be allocated in the context of the > + * target process, we pass each fd to be processed in this > + * struct. > + */ > +struct binder_txn_fd_fixup { > + struct list_head fixup_entry; > + struct file *file; > + size_t offset; > +}; > + > struct binder_transaction { > int debug_id; > struct binder_work work; > @@ -628,6 +640,7 @@ struct binder_transaction { > longpriority; > longsaved_priority; > kuid_t sender_euid; > + struct list_head fd_fixups; > /** > * @lock: protects @from, @to_proc
[PATCH v2] binder: use standard functions to allocate fds
Binder uses internal fs interfaces to allocate and install fds: __alloc_fd __fd_install __close_fd get_files_struct put_files_struct These were used to support the passing of fds between processes as part of a transaction. The actual allocation and installation of the fds in the target process was handled by the sending process so the standard functions, alloc_fd() and fd_install() which assume task==current couldn't be used. This patch refactors this mechanism so that the fds are allocated and installed by the target process allowing the standard functions to be used. The sender now creates a list of fd fixups that contains the struct *file and the address to fixup with the new fd once it is allocated. This list is processed by the target process when the transaction is dequeued. A new error case is introduced by this change. If an async transaction with file descriptors cannot allocate new fds in the target (probably due to out of file descriptors), the transaction is discarded with a log message. In the old implementation this would have been detected in the sender context and failed prior to sending. Signed-off-by: Todd Kjos --- v2: use "%zu" printk format for size_t drivers/android/Kconfig| 2 +- drivers/android/binder.c | 387 - drivers/android/binder_trace.h | 36 ++- 3 files changed, 260 insertions(+), 165 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index 432e9ad770703..51e8250d113fa 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -10,7 +10,7 @@ if ANDROID config ANDROID_BINDER_IPC bool "Android Binder IPC Driver" - depends on MMU + depends on MMU && !CPU_CACHE_VIVT default n ---help--- Binder is used in Android for both communication between processes, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b0090..50856319bc7da 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -457,9 +458,8 @@ struct binder_ref { }; enum binder_deferred_state { - BINDER_DEFERRED_PUT_FILES= 0x01, - BINDER_DEFERRED_FLUSH= 0x02, - BINDER_DEFERRED_RELEASE = 0x04, + BINDER_DEFERRED_FLUSH= 0x01, + BINDER_DEFERRED_RELEASE = 0x02, }; /** @@ -480,9 +480,6 @@ enum binder_deferred_state { *(invariant after initialized) * @tsk task_struct for group_leader of process *(invariant after initialized) - * @files files_struct for process - *(protected by @files_lock) - * @files_lockmutex to protect @files * @deferred_work_node: element for binder_deferred_list *(protected by binder_deferred_lock) * @deferred_work:bitmap of deferred work to perform @@ -527,8 +524,6 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; - struct files_struct *files; - struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -611,6 +606,23 @@ struct binder_thread { bool is_dead; }; +/** + * struct binder_txn_fd_fixup - transaction fd fixup list element + * @fixup_entry: list entry + * @file: struct file to be associated with new fd + * @offset: offset in buffer data to this fixup + * + * List element for fd fixups in a transaction. Since file + * descriptors need to be allocated in the context of the + * target process, we pass each fd to be processed in this + * struct. + */ +struct binder_txn_fd_fixup { + struct list_head fixup_entry; + struct file *file; + size_t offset; +}; + struct binder_transaction { int debug_id; struct binder_work work; @@ -628,6 +640,7 @@ struct binder_transaction { longpriority; longsaved_priority; kuid_t sender_euid; + struct list_head fd_fixups; /** * @lock: protects @from, @to_proc, and @to_thread * @@ -920,66 +933,6 @@ static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); -static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) -{ - unsigned long rlim_cur; - unsigned long irqs; - int ret; - - mutex_lock(&proc->files_lock); - if (proc->files == NULL) { - ret = -ESRCH; - goto err; - } - if (!lock_task_sighand(proc->tsk, &irqs)) { - ret = -EMFILE; - goto err; - } - rlim_cur = task_rlimit(proc->tsk, RLIMIT_
Re: [PATCH] binder: use standard functions to allocate fds
On Wed, Aug 29, 2018 at 12:00 AM Christoph Hellwig wrote: > > > config ANDROID_BINDER_IPC > > bool "Android Binder IPC Driver" > > - depends on MMU > > + depends on MMU && !CPU_CACHE_VIVT > > Thats is a purely arm specific symbol which should not be > used in common code. Nevermind that there generally should > be no good reason for it. It is true that the current design (10+ years old) has this flaw. VIVT cache support was the rationale for using the non-standard functions to allocate file descriptors from the sender context. There are no known cases of recent android releases running on a device with a VIVT cache architecture, but I did want to call this out. We're working on refactoring to eliminate this issue. > > > + fixup->offset = (uintptr_t)fdp - (uintptr_t)t->buffer->data; > > This looks completely broken. Why would you care at what exact > place the fd is placed? Oh, because you share an array with fds > with userspace, which is a hell of a bad idea, and then maninpulate > that buffer mapped to userspace from kernel threads. Well, android apps rely on this behavior (and have for 10+ years) so there is no way to avoid the need to manipulate the buffer from the kernel. We are working to refactor the driver to do this using standard mechanisms instead of relying on low-level internal functions. > > I think we just need to rm -rf drivers/android/binder*.c and be done > with it, as this piece of crap should never have been merged to start > with.
Re: [PATCH] binder: check for binder_thread allocation failure in binder_poll()
Looks good to me. On Tue, Jan 30, 2018 at 11:11 PM, Eric Biggers wrote: > From: Eric Biggers > > If the kzalloc() in binder_get_thread() fails, binder_poll() > dereferences the resulting NULL pointer. > > Fix it by returning POLLERR if the memory allocation failed. > > This bug was found by syzkaller using fault injection. > > Reported-by: syzbot > Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") > Cc: sta...@vger.kernel.org > Signed-off-by: Eric Biggers > --- > drivers/android/binder.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index d21040c5d343f..326ca8ea9ebcf 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -4391,6 +4391,8 @@ static __poll_t binder_poll(struct file *filp, > bool wait_for_proc_work; > > thread = binder_get_thread(proc); > + if (!thread) > + return POLLERR; > > binder_inner_proc_lock(thread->proc); > thread->looper |= BINDER_LOOPER_STATE_POLL; > -- > 2.16.1 >
Re: [PATCH] binder: use lockless list for deferred_work
Vitaly, can you say more about the behavior you observed that led you to make this change? It is not obvious what workload would cause the contention on this mutex to make a difference (at least in an Android environment). On Mon, Jan 22, 2018 at 7:44 AM, Greg Kroah-Hartman wrote: > On Mon, Jan 08, 2018 at 02:55:18PM +0100, Vitaly Wool wrote: >> Binder uses hlist for deferred list, which isn't a good match. >> It's slow and requires mutual exclusion mechanism to protect its >> operations. Moreover, having schedule_work() called under a mutex >> may cause significant delays and creates noticeable adverse effect >> on Binder performance. >> >> Deferred list in Binder is actually treated in a very simple way: >> either add an entry to it or delete the first entry from it. llist >> (lockless list) is a good match for such usage pattern, and it is >> of course quite a bit faster and doesn't require locking. >> >> To be able to add an entry to an llist only if it's not already on >> another llist, this patch adds two small helper functions. That is, >> llist_add_exclusive would only add a node if it's not already on a >> list, and llist_del_first_exclusive will delete the first node off >> the list and mark it as not being on any list. >> >> Signed-off-by: Vitaly Vul >> --- >> drivers/android/binder.c | 87 >> >> 1 file changed, 66 insertions(+), 21 deletions(-) > > Martijn and Todd, any objections to this patch? > > thanks, > > greg k-h
Re: [PATCH v3] android: binder: use VM_ALLOC to get vm area
On Mon, Jan 22, 2018 at 7:54 AM, Greg KH wrote: > On Wed, Jan 10, 2018 at 10:49:05AM +0800, Ganesh Mahendran wrote: >> VM_IOREMAP is used to access hardware through a mechanism called >> I/O mapped memory. Android binder is a IPC machanism which will >> not access I/O memory. >> >> And VM_IOREMAP has alignment requiement which may not needed in >> binder. >> __get_vm_area_node() >> { >> ... >> if (flags & VM_IOREMAP) >> align = 1ul << clamp_t(int, fls_long(size), >>PAGE_SHIFT, IOREMAP_MAX_ORDER); >> ... >> } >> >> This patch will save some kernel vm area, especially for 32bit os. >> >> In 32bit OS, kernel vm area is only 240MB. We may got below >> error when launching a app: >> >> <3>[ 4482.440053] binder_alloc: binder_alloc_mmap_handler: 15728 >> 8ce67000-8cf65000 get_vm_area failed -12 >> <3>[ 4483.218817] binder_alloc: binder_alloc_mmap_handler: 15745 >> 8ce67000-8cf65000 get_vm_area failed -12 >> >> Signed-off-by: Ganesh Mahendran >> >> V3: update comments >> V2: update comments >> --- >> drivers/android/binder_alloc.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) > > Martijn and Todd, any objections to this patch? Looks fine to me. Arve, do you remember the rationale for using VM_IOREMAP? > > thanks, > > greg k-h
Re: [PATCH V4] binder: ipc namespace support for android binder
On Thu, Nov 15, 2018 at 2:54 PM gre...@linuxfoundation.org wrote: ... > > A number of us have talked about this in the plumbers Android track, and > a different proposal for how to solve this has been made that should be > much more resiliant. So I will drop this patch from my queue and wait > for the patches based on the discussions we had there. > > I think there's some notes/slides on the discussion online somewhere, > but it hasn't been published as the conference is still happening, > otherwise I would link to it here... Here is a link to the session where you can look at the slides [1]. There was consensus that "binderfs" (slide 5) was the most promising -- but would be behind a config option to provide backwards compatibility for non-container use-cases. The etherpad notes are at [2] (look at "Dynamically Allocated Binder Devices" section) Christian Brauner will be sending out more details. -Todd [1] https://linuxplumbersconf.org/event/2/contributions/222/ [2] https://etherpad.openstack.org/p/Android > > thanks, > > greg k-h
Re: [PATCH V4] binder: ipc namespace support for android binder
+christ...@brauner.io +Martijn Coenen Christian, Does this patch work for your container use-cases? If not, please comment on this thread. Let's discuss at LPC this week. -Todd On Mon, Nov 12, 2018 at 1:38 AM chouryzhou(周威) wrote: > > Currently android's binder is not isolated by ipc namespace. Since binder > is a form of IPC and therefore should be tied to ipc namespace. With this > patch, we can run multiple instances of android container on one host. > > This patch move "binder_procs" and "binder_context" into ipc_namespace, > driver will find the context from it when opening. For debugfs, binder_proc > is namespace-aware, but not for binder dead nodes, binder_stats and > binder_transaction_log_entry (we added ipc inum to trace it). > > Signed-off-by: chouryzhou > Reviewed-by: Davidlohr Bueso > --- > drivers/android/binder.c | 133 > -- > include/linux/ipc_namespace.h | 15 + > ipc/namespace.c | 10 +++- > 3 files changed, 125 insertions(+), 33 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index cb30a524d16d..453265505b04 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -67,6 +67,8 @@ > #include > #include > #include > +#include > +#include > #include > #include > #include > @@ -80,13 +82,21 @@ > #include "binder_alloc.h" > #include "binder_trace.h" > > + > +#ifndef CONFIG_IPC_NS > +static struct ipc_namespace binder_ipc_ns = { > + .ns.inum = PROC_IPC_INIT_INO, > +}; > + > +#define ipcns (&binder_ipc_ns) > +#else > +#define ipcns (current->nsproxy->ipc_ns) > +#endif > + > static HLIST_HEAD(binder_deferred_list); > static DEFINE_MUTEX(binder_deferred_lock); > > static HLIST_HEAD(binder_devices); > -static HLIST_HEAD(binder_procs); > -static DEFINE_MUTEX(binder_procs_lock); > - > static HLIST_HEAD(binder_dead_nodes); > static DEFINE_SPINLOCK(binder_dead_nodes_lock); > > @@ -233,6 +243,7 @@ struct binder_transaction_log_entry { > uint32_t return_error; > uint32_t return_error_param; > const char *context_name; > + unsigned int ipc_inum; > }; > struct binder_transaction_log { > atomic_t cur; > @@ -263,19 +274,66 @@ static struct binder_transaction_log_entry > *binder_transaction_log_add( > } > > struct binder_context { > + struct hlist_node hlist; > struct binder_node *binder_context_mgr_node; > struct mutex context_mgr_node_lock; > > kuid_t binder_context_mgr_uid; > + int device; > const char *name; > }; > > struct binder_device { > struct hlist_node hlist; > struct miscdevice miscdev; > - struct binder_context context; > }; > > +void binder_exit_ns(struct ipc_namespace *ns) > +{ > + struct binder_context *context; > + struct hlist_node *tmp; > + > + mutex_destroy(&ns->binder_procs_lock); > + hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) { > + mutex_destroy(&context->context_mgr_node_lock); > + hlist_del(&context->hlist); > + kfree(context); > + } > +} > + > +int binder_init_ns(struct ipc_namespace *ns) > +{ > + int ret; > + struct binder_device *device; > + > + mutex_init(&ns->binder_procs_lock); > + INIT_HLIST_HEAD(&ns->binder_procs); > + INIT_HLIST_HEAD(&ns->binder_contexts); > + > + hlist_for_each_entry(device, &binder_devices, hlist) { > + struct binder_context *context; > + > + context = kzalloc(sizeof(*context), GFP_KERNEL); > + if (!context) { > + ret = -ENOMEM; > + goto err; > + } > + > + context->device = device->miscdev.minor; > + context->name = device->miscdev.name; > + context->binder_context_mgr_uid = INVALID_UID; > + mutex_init(&context->context_mgr_node_lock); > + > + hlist_add_head(&context->hlist, &ns->binder_contexts); > + } > + > + return 0; > +err: > + binder_exit_ns(ns); > + return ret; > +} > + > + > /** > * struct binder_work - work enqueued on a worklist > * @entry: node enqueued on list > @@ -2728,6 +2786,7 @@ static void binder_transaction(struct binder_proc *proc, > e->data_size = tr->data_size; > e->offsets_size = tr->offsets_size; > e->context_name = proc->context->name; > + e->ipc_inum = ipcns->ns.inum; > > if (reply) { > binder_inner_proc_lock(proc); > @@ -4922,6 +4981,7 @@ static int binder_open(struct inode *nodp, struct file > *filp) > { > struct binder_proc *proc; > struct binder_device *binder_dev; > + struct binder_context *context; > > binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, > current->group_leader->pid, current->pid); > @@
Re: [PATCH V4] binder: ipc namespace support for android binder
On Tue, Nov 13, 2018 at 12:12 AM chouryzhou(周威) wrote: > > > I have not received an answer to my questions in the last version of this > > patch > > set. Also it would be good if I could be Cc'ed by default. I can't hunt > > down all > > patches. > > I do not know of any kernel entity, specifically devices, that change > > namespaces > > on open(). > > This seems like an invitation for all kinds of security bugs. > > A device node belongs to one namespace only which is attached to the > > underlying kobject. Opening the device should never change that. > > Please look at how mqueue or shm are doing this. They don't change > > namespaces on open either. > > I have to say that is one of the main reasons why I disagree with that > > design. > > > > > > If we must return the same context when every open in proc, we can only > isolate > binder with mnt namespace instead of ipc namespace, what do you think, Todd? I don't have strong feelings on this -- it seems like a bizarre use-case to send the fd through a backchannel as christian describes, but I do agree it is strange behavior (though it seems safe to me since it prevents communication between unrelated entities). I don't know how mqueue and shm work, its worth a look since this patch is modelling their behavior... We'll talk about it here at LPC and then on this thread.
[PATCH v2] binder: fix use-after-free due to fdget() optimization
44d8047f1d87a ("binder: use standard functions to allocate fds") exposed a pre-existing issue in the binder driver. fdget() is used in ksys_ioctl() as a performance optimization. One of the rules associated with fdget() is that ksys_close() must not be called between the fdget() and the fdput(). There is a case where this requirement is not met in the binder driver (and possibly other drivers) which results in the reference count dropping to 0 when the device is still in use. This can result in use-after-free or other issues. This was observed with the following sequence of events: Task A and task B are connected via binder; task A has /dev/binder open at file descriptor number X. Both tasks are single-threaded. 1. task B sends a binder message with a file descriptor array (BINDER_TYPE_FDA) containing one file descriptor to task A 2. task A reads the binder message with the translated file descriptor number Y 3. task A uses dup2(X, Y) to overwrite file descriptor Y with the /dev/binder file 4. task A unmaps the userspace binder memory mapping; the reference count on task A's /dev/binder is now 2 5. task A closes file descriptor X; the reference count on task A's /dev/binder is now 1 6. task A forks off a child, task C, duplicating the file descriptor table; the reference count on task A's /dev/binder is now 2 7. task A invokes the BC_FREE_BUFFER command on file descriptor X to release the incoming binder message 8. fdget() in ksys_ioctl() suppresses the reference count increment, since the file descriptor table is not shared 9. the BC_FREE_BUFFER handler removes the file descriptor table entry for X and decrements the reference count of task A's /dev/binder file to 1 10.task C calls close(X), which drops the reference count of task A's /dev/binder to 0 and frees it 11.task A continues processing of the ioctl and accesses some property of e.g. the binder_proc => KASAN-detectable UAF Fixed by using get_file() / fput() in binder_ioctl(). Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds") Suggested-by: Jann Horn Signed-off-by: Todd Kjos Acked-by: Martijn Coenen --- v2: added "Fixes:" tag Should be added to 4.20-final if possible drivers/android/binder.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9f1000d2a40c7..d6979cf7b2dad 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4733,6 +4733,13 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; + /* +* Need a reference on filp since ksys_close() could +* be called on binder fd and the fdget() used in +* ksys_ioctl() might have optimized out the reference. +*/ + get_file(filp); + /*pr_info("binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ @@ -4844,6 +4851,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); err_unlocked: trace_binder_ioctl_done(ret); + fput(filp); return ret; } -- 2.20.0.rc1.387.gf8505762e3-goog
[PATCH v2 1/3] binder: fix sparse warnings on locking context
Add __acquire()/__release() annnotations to fix warnings in sparse context checking There is one case where the warning was due to a lack of a "default:" case in a switch statement where a lock was being released in each of the cases, so the default case was added. Signed-off-by: Todd Kjos --- v2: no change, just resubmitted as #1 of 3 patches instead of by itself drivers/android/binder.c | 43 +- drivers/android/binder_alloc.c | 1 + 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d6979cf7b2dad..172c207fbf99d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -660,6 +660,7 @@ struct binder_transaction { #define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) static void _binder_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -675,6 +676,7 @@ _binder_proc_lock(struct binder_proc *proc, int line) #define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__) static void _binder_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -690,6 +692,7 @@ _binder_proc_unlock(struct binder_proc *proc, int line) #define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) static void _binder_inner_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -705,6 +708,7 @@ _binder_inner_proc_lock(struct binder_proc *proc, int line) #define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) static void _binder_inner_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -720,6 +724,7 @@ _binder_inner_proc_unlock(struct binder_proc *proc, int line) #define binder_node_lock(node) _binder_node_lock(node, __LINE__) static void _binder_node_lock(struct binder_node *node, int line) + __acquires(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -735,6 +740,7 @@ _binder_node_lock(struct binder_node *node, int line) #define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) static void _binder_node_unlock(struct binder_node *node, int line) + __releases(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -751,12 +757,16 @@ _binder_node_unlock(struct binder_node *node, int line) #define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) static void _binder_node_inner_lock(struct binder_node *node, int line) + __acquires(&node->lock) __acquires(&node->proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); spin_lock(&node->lock); if (node->proc) binder_inner_proc_lock(node->proc); + else + /* annotation for sparse */ + __acquire(&node->proc->inner_lock); } /** @@ -768,6 +778,7 @@ _binder_node_inner_lock(struct binder_node *node, int line) #define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) static void _binder_node_inner_unlock(struct binder_node *node, int line) + __releases(&node->lock) __releases(&node->proc->inner_lock) { struct binder_proc *proc = node->proc; @@ -775,6 +786,9 @@ _binder_node_inner_unlock(struct binder_node *node, int line) "%s: line=%d\n", __func__, line); if (proc) binder_inner_proc_unlock(proc); + else + /* annotation for sparse */ + __release(&node->proc->inner_lock); spin_unlock(&node->lock); } @@ -1384,10 +1398,14 @@ static void binder_dec_node_tmpref(struct binder_node *node) binder_node_inner_lock(node); if (!node->proc) spin_lock(&binder_dead_nodes_lock); + else + __acquire(&binder_dead_nodes_lock); node->tmp_refs--; BUG_ON(node->tmp_refs < 0); if (!node->proc) spin_unlock(&binder_dead_nodes_lock); + else + __release(&binder_dead_nodes_lock); /* * Call binder_dec_node() to check if all refcounts are 0 * and clea
[PATCH 2/3] binder: fix kerneldoc header for struct binder_buffer
Fix the incomplete kerneldoc header for struct binder_buffer. Signed-off-by: Todd Kjos --- v2: no code change. Removed needless "Change-Id:" There is no dependancy on patch 1/3 drivers/android/binder_alloc.h | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index fb3238c74c8a8..c0aadbbf7f193 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -30,16 +30,16 @@ struct binder_transaction; * struct binder_buffer - buffer used for binder transactions * @entry: entry alloc->buffers * @rb_node:node for allocated_buffers/free_buffers rb trees - * @free: true if buffer is free - * @allow_user_free:describe the second member of struct blah, - * @async_transaction: describe the second member of struct blah, - * @debug_id: describe the second member of struct blah, - * @transaction:describe the second member of struct blah, - * @target_node:describe the second member of struct blah, - * @data_size: describe the second member of struct blah, - * @offsets_size: describe the second member of struct blah, - * @extra_buffers_size: describe the second member of struct blah, - * @data:i describe the second member of struct blah, + * @free: %true if buffer is free + * @allow_user_free:%true if user is allowed to free buffer + * @async_transaction: %true if buffer is in use for an async txn + * @debug_id: unique ID for debugging + * @transaction:pointer to associated struct binder_transaction + * @target_node:struct binder_node associated with this buffer + * @data_size: size of @transaction data + * @offsets_size: size of array of offsets + * @extra_buffers_size: size of space for other objects (like sg lists) + * @data: pointer to base of buffer space * * Bookkeeping structure for binder transaction buffers */ -- 2.20.0.rc1.387.gf8505762e3-goog
[PATCH 3/3] binder: filter out nodes when showing binder procs
When dumping out binder transactions via a debug node, the output is too verbose if a process has many nodes. Change the output for transaction dumps to only display nodes with pending async transactions. Signed-off-by: Todd Kjos --- v2: no change, just resubmitted as #3 of 3 patches instead of by itself There is no actual dependancy on patches 1 and 2 of the series drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 172c207fbf99d..cbaef3b0d3078 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5440,6 +5440,9 @@ static void print_binder_proc(struct seq_file *m, for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); + if (!print_all && !node->has_async_transaction) + continue; + /* * take a temporary reference on the node so it * survives and isn't removed from the tree -- 2.20.0.rc1.387.gf8505762e3-goog
Re: [PATCH v2] binder: fix use-after-free due to fdget() optimization
On Wed, Dec 5, 2018 at 2:00 PM Al Viro wrote: > > On Wed, Dec 05, 2018 at 01:16:01PM -0800, Todd Kjos wrote: > > 44d8047f1d87a ("binder: use standard functions to allocate fds") > > exposed a pre-existing issue in the binder driver. > > > > fdget() is used in ksys_ioctl() as a performance optimization. > > One of the rules associated with fdget() is that ksys_close() must > > not be called between the fdget() and the fdput(). There is a case > > where this requirement is not met in the binder driver (and possibly > > other drivers) which results in the reference count dropping to 0 > > when the device is still in use. This can result in use-after-free > > or other issues. > > > > This was observed with the following sequence of events: > > > > Task A and task B are connected via binder; task A has /dev/binder open at > > file descriptor number X. Both tasks are single-threaded. > > > > 1. task B sends a binder message with a file descriptor array > >(BINDER_TYPE_FDA) containing one file descriptor to task A > > 2. task A reads the binder message with the translated file > >descriptor number Y > > 3. task A uses dup2(X, Y) to overwrite file descriptor Y with > >the /dev/binder file > > 4. task A unmaps the userspace binder memory mapping; the reference > >count on task A's /dev/binder is now 2 > > 5. task A closes file descriptor X; the reference count on task > >A's /dev/binder is now 1 > > 6. task A forks off a child, task C, duplicating the file descriptor > >table; the reference count on task A's /dev/binder is now 2 > > 7. task A invokes the BC_FREE_BUFFER command on file descriptor X > >to release the incoming binder message > > 8. fdget() in ksys_ioctl() suppresses the reference count increment, > >since the file descriptor table is not shared > > 9. the BC_FREE_BUFFER handler removes the file descriptor table > >entry for X and decrements the reference count of task A's > >/dev/binder file to 1 > > 10.task C calls close(X), which drops the reference count of > >task A's /dev/binder to 0 and frees it > > 11.task A continues processing of the ioctl and accesses some > >property of e.g. the binder_proc => KASAN-detectable UAF > > > > Fixed by using get_file() / fput() in binder_ioctl(). > > Note that this patch does *not* remove the nasty trap caused by the garbage > in question - struct file can be freed before we even return from > ->unlocked_ioctl(). Could you describe in details the desired behaviour > of this interface? The ioctl(BC_FREE_BUFFER) frees the buffer memory associated with a transaction that has completed processing in userspace. If the buffer contains an FDA object (file-descriptor array), then it closes all of the fds passed in the transaction using ksys_close(). In the case with the issue, the fd associated with the binder driver has been passed in the array. Since the fdget() optimization didn't increment the reference, this makes us vulnerable to the UAF described above since the rules for fdget() are being violated (ksys_close()). This change did prevent the final close during the handling of BC_FREE_BUFFER, but as you point out, may still result in the final close being processed prematurely after the new fput() (no observed negative side-effects right now, but agreed this could be an issue). > > How about grabbing the references to all victims (*before* screwing with > ksys_close()), sticking them into a structure with embedded callback_head > and using task_work_add() on it, the callback doing those fput()? > > The callback would trigger before the return to userland, so observable > timing of the final close wouldn't be changed. And it would avoid the > kludges like this. I'll rework it according to your suggestion. I had hoped to do this in a way that doesn't require adding calls to non-exported functions since we are trying to clean up binder (I hear you snickering) to be a better citizen and not rely on internal functions that drivers shouldn't be using. I presume there are no plans to export task_work_add()... > > Of course, the proper fix would require TARDIS and set of instruments for > treating severe case of retrocranial inversion, so that this "ABI" would've > never existed, but... There are indeed many things about the binder interface we'd do differently if we had the chance to start over... -Todd
Re: [PATCH v2] binder: fix use-after-free due to fdget() optimization
On Wed, Dec 5, 2018 at 4:40 PM Al Viro wrote: > > On Wed, Dec 05, 2018 at 04:21:55PM -0800, Todd Kjos wrote: > > > > How about grabbing the references to all victims (*before* screwing with > > > ksys_close()), sticking them into a structure with embedded callback_head > > > and using task_work_add() on it, the callback doing those fput()? > > > > > > The callback would trigger before the return to userland, so observable > > > timing of the final close wouldn't be changed. And it would avoid the > > > kludges like this. > > > > I'll rework it according to your suggestion. I had hoped to do this in a way > > that doesn't require adding calls to non-exported functions since we are > > trying to clean up binder (I hear you snickering) to be a better citizen and > > not rely on internal functions that drivers shouldn't be using. I presume > > there are no plans to export task_work_add()... > > Er... Your variant critically depends upon binder being non-modular; if it > *was* built as a module, you could > * lose the timeslice just after your fput() > * have another process hit the final fput() *and* close the struct > file > * now that module refcount is not pinned by anything, get rmmod remove > your module > * have the process in binder_ioctl() regain the timeslice and find the > code under it gone. > > That's one of the reasons why such kludges are brittle as hell - normally you > are guaranteed that once fdget() has succeeded, the final fput() won't happen > until fdput(). With everything that guarantees in terms of code/data not > going > away under you. This patch relies upon the lack of accesses to anything > sensitive after that fput() added into binder_ioctl(). Which is actually > true, but only because the driver is not modular... > > At least this variant (task_work_add()-based) doesn't depend on anything > subtle - the lack of exports is the only problem there (IOW, it would've > worked in a module if not for that). Thanks for the detailed responses. I'll rework it for v3.
Re: [PATCH v2 1/3] binder: fix sparse warnings on locking context
On Thu, Dec 6, 2018 at 6:51 AM Greg KH wrote: > > On Wed, Dec 05, 2018 at 03:19:24PM -0800, Todd Kjos wrote: > > Add __acquire()/__release() annnotations to fix warnings > > in sparse context checking > > > > There is one case where the warning was due to a lack of > > a "default:" case in a switch statement where a lock was > > being released in each of the cases, so the default > > case was added. > > > > Signed-off-by: Todd Kjos > > --- > > v2: no change, just resubmitted as #1 of 3 patches instead of by itself > > I've already applied this one, right? No, not yet. I confused you by failing to add the "v2" in the subject for the other two submitted with it that you applied this morning: binder: filter out nodes when showing binder procs binder: fix kerneldoc header for struct binder_buffer Sorry about the confusion. This one still needs to be applied. -Todd > > thanks, > > greg k-h
Re: [PATCH v2 1/3] binder: fix sparse warnings on locking context
On Thu, Dec 6, 2018 at 11:08 PM Greg Kroah-Hartman wrote: ... > But I thought I applied this back on November 26: > > https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git/commit/?h=char-misc-testing&id=324fa64cf4189094bc4df744a9e7214a1b81d845 > > You should have gotten an email from my scripts at that time. > > or is this somehow a different patch with the same commit log? > Or a v2 that changed from the one that I had applied? Do I need to > revert this one and apply your new one? Sorry, you are right, you already applied it. I was thrown off by a subsequent mail (https://lkml.org/lkml/2018/12/5/1042) in which you said "Ok, no need for numbering them, but they do need to be resent based on the feedback I gave. I've dropped them all from my queue because of that.". So I thought it had been dropped. Anyway, glad its there and no need for further action on it. -Todd
Re: [PATCH] binder: remove BINDER_DEBUG_ENTRY()
On Fri, Nov 30, 2018 at 5:26 PM Yangtao Li wrote: > > We already have the DEFINE_SHOW_ATTRIBUTE.There is no need to define > such a macro,so remove BINDER_DEBUG_ENTRY. > > Signed-off-by: Yangtao Li Acked-by: Todd Kjos > --- > drivers/android/binder.c | 48 ++-- > 1 file changed, 17 insertions(+), 31 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index cb30a524d16d..5496b8e07234 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -94,22 +94,8 @@ static struct dentry *binder_debugfs_dir_entry_root; > static struct dentry *binder_debugfs_dir_entry_proc; > static atomic_t binder_last_id; > > -#define BINDER_DEBUG_ENTRY(name) \ > -static int binder_##name##_open(struct inode *inode, struct file *file) \ > -{ \ > - return single_open(file, binder_##name##_show, inode->i_private); \ > -} \ > -\ > -static const struct file_operations binder_##name##_fops = { \ > - .owner = THIS_MODULE, \ > - .open = binder_##name##_open, \ > - .read = seq_read, \ > - .llseek = seq_lseek, \ > - .release = single_release, \ > -} > - > -static int binder_proc_show(struct seq_file *m, void *unused); > -BINDER_DEBUG_ENTRY(proc); > +static int proc_show(struct seq_file *m, void *unused); > +DEFINE_SHOW_ATTRIBUTE(proc); > > /* This is only defined in include/asm-arm/sizes.h */ > #ifndef SZ_1K > @@ -4964,7 +4950,7 @@ static int binder_open(struct inode *nodp, struct file > *filp) > proc->debugfs_entry = debugfs_create_file(strbuf, 0444, > binder_debugfs_dir_entry_proc, > (void *)(unsigned long)proc->pid, > - &binder_proc_fops); > + &proc_fops); > } > > return 0; > @@ -5592,7 +5578,7 @@ static void print_binder_proc_stats(struct seq_file *m, > } > > > -static int binder_state_show(struct seq_file *m, void *unused) > +static int state_show(struct seq_file *m, void *unused) > { > struct binder_proc *proc; > struct binder_node *node; > @@ -5631,7 +5617,7 @@ static int binder_state_show(struct seq_file *m, void > *unused) > return 0; > } > > -static int binder_stats_show(struct seq_file *m, void *unused) > +static int stats_show(struct seq_file *m, void *unused) > { > struct binder_proc *proc; > > @@ -5647,7 +5633,7 @@ static int binder_stats_show(struct seq_file *m, void > *unused) > return 0; > } > > -static int binder_transactions_show(struct seq_file *m, void *unused) > +static int transactions_show(struct seq_file *m, void *unused) > { > struct binder_proc *proc; > > @@ -5660,7 +5646,7 @@ static int binder_transactions_show(struct seq_file *m, > void *unused) > return 0; > } > > -static int binder_proc_show(struct seq_file *m, void *unused) > +static int proc_show(struct seq_file *m, void *unused) > { > struct binder_proc *itr; > int pid = (unsigned long)m->private; > @@ -5703,7 +5689,7 @@ static void print_binder_transaction_log_entry(struct > seq_file *m, > "\n" : " (incomplete)\n"); > } > > -static int binder_transaction_log_show(struct seq_file *m, void *unused) > +static int transaction_log_show(struct seq_file *m, void *unused) > { > struct binder_transaction_log *log = m->private; > unsigned int log_cur = atomic_read(&log->cur); > @@ -5735,10 +5721,10 @@ static const struct file_operations binder_fops = { > .release = binder_release, > }; > > -BINDER_DEBUG_ENTRY(state); > -BINDER_DEBUG_ENTRY(stats); > -BINDER_DEBUG_ENTRY(transactions); > -BINDER_DEBUG_ENTRY(transaction_log); > +DEFINE_SHOW_ATTRIBUTE(state); > +DEFINE_SHOW_ATTRIBUTE(stats); > +DEFINE_SHOW_ATTRIBUTE(transactions); > +DEFINE_SHOW_ATTRIBUTE(transaction_log); > > static int __init init_binder_device(const char *name) > { > @@ -5792,27 +5778,27 @@ static int __init binder_init(void) > 0444, > binder_debugfs_dir_entry_root, > NULL, > - &binder_state_fops); > + &state_fops); > debugfs_create_file("stats", > 0444, > binder_debugfs_dir_entry_root, > NULL, > - &binder_stats_fops); &
[PATCH] binder: fix kerneldoc header for struct binder_buffer
Fix the incomplete kerneldoc header for struct binder_buffer. Change-Id: If3ca10cf6d90f605a0c078e4cdce28f02a475877 Signed-off-by: Todd Kjos --- drivers/android/binder_alloc.h | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index fb3238c74c8a8..c0aadbbf7f193 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -30,16 +30,16 @@ struct binder_transaction; * struct binder_buffer - buffer used for binder transactions * @entry: entry alloc->buffers * @rb_node:node for allocated_buffers/free_buffers rb trees - * @free: true if buffer is free - * @allow_user_free:describe the second member of struct blah, - * @async_transaction: describe the second member of struct blah, - * @debug_id: describe the second member of struct blah, - * @transaction:describe the second member of struct blah, - * @target_node:describe the second member of struct blah, - * @data_size: describe the second member of struct blah, - * @offsets_size: describe the second member of struct blah, - * @extra_buffers_size: describe the second member of struct blah, - * @data:i describe the second member of struct blah, + * @free: %true if buffer is free + * @allow_user_free:%true if user is allowed to free buffer + * @async_transaction: %true if buffer is in use for an async txn + * @debug_id: unique ID for debugging + * @transaction:pointer to associated struct binder_transaction + * @target_node:struct binder_node associated with this buffer + * @data_size: size of @transaction data + * @offsets_size: size of array of offsets + * @extra_buffers_size: size of space for other objects (like sg lists) + * @data: pointer to base of buffer space * * Bookkeeping structure for binder transaction buffers */ -- 2.20.0.rc1.387.gf8505762e3-goog
[PATCH] binder: fix sparse warnings on locking context
Add __acquire()/__release() annnotations to fix warnings in sparse context checking There is one case where the warning was due to a lack of a "default:" case in a switch statement where a lock was being released in each of the cases, so the default case was added. Signed-off-by: Todd Kjos --- drivers/android/binder.c | 43 +- drivers/android/binder_alloc.c | 1 + 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9f1000d2a40c7..9f2059d24ae2d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -660,6 +660,7 @@ struct binder_transaction { #define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) static void _binder_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -675,6 +676,7 @@ _binder_proc_lock(struct binder_proc *proc, int line) #define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__) static void _binder_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -690,6 +692,7 @@ _binder_proc_unlock(struct binder_proc *proc, int line) #define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) static void _binder_inner_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -705,6 +708,7 @@ _binder_inner_proc_lock(struct binder_proc *proc, int line) #define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) static void _binder_inner_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -720,6 +724,7 @@ _binder_inner_proc_unlock(struct binder_proc *proc, int line) #define binder_node_lock(node) _binder_node_lock(node, __LINE__) static void _binder_node_lock(struct binder_node *node, int line) + __acquires(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -735,6 +740,7 @@ _binder_node_lock(struct binder_node *node, int line) #define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) static void _binder_node_unlock(struct binder_node *node, int line) + __releases(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -751,12 +757,16 @@ _binder_node_unlock(struct binder_node *node, int line) #define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) static void _binder_node_inner_lock(struct binder_node *node, int line) + __acquires(&node->lock) __acquires(&node->proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); spin_lock(&node->lock); if (node->proc) binder_inner_proc_lock(node->proc); + else + /* annotation for sparse */ + __acquire(&node->proc->inner_lock); } /** @@ -768,6 +778,7 @@ _binder_node_inner_lock(struct binder_node *node, int line) #define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) static void _binder_node_inner_unlock(struct binder_node *node, int line) + __releases(&node->lock) __releases(&node->proc->inner_lock) { struct binder_proc *proc = node->proc; @@ -775,6 +786,9 @@ _binder_node_inner_unlock(struct binder_node *node, int line) "%s: line=%d\n", __func__, line); if (proc) binder_inner_proc_unlock(proc); + else + /* annotation for sparse */ + __release(&node->proc->inner_lock); spin_unlock(&node->lock); } @@ -1384,10 +1398,14 @@ static void binder_dec_node_tmpref(struct binder_node *node) binder_node_inner_lock(node); if (!node->proc) spin_lock(&binder_dead_nodes_lock); + else + __acquire(&binder_dead_nodes_lock); node->tmp_refs--; BUG_ON(node->tmp_refs < 0); if (!node->proc) spin_unlock(&binder_dead_nodes_lock); + else + __release(&binder_dead_nodes_lock); /* * Call binder_dec_node() to check if all refcounts are 0 * and cleanup is needed. Calling with strong=0 and internal=1 @@ -1890,18 +1908,22 @@ static str
[PATCH] binder: filter out nodes when showing binder procs
When dumping out binder transactions via a debug node, the output is too verbose if a process has many nodes. Change the output for transaction dumps to only display nodes with pending async transactions. Signed-off-by: Todd Kjos --- drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9f2059d24ae2d..c525b164d39d2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5432,6 +5432,9 @@ static void print_binder_proc(struct seq_file *m, for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); + if (!print_all && !node->has_async_transaction) + continue; + /* * take a temporary reference on the node so it * survives and isn't removed from the tree -- 2.20.0.rc1.387.gf8505762e3-goog
[PATCH] binder: fix use-after-free due to fdget() optimization
44d8047f1d87a ("binder: use standard functions to allocate fds") exposed a pre-existing issue in the binder driver. fdget() is used in ksys_ioctl() as a performance optimization. One of the rules associated with fdget() is that ksys_close() must not be called between the fdget() and the fdput(). There is a case where this requirement is not met in the binder driver (and possibly other drivers) which results in the reference count dropping to 0 when the device is still in use. This can result in use-after-free or other issues. This was observed with the following sequence of events: Task A and task B are connected via binder; task A has /dev/binder open at file descriptor number X. Both tasks are single-threaded. 1. task B sends a binder message with a file descriptor array (BINDER_TYPE_FDA) containing one file descriptor to task A 2. task A reads the binder message with the translated file descriptor number Y 3. task A uses dup2(X, Y) to overwrite file descriptor Y with the /dev/binder file 4. task A unmaps the userspace binder memory mapping; the reference count on task A's /dev/binder is now 2 5. task A closes file descriptor X; the reference count on task A's /dev/binder is now 1 6. task A forks off a child, task C, duplicating the file descriptor table; the reference count on task A's /dev/binder is now 2 7. task A invokes the BC_FREE_BUFFER command on file descriptor X to release the incoming binder message 8. fdget() in ksys_ioctl() suppresses the reference count increment, since the file descriptor table is not shared 9. the BC_FREE_BUFFER handler removes the file descriptor table entry for X and decrements the reference count of task A's /dev/binder file to 1 10.task C calls close(X), which drops the reference count of task A's /dev/binder to 0 and frees it 11.task A continues processing of the ioctl and accesses some property of e.g. the binder_proc => KASAN-detectable UAF Fixed by using get_file() / fput() in binder_ioctl(). Suggested-by: Jann Horn Signed-off-by: Todd Kjos Acked-by: Martijn Coenen --- drivers/android/binder.c | 8 1 file changed, 8 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c525b164d39d2..cbaef3b0d3078 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4774,6 +4774,13 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; + /* +* Need a reference on filp since ksys_close() could +* be called on binder fd and the fdget() used in +* ksys_ioctl() might have optimized out the reference. +*/ + get_file(filp); + /*pr_info("binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ @@ -4885,6 +4892,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); err_unlocked: trace_binder_ioctl_done(ret); + fput(filp); return ret; } -- 2.20.0.rc1.387.gf8505762e3-goog
Re: [PATCH] binder: fix race that allows malicious free of live buffer
On Fri, Nov 9, 2018 at 4:32 AM Greg KH wrote: > > On Tue, Nov 06, 2018 at 03:55:32PM -0800, Todd Kjos wrote: > > Malicious code can attempt to free buffers using the > > BC_FREE_BUFFER ioctl to binder. There are protections > > against a user freeing a buffer while in use by the > > kernel, however there was a window where BC_FREE_BUFFER > > could be used to free a recently allocated buffer that > > was not completely initialized. This resulted in a > > use-after-free detected by KASAN with a malicious > > test program. > > > > This window is closed by setting the buffer's > > allow_user_free attribute to 0 when the buffer > > is allocated or when the user has previously > > freed it instead of waiting for the caller > > to set it. The problem was that when the struct > > buffer was recycled, allow_user_free was stale > > and set to 1 allowing a free to go through. > > > > Signed-off-by: Todd Kjos > > Acked-by: Arve Hjønnevåg > > No "stable" tag here? Any idea how far back the stable backporting > should go, if any? Sorry about that. It should be backported to 4.14 and later. > > thanks, > > greg k-h
Re: [PATCH V3] binder: ipc namespace support for android binder
On Thu, Nov 8, 2018 at 5:02 AM chouryzhou(周威) wrote: > > We are working for running android in container, but we found that binder is > not isolated by ipc namespace. Since binder is a form of IPC and therefore > should > be tied to ipc namespace. With this patch, we can run more than one android > container on one host. > This patch move "binder_procs" and "binder_context" into ipc_namespace, > driver will find the context from it when opening. Although statistics in > debugfs > remain global. > > Signed-off-by: chouryzhou > --- > drivers/android/binder.c | 128 > +++--- > include/linux/ipc_namespace.h | 15 + > ipc/namespace.c | 10 +++- > 3 files changed, 117 insertions(+), 36 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index cb30a524d16d..22e45bb937e6 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -68,6 +68,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -80,13 +81,18 @@ > #include "binder_alloc.h" > #include "binder_trace.h" > > + > +#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE) I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE. It seems like this mechanism would work even if both are disabled -- as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to "#ifndef CONFIG_IPC_NS" > +struct ipc_namespace init_ipc_ns; > +#define ipcns (&init_ipc_ns) > +#else > +#define ipcns (current->nsproxy->ipc_ns) > +#endif > + > static HLIST_HEAD(binder_deferred_list); > static DEFINE_MUTEX(binder_deferred_lock); > > static HLIST_HEAD(binder_devices); > -static HLIST_HEAD(binder_procs); > -static DEFINE_MUTEX(binder_procs_lock); > - > static HLIST_HEAD(binder_dead_nodes); > static DEFINE_SPINLOCK(binder_dead_nodes_lock); > > @@ -232,7 +238,7 @@ struct binder_transaction_log_entry { > int return_error_line; > uint32_t return_error; > uint32_t return_error_param; > - const char *context_name; > + int context_device; > }; > struct binder_transaction_log { > atomic_t cur; > @@ -263,19 +269,64 @@ static struct binder_transaction_log_entry > *binder_transaction_log_add( > } > > struct binder_context { > + struct hlist_node hlist; > struct binder_node *binder_context_mgr_node; > struct mutex context_mgr_node_lock; > > kuid_t binder_context_mgr_uid; > - const char *name; > + intdevice; > }; > > struct binder_device { > struct hlist_node hlist; > struct miscdevice miscdev; > - struct binder_context context; > }; > > +void binder_exit_ns(struct ipc_namespace *ns) > +{ > + struct binder_context *context; > + struct hlist_node *tmp; > + > + mutex_destroy(&ns->binder_procs_lock); > + hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) { > + mutex_destroy(&context->context_mgr_node_lock); > + hlist_del(&context->hlist); > + kfree(context); > + } > +} > + > +int binder_init_ns(struct ipc_namespace *ns) > +{ > + int ret; > + struct binder_device *device; > + > + mutex_init(&ns->binder_procs_lock); > + INIT_HLIST_HEAD(&ns->binder_procs); > + INIT_HLIST_HEAD(&ns->binder_contexts); > + > + hlist_for_each_entry(device, &binder_devices, hlist) { > + struct binder_context *context; > + > + context = kzalloc(sizeof(*context), GFP_KERNEL); > + if (!context) { > + ret = -ENOMEM; > + goto err; > + } > + > + context->device = device->miscdev.minor; > + context->binder_context_mgr_uid = INVALID_UID; > + mutex_init(&context->context_mgr_node_lock); > + > + hlist_add_head(&context->hlist, &ns->binder_contexts); > + } > + > + return 0; > +err: > + binder_exit_ns(ns); > + return ret; > +} > + > + > /** > * struct binder_work - work enqueued on a worklist > * @entry: node enqueued on list > @@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc, > e->target_handle = tr->target.handle; > e->data_size = tr->data_size; > e->offsets_size = tr->offsets_size; > - e->context_name = proc->context->name; > + e->context_device = proc->context->device; why eliminate name? The string name is very useful for differentiating normal "framework" binder transactions vs "hal" or "vendor" transactions. If we just have a device number it will be hard to tell in the logs even which namespace it belongs to. We need to keep both the "name" (for which there might be multiple in each ns) and some indication of which namespace this is. Maybe we assign some sort of name
Re: [PATCH V3] binder: ipc namespace support for android binder
On Fri, Nov 9, 2018 at 10:27 AM Davidlohr Bueso wrote: > > On Thu, 08 Nov 2018, chouryzhou(??) wrote: > > >+#ifdef CONFIG_ANDROID_BINDER_IPC > >+ /* next fields are for binder */ > >+ struct mutex binder_procs_lock; > >+ struct hlist_head binder_procs; > >+ struct hlist_head binder_contexts; > >+#endif > > Now, I took a look at how the binder_procs list is used; and no, what > follows isn't really related to this patch, but a general observation. > > I think that a mutex is also an overkill and you might wanna replace it > with a spinlock/rwlock. Can anything block while holding the > binder_procs_lock? > I don't see anything... you mainly need it for consulting the hlist calling > print_binder_proc[_stat]() - which will take the proc->inner_lock anyway, so > no blocking there. print_binder_proc() drops proc->inner_lock and calls binder_alloc_print_allocated() which acquires proc->alloc->mutex. Likewise, print_binder_stats() calls print_binder_proc_stats() which drops its locks to call binder_alloc_print_pages() which also acquires proc->alloc->mutex. So binder_procs_lock needs to be a mutex since it can block on proc->alloc->mutex. > Also, if this is perhaps because of long hold times, dunno, > the rb_first_cached primitive might reduce some of it, although I don't know > how big the rbtrees in binder can get and if it matters at all. > > Anyway, that said and along with addressing Todd's comments, the ipc/ bits > look > good. Feel free to add my: > > Reviewed-by: Davidlohr Bueso > > Thanks, > Davidlohr
Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
On Fri, Nov 9, 2018 at 8:43 PM chouryzhou(周威) wrote: > > If IPC_NS is disabled, "current-nsporxy->ipc_ns" will also exists, it will > be a static > reference of "init_ipc_ns" (in ipc/msgutil.c, not defined in binder.c by me) > with > no namespace-ization. You will get the same one in all processes, everything > is > the same as without this patch. except, as far as I can tell, binder_init_ns() would never have been called on it so the mutex and list heads are not initialized so its completely broken. Am I missing something? How do those fields get initialized in this case? > > - choury - >
Re: Re: [PATCH V3] binder: ipc namespace support for android binder
On Fri, Nov 9, 2018 at 7:09 PM chouryzhou(周威) wrote: > > > > > I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE. > > It seems like this mechanism would work even if both are disabled -- > > as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and > > allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to > > "#ifndef CONFIG_IPC_NS" > > Let me explain it in detail. If SYSIPC and IPC_NS are both defined, > current->nsproxy->ipc_ns will save the ipc namespace variables. We just use > it. If SYSIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set, > current->nsproxy->ipc_ns will always refer to init_ipc_ns in ipc/msgutil.c, > which is also fine to us. But if neither SYSIPC nor POSIX_MQUEUE is set > (IPC_NS can't be set in this situation), there is no current->nsproxy->ipc_ns. > We make a fack init_ipc_ns here and use it. Yes, I can read the code. I'm wondering specifically about SYSVIPC and POSIX_MQUEUE. Even with your code changes, binder has no dependency on these configs. Why are you creating one? The actual dependency with your changes is on "current->nsproxy->ipc_ns" being initialized for binder -- which is dependent on CONFIG_IPC_NS being enabled, isn't it? If SYSVIPC or POSIX_MQUEUE are enabled, but IPC_NS is disabled, does this work? > > > why eliminate name? The string name is very useful for differentiating > > normal "framework" binder transactions vs "hal" or "vendor" > > transactions. If we just have a device number it will be hard to tell > > in the logs even which namespace it belongs to. We need to keep both > > the "name" (for which there might be multiple in each ns) and some > > indication of which namespace this is. Maybe we assign some sort of > > namespace ID during binder_init_ns(). > > I will remain the name of device. The inum of ipc_ns can be treated as > namespace ID in ipc_ns. > > > As mentioned above, we need to retain name and probably also want a ns > > id of some sort. So context now has 3 components if IPC_NS, so maybe a > > helper function to print context like: > > > > static void binder_seq_print_context(struct seq_file *m, struct > > binder_context *context) > > { > > #ifdef CONFIG_IPC_NS > > seq_printf(m, "%d-%d-%s", context->ns_id, context->device, > > context->name); > > #else > > seq_printf(m, "%d", context->name); > > #endif > > } > > > > (same comment below everywhere context is printed) > > > > Should these debugfs nodes should be ns aware and only print debugging > > info for the context of the thread accessing the node? If so, we would > > also want to be namespace-aware when printing pids. > > Nowadays, debugfs is not namespace-ized, and pid namespace is not associated > with ipc namespace. Will it be more complicated to debug this if we just > print > the info for current thread? Because we will have to enter the ipc namespace > firstly. But add ipc inum should be no problem. With the name and ns id, debugging would be fine from the host-side. I don't understand the container use cases enough to know if you need to be able to debug binder transaction failures from within the container -- in which case it seems like you'd want the container-version of the PIDs -- but obviously this depends on how the containers are set up and what the use-cases really are. I'm ok with leaving that for a later patch. > > - choury - > >
Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
On Fri, Nov 9, 2018 at 9:43 PM chouryzhou(周威) wrote: > > > > > > > If IPC_NS is disabled, "current-nsporxy->ipc_ns" will also exists, it > > > will be a static > > > reference of "init_ipc_ns" (in ipc/msgutil.c, not defined in binder.c by > > > me) with > > > no namespace-ization. You will get the same one in all processes, > > > everything is > > > the same as without this patch. > > > > except, as far as I can tell, binder_init_ns() would never have been > > called on it so the mutex and list heads are not initialized so its > > completely broken. Am I missing something? How do those fields get > > initialized in this case? > > > @@ -5832,8 +5888,12 @@ static int __init binder_init(void) > > goto err_init_binder_device_failed; > > } > > > > - return ret; > > + ret = binder_init_ns(&init_ipc_ns); > > + if (ret) > > + goto err_init_namespace_failed; > > > > + return ret; > > They are initialized here. Ok, This init_ipc_ns is a global declared in msgutil.c if SYSVIPC || POSIX_MQUEUE. This seems kinda hacky, but now I finally see why the dependancy... msgutil.c is the only file we can count on if !IPC_NS && (SYSVIPC || POSIX_MQUEUE). There must be a cleaner way to do this, I really don't like this dependency... wouldn't it be cleaner to do: #ifndef CONFIG_IPC_NS static struct ipc_namespace binder_ipc_ns; #define ipcns (&binder_ipc_ns) #else #define ipcns (current->nsproxy->ipc_ns) #endif (and make the initialization of binder_ipc_ns conditional on IPC_NS) This gets us the same thing without the incestuous dependency on the msgutil.c version of init_ipc_ns...and then binder doesn't rely on SYSVIPC or POSIX_MQUEUE directly. > > - choury - >
Re: [PATCH v1] binder: implement binderfs
> > > /* Create a new binder device in a binderfs mount */ > > > sudo mkdir /dev/binderfs > > > sudo mount -t binder binder /dev/binderfs > > > > > > #define _GNU_SOURCE > > > #include > > > #include > > > #include > > > #include > > > #include > > > #include > > > #include > > > #include > > > #include > > > #include > > > > > > int main(int argc, char *argv[]) > > > { > > > int fd, ret, saved_errno; > > > struct binderfs_device device = { 0 }; > > > > > > if (argc < 2) > > > exit(EXIT_FAILURE); > > > > > > strncpy(device.name, argv[1], sizeof(device.name)); > > > > > > fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); > > > if (fd < 0) { > > > printf("%s - Failed to open binder-control device\n", > > > strerror(errno)); > > > exit(EXIT_FAILURE); > > > } > > > > > > ret = ioctl(fd, BINDER_CTL_ADD, &device); > > > saved_errno = errno; > > > close(fd); > > > errno = saved_errno; > > > if (ret < 0) { > > > printf("%s - Failed to allocate new binder device\n", > > > strerror(errno)); > > > exit(EXIT_FAILURE); > > > } > > > > > > printf("Allocated new binder device with major %d, minor %d, and > > > " > > > "name %s\n", device.major, device.minor, > > > device.name); > > > > > > exit(EXIT_SUCCESS); > > > } > > > > > > /* Demo */ > > > A demo of how binderfs works can be found under [2]. > > > > > > [1]: https://goo.gl/JL2tfX > > > > > > Cc: Martijn Coenen > > > Cc: Todd Kjos > > > Cc: Greg Kroah-Hartman > > > Signed-off-by: Christian Brauner > > Do we plan to bring this into mergeable shape before Christmas? I'm > happy to do it. :) It looks fine to me and I tested it on an android device. Acked-by: Todd Kjos > > Christian > > > > --- > > > v1: > > > - simplify init_binderfs() > > > Move the creation of binder-control into binderfs_fill_super() so that > > > we can > > > cleanly and without any complex error handling deallocate the super > > > block on > > > failure. > > > - switch from __u32 to __u8 in struct binderfs_device > > > __u8 is the correct value to cross the kernel <-> userspace boundary. > > > - introduce BINDERFS_MAX_NAME > > > This determines the maximum length of a binderfs binder device name. > > > - add name member struct binderfs_device > > > This lets userspace specify a name for the binder device. The maximum > > > length > > > is determined by BINDERFS_MAX_NAME. > > > - handle naming collisions > > > Since userspace now gives us a name to use for the device we need to > > > handle > > > the case where userspace passes the same device name twice. This is > > > done by > > > using d_lookup() (takes rename lock too). If a matching dentry under > > > the root > > > dentry of the superblock is found we test whether it is still active > > > and if > > > so return -EEXIST. > > > - remove per-super block idr tracking and locking > > > Since userspace now determines the name remove the idr tracking since > > > it's > > > not needed anymore. > > > - remove ctl_mutex from struct binders_info > > > It was only needed to protect the per-super block idr which has been > > > removed. > > > If I'm not mistaken we currently do not need to lock the ioctl() itself > > > since > > > removing is handled by a simple unlink(). So remove the mutex. > > > - ensure that binderfs_evict_inode() doesn't cause double-frees on iput() > > > When setting up the inode fails at a step where a new inode already has > > > been > > > allocated we need to set inode->i_private to NULL to ensure that > > > binderfs_evict_inode() doesn't try to free up stuff that we already > > > freed > > > while handling the error. > > > ---
[PATCH] binder: fix use-after-free due to ksys_close() during fdget()
44d8047f1d8 ("binder: use standard functions to allocate fds") exposed a pre-existing issue in the binder driver. fdget() is used in ksys_ioctl() as a performance optimization. One of the rules associated with fdget() is that ksys_close() must not be called between the fdget() and the fdput(). There is a case where this requirement is not met in the binder driver which results in the reference count dropping to 0 when the device is still in use. This can result in use-after-free or other issues. If userpace has passed a file-descriptor for the binder driver using a BINDER_TYPE_FDA object, then kys_close() is called on it when handling a binder_ioctl(BC_FREE_BUFFER) command. This violates the assumptions for using fdget(). The problem is fixed by deferring the fd close using task_work_add() so ksys_close() is called after returning from binder_ioctl(). Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds") Suggested-by: Al Viro Signed-off-by: Todd Kjos --- drivers/android/binder.c | 91 +++- 1 file changed, 81 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c525b164d39d2..8f77d6af31209 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -560,6 +561,9 @@ enum { *(protected by @proc->inner_lock) * @waiting_thread_node: element for @proc->waiting_threads list *(protected by @proc->inner_lock) + * @deferred_close_fd_cb: callback_head for task work + * @deferred_close_fds: list of fds to be closed + *(only accessed by this thread) * @pid: PID for this thread *(invariant after initialization) * @looper: bitmap of looping state @@ -592,6 +596,8 @@ struct binder_thread { struct binder_proc *proc; struct rb_node rb_node; struct list_head waiting_thread_node; + struct callback_head deferred_close_fd_cb; + struct hlist_head deferred_close_fds; int pid; int looper; /* only modified by this thread */ bool looper_need_return; /* can be written by other thread */ @@ -2184,7 +2190,64 @@ static bool binder_validate_fixup(struct binder_buffer *b, return (fixup_offset >= last_min_offset); } +struct binder_task_work { + struct hlist_node entry; + int fd; +}; + +/** + * binder_do_fd_close() - close list of file descriptors + * @twork: callback head for task work + * + * It is not safe to call ksys_close() during the binder_ioctl() + * function if there is a chance that binder's own file descriptor + * might be closed. This is to meet the requirements for using + * fdget() (see comments for __fget_light()). Therefore use + * task_add_work() to schedule the close operation once we have + * returned from binder_ioctl(). This function is a callback + * for that mechanism and does the actual ksys_close() on the list + * of file descriptors. + */ +static void binder_do_fd_close(struct callback_head *twork) +{ + struct binder_thread *thread = container_of(twork, + struct binder_thread, deferred_close_fd_cb); + struct hlist_node *entry, *tmp; + + hlist_for_each_safe(entry, tmp, &thread->deferred_close_fds) { + struct binder_task_work *work = container_of(entry, + struct binder_task_work, entry); + + ksys_close(work->fd); + hlist_del(&work->entry); + kfree(work); + } +} + +/** + * binder_deferred_fd_close() - schedule a close for the given file-descriptor + * @thread:struct binder_thread for this task + * @fd:file-descriptor to close + * + * See comments in binder_do_fd_close(). This function is used to schedule + * a file-descriptor to be closed after returning from binder_ioctl(). + */ +static void binder_deferred_fd_close(struct binder_thread *thread, int fd) +{ + struct binder_task_work *work; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return; + work->fd = fd; + + if (hlist_empty(&thread->deferred_close_fds)) + task_work_add(current, &thread->deferred_close_fd_cb, true); + hlist_add_head(&work->entry, &thread->deferred_close_fds); +} + static void binder_transaction_buffer_release(struct binder_proc *proc, + struct binder_thread *thread, struct binder_buffer *buffer, binder_size_t *failed_at) { @@ -2323,7 +2386,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, }
Re: [PATCH] binder: fix use-after-free due to ksys_close() during fdget()
I need to make a change to this patch, so please ignore this version. I'll send a v2 soon. On Thu, Dec 13, 2018 at 1:04 PM Todd Kjos wrote: > > 44d8047f1d8 ("binder: use standard functions to allocate fds") > exposed a pre-existing issue in the binder driver. > > fdget() is used in ksys_ioctl() as a performance optimization. > One of the rules associated with fdget() is that ksys_close() must > not be called between the fdget() and the fdput(). There is a case > where this requirement is not met in the binder driver which results > in the reference count dropping to 0 when the device is still in > use. This can result in use-after-free or other issues. > > If userpace has passed a file-descriptor for the binder driver using > a BINDER_TYPE_FDA object, then kys_close() is called on it when > handling a binder_ioctl(BC_FREE_BUFFER) command. This violates > the assumptions for using fdget(). > > The problem is fixed by deferring the fd close using task_work_add() > so ksys_close() is called after returning from binder_ioctl(). > > Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds") > Suggested-by: Al Viro > Signed-off-by: Todd Kjos > --- > drivers/android/binder.c | 91 +++- > 1 file changed, 81 insertions(+), 10 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c525b164d39d2..8f77d6af31209 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -72,6 +72,7 @@ > #include > #include > #include > +#include > > #include > > @@ -560,6 +561,9 @@ enum { > *(protected by @proc->inner_lock) > * @waiting_thread_node: element for @proc->waiting_threads list > *(protected by @proc->inner_lock) > + * @deferred_close_fd_cb: callback_head for task work > + * @deferred_close_fds: list of fds to be closed > + *(only accessed by this thread) > * @pid: PID for this thread > *(invariant after initialization) > * @looper: bitmap of looping state > @@ -592,6 +596,8 @@ struct binder_thread { > struct binder_proc *proc; > struct rb_node rb_node; > struct list_head waiting_thread_node; > + struct callback_head deferred_close_fd_cb; > + struct hlist_head deferred_close_fds; > int pid; > int looper; /* only modified by this thread */ > bool looper_need_return; /* can be written by other thread */ > @@ -2184,7 +2190,64 @@ static bool binder_validate_fixup(struct binder_buffer > *b, > return (fixup_offset >= last_min_offset); > } > > +struct binder_task_work { > + struct hlist_node entry; > + int fd; > +}; > + > +/** > + * binder_do_fd_close() - close list of file descriptors > + * @twork: callback head for task work > + * > + * It is not safe to call ksys_close() during the binder_ioctl() > + * function if there is a chance that binder's own file descriptor > + * might be closed. This is to meet the requirements for using > + * fdget() (see comments for __fget_light()). Therefore use > + * task_add_work() to schedule the close operation once we have > + * returned from binder_ioctl(). This function is a callback > + * for that mechanism and does the actual ksys_close() on the list > + * of file descriptors. > + */ > +static void binder_do_fd_close(struct callback_head *twork) > +{ > + struct binder_thread *thread = container_of(twork, > + struct binder_thread, deferred_close_fd_cb); > + struct hlist_node *entry, *tmp; > + > + hlist_for_each_safe(entry, tmp, &thread->deferred_close_fds) { > + struct binder_task_work *work = container_of(entry, > + struct binder_task_work, entry); > + > + ksys_close(work->fd); > + hlist_del(&work->entry); > + kfree(work); > + } > +} > + > +/** > + * binder_deferred_fd_close() - schedule a close for the given > file-descriptor > + * @thread:struct binder_thread for this task > + * @fd:file-descriptor to close > + * > + * See comments in binder_do_fd_close(). This function is used to schedule > + * a file-descriptor to be closed after returning from binder_ioctl(). > + */ > +static void binder_deferred_fd_close(struct binder_thread *thread, int fd) > +{ > + struct binder_task_work *work; > + > + work = kzalloc(sizeof(*work), GFP_KERNEL); > + if (!work) > + return
[PATCH v2] binder: fix use-after-free due to ksys_close() during fdget()
44d8047f1d8 ("binder: use standard functions to allocate fds") exposed a pre-existing issue in the binder driver. fdget() is used in ksys_ioctl() as a performance optimization. One of the rules associated with fdget() is that ksys_close() must not be called between the fdget() and the fdput(). There is a case where this requirement is not met in the binder driver which results in the reference count dropping to 0 when the device is still in use. This can result in use-after-free or other issues. If userpace has passed a file-descriptor for the binder driver using a BINDER_TYPE_FDA object, then kys_close() is called on it when handling a binder_ioctl(BC_FREE_BUFFER) command. This violates the assumptions for using fdget(). The problem is fixed by deferring the fd close using task_work_add() so ksys_close() is called after returning from binder_ioctl(). Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds") Suggested-by: Al Viro Signed-off-by: Todd Kjos --- v2: - simplified code If possible, please add to 4.20-final drivers/android/binder.c | 60 ++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c525b164d39d2f..5d0233ca852c5d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -2184,6 +2185,61 @@ static bool binder_validate_fixup(struct binder_buffer *b, return (fixup_offset >= last_min_offset); } +/** + * struct binder_task_work_cb - for deferred close + * + * @twork:callback_head for task work + * @fd: fd to close + * + * Structure to pass task work to be handled after + * returning from binder_ioctl() via task_work_add(). + */ +struct binder_task_work_cb { + struct callback_head twork; + int fd; +}; + +/** + * binder_do_fd_close() - close list of file descriptors + * @twork: callback head for task work + * + * It is not safe to call ksys_close() during the binder_ioctl() + * function if there is a chance that binder's own file descriptor + * might be closed. This is to meet the requirements for using + * fdget() (see comments for __fget_light()). Therefore use + * task_work_add() to schedule the close operation once we have + * returned from binder_ioctl(). This function is a callback + * for that mechanism and does the actual ksys_close() on the + * given file descriptor. + */ +static void binder_do_fd_close(struct callback_head *twork) +{ + struct binder_task_work_cb *twcb = container_of(twork, + struct binder_task_work_cb, twork); + + ksys_close(twcb->fd); + kfree(twcb); +} + +/** + * binder_deferred_fd_close() - schedule a close for the given file-descriptor + * @fd:file-descriptor to close + * + * See comments in binder_do_fd_close(). This function is used to schedule + * a file-descriptor to be closed after returning from binder_ioctl(). + */ +static void binder_deferred_fd_close(int fd) +{ + struct binder_task_work_cb *twcb; + + twcb = kzalloc(sizeof(*twcb), GFP_KERNEL); + if (!twcb) + return; + init_task_work(&twcb->twork, binder_do_fd_close); + twcb->fd = fd; + task_work_add(current, &twcb->twork, true); +} + static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, binder_size_t *failed_at) @@ -2323,7 +2379,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); for (fd_index = 0; fd_index < fda->num_fds; fd_index++) - ksys_close(fd_array[fd_index]); + binder_deferred_fd_close(fd_array[fd_index]); } break; default: pr_err("transaction release %d bad object type %x\n", @@ -3942,7 +3998,7 @@ static int binder_apply_fd_fixups(struct binder_transaction *t) } else if (ret) { u32 *fdp = (u32 *)(t->buffer->data + fixup->offset); - ksys_close(*fdp); + binder_deferred_fd_close(*fdp); } list_del(&fixup->fixup_entry); kfree(fixup); -- 2.20.0.405.gbc1bbc6f85-goog
[PATCH v3] binder: fix use-after-free due to ksys_close() during fdget()
44d8047f1d8 ("binder: use standard functions to allocate fds") exposed a pre-existing issue in the binder driver. fdget() is used in ksys_ioctl() as a performance optimization. One of the rules associated with fdget() is that ksys_close() must not be called between the fdget() and the fdput(). There is a case where this requirement is not met in the binder driver which results in the reference count dropping to 0 when the device is still in use. This can result in use-after-free or other issues. If userpace has passed a file-descriptor for the binder driver using a BINDER_TYPE_FDA object, then kys_close() is called on it when handling a binder_ioctl(BC_FREE_BUFFER) command. This violates the assumptions for using fdget(). The problem is fixed by deferring the close using task_work_add(). A new variant of __close_fd() was created that returns a struct file with a reference. The fput() is deferred instead of using ksys_close(). Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds") Suggested-by: Al Viro Signed-off-by: Todd Kjos --- v2: - simplified code v3: - implemented Al Viro's suggestion to pass struct file instead of fd - added __close_fd_get_file() to close the fd, but reference the file drivers/android/binder.c | 63 ++-- fs/file.c| 29 ++ include/linux/fdtable.h | 1 + 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c525b164d39d2f..c4ee11d883dd93 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -2184,6 +2185,64 @@ static bool binder_validate_fixup(struct binder_buffer *b, return (fixup_offset >= last_min_offset); } +/** + * struct binder_task_work_cb - for deferred close + * + * @twork:callback_head for task work + * @fd: fd to close + * + * Structure to pass task work to be handled after + * returning from binder_ioctl() via task_work_add(). + */ +struct binder_task_work_cb { + struct callback_head twork; + struct file *file; +}; + +/** + * binder_do_fd_close() - close list of file descriptors + * @twork: callback head for task work + * + * It is not safe to call ksys_close() during the binder_ioctl() + * function if there is a chance that binder's own file descriptor + * might be closed. This is to meet the requirements for using + * fdget() (see comments for __fget_light()). Therefore use + * task_work_add() to schedule the close operation once we have + * returned from binder_ioctl(). This function is a callback + * for that mechanism and does the actual ksys_close() on the + * given file descriptor. + */ +static void binder_do_fd_close(struct callback_head *twork) +{ + struct binder_task_work_cb *twcb = container_of(twork, + struct binder_task_work_cb, twork); + + fput(twcb->file); + kfree(twcb); +} + +/** + * binder_deferred_fd_close() - schedule a close for the given file-descriptor + * @fd:file-descriptor to close + * + * See comments in binder_do_fd_close(). This function is used to schedule + * a file-descriptor to be closed after returning from binder_ioctl(). + */ +static void binder_deferred_fd_close(int fd) +{ + struct binder_task_work_cb *twcb; + + twcb = kzalloc(sizeof(*twcb), GFP_KERNEL); + if (!twcb) + return; + init_task_work(&twcb->twork, binder_do_fd_close); + __close_fd_get_file(fd, &twcb->file); + if (twcb->file) + task_work_add(current, &twcb->twork, true); + else + kfree(twcb); +} + static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, binder_size_t *failed_at) @@ -2323,7 +2382,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); for (fd_index = 0; fd_index < fda->num_fds; fd_index++) - ksys_close(fd_array[fd_index]); + binder_deferred_fd_close(fd_array[fd_index]); } break; default: pr_err("transaction release %d bad object type %x\n", @@ -3942,7 +4001,7 @@ static int binder_apply_fd_fixups(struct binder_transaction *t) } else if (ret) { u32 *fdp = (u32 *)(t->buffer->data + fixup->offset); - ksys_close(*fdp); + binder_deferred_fd_close(*fdp); } list_del(&fixup->fixup_entry);
Re: [PATCH v1] binder: implement binderfs
iled to open binder-control device\n", > strerror(errno)); > exit(EXIT_FAILURE); > } > > ret = ioctl(fd, BINDER_CTL_ADD, &device); > saved_errno = errno; > close(fd); > errno = saved_errno; > if (ret < 0) { > printf("%s - Failed to allocate new binder device\n", > strerror(errno)); > exit(EXIT_FAILURE); > } > > printf("Allocated new binder device with major %d, minor %d, and " > "name %s\n", device.major, device.minor, > device.name); > > exit(EXIT_SUCCESS); > } > > /* Demo */ > A demo of how binderfs works can be found under [2]. > > [1]: https://goo.gl/JL2tfX > > Cc: Martijn Coenen > Cc: Todd Kjos > Cc: Greg Kroah-Hartman > Signed-off-by: Christian Brauner > --- > v1: > - simplify init_binderfs() > Move the creation of binder-control into binderfs_fill_super() so that we > can > cleanly and without any complex error handling deallocate the super block on > failure. > - switch from __u32 to __u8 in struct binderfs_device > __u8 is the correct value to cross the kernel <-> userspace boundary. > - introduce BINDERFS_MAX_NAME > This determines the maximum length of a binderfs binder device name. > - add name member struct binderfs_device > This lets userspace specify a name for the binder device. The maximum length > is determined by BINDERFS_MAX_NAME. > - handle naming collisions > Since userspace now gives us a name to use for the device we need to handle > the case where userspace passes the same device name twice. This is done by > using d_lookup() (takes rename lock too). If a matching dentry under the > root > dentry of the superblock is found we test whether it is still active and if > so return -EEXIST. > - remove per-super block idr tracking and locking > Since userspace now determines the name remove the idr tracking since it's > not needed anymore. > - remove ctl_mutex from struct binders_info > It was only needed to protect the per-super block idr which has been > removed. > If I'm not mistaken we currently do not need to lock the ioctl() itself > since > removing is handled by a simple unlink(). So remove the mutex. > - ensure that binderfs_evict_inode() doesn't cause double-frees on iput() > When setting up the inode fails at a step where a new inode already has been > allocated we need to set inode->i_private to NULL to ensure that > binderfs_evict_inode() doesn't try to free up stuff that we already freed > while handling the error. > --- > drivers/android/Kconfig | 12 + > drivers/android/Makefile| 1 + > drivers/android/binder.c| 25 +- > drivers/android/binder_internal.h | 49 ++ > drivers/android/binderfs.c | 565 > include/uapi/linux/android/binder_ctl.h | 35 ++ > include/uapi/linux/magic.h | 1 + > 7 files changed, 671 insertions(+), 17 deletions(-) > create mode 100644 drivers/android/binder_internal.h > create mode 100644 drivers/android/binderfs.c > create mode 100644 include/uapi/linux/android/binder_ctl.h > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig > index 51e8250d113f..4c190f8d1f4c 100644 > --- a/drivers/android/Kconfig > +++ b/drivers/android/Kconfig > @@ -20,6 +20,18 @@ config ANDROID_BINDER_IPC > Android process, using Binder to identify, invoke and pass arguments > between said processes. > > +config ANDROID_BINDERFS > + bool "Android Binderfs filesystem" > + depends on ANDROID_BINDER_IPC > + default n > + ---help--- > + Binderfs is a pseudo-filesystem for the Android Binder IPC driver > + which can be mounted per-ipc namespace allowing to run multiple > + instances of Android. > + Each binderfs mount initially only contains a binder-control device. > + It can be used to dynamically allocate new binder IPC devices via > + ioctls. > + > config ANDROID_BINDER_DEVICES > string "Android Binder devices" > depends on ANDROID_BINDER_IPC > diff --git a/drivers/android/Makefile b/drivers/android/Makefile > index a01254c43ee3..c7856e3200da 100644 > --- a/drivers/android/Makefile > +++ b/drivers/android/Makefile > @@ -1,4 +1,5 @@ > ccflags-y += -I$(src) # needed for trace events > > +obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o > obj-$(CONFIG_AN
Re: [PATCH] binderfs: implement sysctls
On Fri, Dec 21, 2018 at 8:33 AM Greg KH wrote: > > On Fri, Dec 21, 2018 at 04:59:19PM +0100, Christian Brauner wrote: > > On Fri, Dec 21, 2018 at 04:37:58PM +0100, Greg KH wrote: > > > On Fri, Dec 21, 2018 at 03:12:42PM +0100, Christian Brauner wrote: > > > > On Fri, Dec 21, 2018 at 02:55:09PM +0100, Greg KH wrote: > > > > > On Fri, Dec 21, 2018 at 02:39:09PM +0100, Christian Brauner wrote: > > > > > > This implements three sysctls that have very specific goals: > > > > > > > > > > Ick, why? > > > > > > > > > > What are these going to be used for? Who will "control" them? As you > > > > > > > > Only global root in the initial user namespace. See the reasons below. > > > > :) > > > > > > > > > are putting them in the "global" namespace, that feels like something > > > > > that binderfs was trying to avoid in the first place. > > > > > > > > There are a couple of reason imho: > > > > - Global root needs a way to restrict how many binder devices can be > > > > allocated across all user + ipc namespace pairs. > > > > One obvious reason is that otherwise userns root in a non-initial user > > > > namespace can allocate a huge number of binder devices (pick a random > > > > number say 10.000) and use up a lot of kernel memory. > > > > > > Root can do tons of other bad things too, why are you picking on > > > > That's global root not userns root though. :) These sysctls are about > > global root gaining the ability to proactively restrict binder device > > delegation. > > > > > binderfs here? :) > > > > > > > In addition they can pound on the binder.c code causing a lot of > > > > contention for the remaining global lock in there. > > > > > > That's the problem of that container, don't let it do that. Or remove > > > the global lock :) > > > > > > > We should let global root explicitly restrict non-initial namespaces > > > > in this respect. Imho, that's just good security design. :) > > > > > > If you do not trust your container enough to have it properly allocate > > > the correct binder resources, then perhaps you shouldn't be allowing it > > > to allocate any resources at all? > > > > Containers just like VMs get delegated and you might not have control > > over what is running in there. That's AWS in a nutshell. :) Restricting > > it by saying "just don't do that" seems not something that is > > appropriate given the workloads out there in the wild. > > In general, I do *understand* the reasoning but I think the premise is > > flawed if we can somewhat trivially make this safe. > > > > > > > > > - The reason for having a number of reserved devices is when the initial > > > > binderfs mount needs to bump the number of binder devices after the > > > > initial allocation done during say boot (e.g. it could've removed > > > > devices and wants to reallocate new ones but all binder minor numbers > > > > have been given out or just needs additional devices). By reserving an > > > > initial pool of binder devices this can be easily accounted for and > > > > future proofs userspace. This is to say: global root in the initial > > > > userns + ipcns gets dibs on however many devices it wants. :) > > > > > > binder devices do not "come and go" at runtime, you need to set them up > > > initially and then all is fine. So there should never be a need for the > > > "global" instance to need "more" binder devices once it is up and > > > running. So I don't see what you are really trying to solve here. > > > > That's dismissing a whole range of use-cases where you might allocate > > and deallocate devices on the fly which this is somewhat designed for. > > But I guess ok for now. > > > > > > > > You seem to be trying to protect the system from the container you just > > > gave root to and trusted it with creating its own binder instances. > > > If you do not trust it to create binder instances then do not allow it > > > to create binder instances! :) > > > > Again, I get the reasoning but think that this dismisses major > > real-world use-cases not just for binderfs but for all instances where > > untrusted workloads are run which both containers and VMs aim to make > > sure are possible. > > Note, I mean untrusted not in the sense of necessarily being malicious > > but just "can't guarantee that things don't blow up in your face". > > > > > > > > > - The fact that we have a single shared pool of binder device minor > > > > numbers for all namespaces imho makes it necessary for the global root > > > > user in the initial ipc + user namespace to manage device allocation > > > > and delegation. > > > > > > You are managing the allocation, you are giving who ever asks for one a > > > device. If you run out of devices, oops, you run out of devices, that's > > > it. Are you really ever going to run out of a major's number of binder > > > devices? > > > > The point is more about someone intentionally trying to do that. > > > > > > > > > The binderfs sysctl stuff is really small code-wi
Re: [PATCH v1 2/2] binderfs: reserve devices for initial mount
On Sun, Dec 23, 2018 at 6:36 AM Christian Brauner wrote: > > The binderfs instance in the initial ipc namespace will always have a > reserve of 4 binder devices unless explicitly capped by specifying a lower > value via the "max" mount option. > This ensures when binder devices are removed (on accident or on purpose) > they can always be recreated without risking that all minor numbers have > already been used up. > > Cc: Todd Kjos > Cc: Greg Kroah-Hartman > Signed-off-by: Christian Brauner > --- > v1: > - patch introduced > v0: > - patch not present > --- > drivers/android/binderfs.c | 7 ++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c > index 873adda064ac..aa635c7ea727 100644 > --- a/drivers/android/binderfs.c > +++ b/drivers/android/binderfs.c > @@ -40,6 +40,8 @@ > #define INODE_OFFSET 3 > #define INTSTRLEN 21 > #define BINDERFS_MAX_MINOR (1U << MINORBITS) > +/* Ensure that the initial ipc namespace always has a devices available. */ > +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) Why do you ever need more minors per instance than the number of devices listed in CONFIG_ANDROID_BINDER_DEVICES? > > static struct vfsmount *binderfs_mnt; > > @@ -129,11 +131,14 @@ static int binderfs_binder_device_create(struct inode > *ref_inode, > struct inode *inode = NULL; > struct super_block *sb = ref_inode->i_sb; > struct binderfs_info *info = sb->s_fs_info; > + bool use_reserve = (info->ipc_ns == &init_ipc_ns); > > /* Reserve new minor number for the new device. */ > mutex_lock(&binderfs_minors_mutex); > if (++info->device_count <= info->mount_opts.max) > - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, > + minor = ida_alloc_max(&binderfs_minors, > + use_reserve ? BINDERFS_MAX_MINOR : > + BINDERFS_MAX_MINOR_CAPPED, > GFP_KERNEL); > else > minor = -ENOSPC; > -- > 2.19.1 >
Re: [PATCH v1 2/2] binderfs: reserve devices for initial mount
On Thu, Jan 3, 2019 at 12:34 PM Christian Brauner wrote: > > On Thu, Jan 03, 2019 at 12:25:24PM -0800, Todd Kjos wrote: > > On Sun, Dec 23, 2018 at 6:36 AM Christian Brauner > > wrote: > > > > > > The binderfs instance in the initial ipc namespace will always have a > > > reserve of 4 binder devices unless explicitly capped by specifying a lower > > > value via the "max" mount option. > > > This ensures when binder devices are removed (on accident or on purpose) > > > they can always be recreated without risking that all minor numbers have > > > already been used up. > > > > > > Cc: Todd Kjos > > > Cc: Greg Kroah-Hartman > > > Signed-off-by: Christian Brauner > > > --- > > > v1: > > > - patch introduced > > > v0: > > > - patch not present > > > --- > > > drivers/android/binderfs.c | 7 ++- > > > 1 file changed, 6 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c > > > index 873adda064ac..aa635c7ea727 100644 > > > --- a/drivers/android/binderfs.c > > > +++ b/drivers/android/binderfs.c > > > @@ -40,6 +40,8 @@ > > > #define INODE_OFFSET 3 > > > #define INTSTRLEN 21 > > > #define BINDERFS_MAX_MINOR (1U << MINORBITS) > > > +/* Ensure that the initial ipc namespace always has a devices available. > > > */ > > > +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) > > > > Why do you ever need more minors per instance than the number of > > devices listed in CONFIG_ANDROID_BINDER_DEVICES? > > Are you asking asking why this is 4 and not 3? In that case we should > probably parse CONFIG_ANDROID_BINDER_DEVICES once at init time and then > reserve that many binder devices. Thoughts? I'm asking why CONFIG_ANDROID_BINDER_DEVICES isn't the source of truth for the number of devices (it normally specifies 3 devices, but could be different). I can't think of a reason why you'd want CONFIG_MAX_MINOR_CAPPED to be different than the number of devices indicated by CONFIG_ANDROID_BINDER_DEVICES and having it specified in two places seems error prone. > > Christian
Re: [PATCH v1 2/2] binderfs: reserve devices for initial mount
On Thu, Jan 3, 2019 at 2:08 PM Christian Brauner wrote: > > On Thu, Jan 03, 2019 at 01:47:13PM -0800, Todd Kjos wrote: > > On Thu, Jan 3, 2019 at 12:34 PM Christian Brauner > > wrote: > > > > > > On Thu, Jan 03, 2019 at 12:25:24PM -0800, Todd Kjos wrote: > > > > On Sun, Dec 23, 2018 at 6:36 AM Christian Brauner > > > > wrote: > > > > > > > > > > The binderfs instance in the initial ipc namespace will always have a > > > > > reserve of 4 binder devices unless explicitly capped by specifying a > > > > > lower > > > > > value via the "max" mount option. > > > > > This ensures when binder devices are removed (on accident or on > > > > > purpose) > > > > > they can always be recreated without risking that all minor numbers > > > > > have > > > > > already been used up. > > > > > > > > > > Cc: Todd Kjos > > > > > Cc: Greg Kroah-Hartman > > > > > Signed-off-by: Christian Brauner > > > > > --- > > > > > v1: > > > > > - patch introduced > > > > > v0: > > > > > - patch not present > > > > > --- > > > > > drivers/android/binderfs.c | 7 ++- > > > > > 1 file changed, 6 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c > > > > > index 873adda064ac..aa635c7ea727 100644 > > > > > --- a/drivers/android/binderfs.c > > > > > +++ b/drivers/android/binderfs.c > > > > > @@ -40,6 +40,8 @@ > > > > > #define INODE_OFFSET 3 > > > > > #define INTSTRLEN 21 > > > > > #define BINDERFS_MAX_MINOR (1U << MINORBITS) > > > > > +/* Ensure that the initial ipc namespace always has a devices > > > > > available. */ > > > > > +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) > > > > > > > > Why do you ever need more minors per instance than the number of > > > > devices listed in CONFIG_ANDROID_BINDER_DEVICES? > > > > > > Are you asking asking why this is 4 and not 3? In that case we should > > > probably parse CONFIG_ANDROID_BINDER_DEVICES once at init time and then > > > reserve that many binder devices. Thoughts? > > > > I'm asking why CONFIG_ANDROID_BINDER_DEVICES isn't the source of truth > > for the number of devices (it normally specifies 3 devices, but could > > be different). I can't think of a reason why you'd want > > CONFIG_MAX_MINOR_CAPPED to be different than the number of devices > > indicated by CONFIG_ANDROID_BINDER_DEVICES and having it specified in > > two places seems error prone. > > I'm not following. The CONFIG_MAX_MINOR_CAPPED and > CONFIG_ANDROID_BINDER_DEVICES do not have a relation to each other just > like binder devices requested in CONFIG_ANDROID_BINDER_DEVICES do not > have a relation to binderfs binder devices as was requested for this > patchset. > I also don't know what you mean "appear in two places". > > The fact is, binderfs binder devices are independent of binderfs binder > devices. So it is perfectly reasonable for someone to say > CONFIG_ANDROID_BINDER_DEVICES="" and *only* rely on binderfs itself. > Which absolutely need to be possible. > What I want in such scenarios is that people always have a number of > binderfs binder devices guaranteed to be available in the binderfs mount > in the initial ipc namespace no matter how many devices are allocated in > non-initial ipc namespace binderfs mounts. That's why allo non-initial > ipc namespace binderfs mounts will use the CONFIG_MAX_MINOR_CAPPED macro > which guarantees that there's number of devices reserved for the > binderfs instance in the initial ipc namespace. Yes, you are right. Cobwebs from vacation -- I confused this with the previous non-binderfs implementation that was posted that did use CONFIG_ANDROID_BINDER_DEVICES to instantiate the devices in all containers. In binderfs, that is only used for the initial (default) devices.
Re: [PATCH] MAINTAINERS: Add me to Android drivers
On Thu, Oct 4, 2018 at 6:50 PM Joel Fernandes (Google) wrote: > > I am one of the main engineers working on ashmem. I have been fixing > bugs in the driver and have been involved in the memfd conversion > discussions and sending patches about that. I also have an understanding > of the binder driver and was involved with some of the development on > finer grained locking. So I would like to be added to the MAINTAINERS > file for android drivers for review and maintenance of ashmem and other > Android drivers. > > Cc: tk...@google.com > Cc: gre...@linuxfoundation.org > Signed-off-by: Joel Fernandes (Google) > --- > MAINTAINERS | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/MAINTAINERS b/MAINTAINERS > index 544cac829cf4..d639c4d04438 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -894,6 +894,7 @@ M: Greg Kroah-Hartman > M: Arve Hjønnevåg > M: Todd Kjos > M: Martijn Coenen > +M: Joel Fernandes > T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git > L: de...@driverdev.osuosl.org > S: Supported > -- > 2.19.0.605.g01d371f741-goog > Nice to have someone focused on ashmem maintenance! Acked-by: Todd Kjos
Re: [PATCH] binder: ipc namespace support for android binder
+christ...@brauner.io On Sun, Oct 28, 2018 at 7:29 PM chouryzhou(周威) wrote: ... > > > It's not obvious from this patch where this dependency comes > > from...why is SYSVIPC required? I'd like to not have to require IPC_NS > > either for devices. > > Yes, the patch is not highly dependent on SYSVIPC, but it will be convenient > if require it. I will update it to drop dependency of it in V2 patch. This > patch > doesn't need IPC_NS set at present. Actually it is dependent on IPC_NS since it makes changes to ipc/namespace.c which is compiled only if CONFIG_IPC_NS. There are a couple more implementations similar to this one. https://lwn.net/Articles/577957/ and some submissions to AOSP derived from that one that introduce a generic registration function for namespace support [1], and changes to binder to implement namespaces [2]. If this is really needed, then we should have a solution that works for devices without requiring IPC_NS or SYSVIPC. Also, we should not add binder-specific code to ipc/namespace.c or include/linux/ipc_namespace.h. -Todd [1] https://android-review.googlesource.com/c/kernel/common/+/471961 [2] https://android-review.googlesource.com/c/kernel/common/+/471825
Re: [PATCH v2] ANDROID: binder: Add BINDER_GET_NODE_INFO_FOR_REF ioctl.
On Fri, Sep 7, 2018 at 6:38 AM Martijn Coenen wrote: > > This allows the context manager to retrieve information about nodes > that it holds a reference to, such as the current number of > references to those nodes. > > Such information can for example be used to determine whether the > servicemanager is the only process holding a reference to a node. > This information can then be passed on to the process holding the > node, which can in turn decide whether it wants to shut down to > reduce resource usage. > > Signed-off-by: Martijn Coenen > --- > v2: made sure reserved fields are aligned, and enforce caller zeroes > all fields except handle, as suggested by Dan Carpenter. > > drivers/android/binder.c| 55 + > include/uapi/linux/android/binder.h | 10 ++ > 2 files changed, 65 insertions(+) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index d58763b6b0090..5b25412e15ccf 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -4544,6 +4544,42 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) > return ret; > } > > +static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc, > + struct binder_node_info_for_ref *info) > +{ > + struct binder_node *node; > + struct binder_context *context = proc->context; > + __u32 handle = info->handle; > + > + if (info->strong_count || info->weak_count || info->reserved1 || > + info->reserved2 || info->reserved3) { > + binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only > handle may be non-zero.", > + proc->pid); > + return -EINVAL; > + } > + > + /* This ioctl may only be used by the context manager */ > + mutex_lock(&context->context_mgr_node_lock); > + if (!context->binder_context_mgr_node || > + context->binder_context_mgr_node->proc != proc) { > + mutex_unlock(&context->context_mgr_node_lock); > + return -EPERM; > + } > + mutex_unlock(&context->context_mgr_node_lock); > + > + node = binder_get_node_from_ref(proc, handle, true, NULL); > + if (!node) > + return -EINVAL; > + > + info->strong_count = node->local_strong_refs + > + node->internal_strong_refs; > + info->weak_count = node->local_weak_refs; Aren't we under-reporting the weak refs here? The "internal weak" refs are the count of elements in node->refs, so we need to add something like: hlist_for_each_entry(ref, &node->refs, node_entry) info->weak_count++; > + > + binder_put_node(node); > + > + return 0; > +} > + > static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, > struct binder_node_debug_info *info) > { > @@ -4638,6 +4674,25 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > } > break; > } > + case BINDER_GET_NODE_INFO_FOR_REF: { > + struct binder_node_info_for_ref info; > + > + if (copy_from_user(&info, ubuf, sizeof(info))) { > + ret = -EFAULT; > + goto err; > + } > + > + ret = binder_ioctl_get_node_info_for_ref(proc, &info); > + if (ret < 0) > + goto err; > + > + if (copy_to_user(ubuf, &info, sizeof(info))) { > + ret = -EFAULT; > + goto err; > + } > + > + break; > + } > case BINDER_GET_NODE_DEBUG_INFO: { > struct binder_node_debug_info info; > > diff --git a/include/uapi/linux/android/binder.h > b/include/uapi/linux/android/binder.h > index bfaec6903b8bc..b9ba520f7e4bb 100644 > --- a/include/uapi/linux/android/binder.h > +++ b/include/uapi/linux/android/binder.h > @@ -200,6 +200,15 @@ struct binder_node_debug_info { > __u32has_weak_ref; > }; > > +struct binder_node_info_for_ref { > + __u32handle; > + __u32strong_count; > + __u32weak_count; > + __u32reserved1; > + __u32reserved2; > + __u32reserved3; > +}; > + > #define BINDER_WRITE_READ _IOWR('b', 1, struct > binder_write_read) > #define BINDER_SET_IDLE_TIMEOUT_IOW('b', 3, __s64) > #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) > @@ -208,6 +217,7 @@ struct binder_node_debug_info { > #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) > #define BINDER_VERSION _IOWR('b', 9, struct binder_version) > #define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct > binder_node_debug_info) > +#define BINDER_GET_NODE_INFO_FOR_REF _IOWR('b', 12, struct > binder_node_info_for_ref) > > /* > * NOTE: Two
[PATCH] binder: fix race that allows malicious free of live buffer
Malicious code can attempt to free buffers using the BC_FREE_BUFFER ioctl to binder. There are protections against a user freeing a buffer while in use by the kernel, however there was a window where BC_FREE_BUFFER could be used to free a recently allocated buffer that was not completely initialized. This resulted in a use-after-free detected by KASAN with a malicious test program. This window is closed by setting the buffer's allow_user_free attribute to 0 when the buffer is allocated or when the user has previously freed it instead of waiting for the caller to set it. The problem was that when the struct buffer was recycled, allow_user_free was stale and set to 1 allowing a free to go through. Signed-off-by: Todd Kjos Acked-by: Arve Hjønnevåg --- drivers/android/binder.c | 21 - drivers/android/binder_alloc.c | 16 ++-- drivers/android/binder_alloc.h | 3 +-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cb30a524d16d8..9f1000d2a40c7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2974,7 +2974,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = NULL; goto err_binder_alloc_buf_failed; } - t->buffer->allow_user_free = 0; t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; @@ -3510,14 +3509,18 @@ static int binder_thread_write(struct binder_proc *proc, buffer = binder_alloc_prepare_to_free(&proc->alloc, data_ptr); - if (buffer == NULL) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n", - proc->pid, thread->pid, (u64)data_ptr); - break; - } - if (!buffer->allow_user_free) { - binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n", - proc->pid, thread->pid, (u64)data_ptr); + if (IS_ERR_OR_NULL(buffer)) { + if (PTR_ERR(buffer) == -EPERM) { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n", + proc->pid, thread->pid, + (u64)data_ptr); + } else { + binder_user_error( + "%d:%d BC_FREE_BUFFER u%016llx no match\n", + proc->pid, thread->pid, + (u64)data_ptr); + } break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 64fd96eada31f..030c98f35cca7 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -151,16 +151,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( else { /* * Guard against user threads attempting to -* free the buffer twice +* free the buffer when in use by kernel or +* after it's already been freed. */ - if (buffer->free_in_progress) { - binder_alloc_debug(BINDER_DEBUG_USER_ERROR, - "%d:%d FREE_BUFFER u%016llx user freed buffer twice\n", - alloc->pid, current->pid, - (u64)user_ptr); - return NULL; - } - buffer->free_in_progress = 1; + if (!buffer->allow_user_free) + return ERR_PTR(-EPERM); + buffer->allow_user_free = 0; return buffer; } } @@ -500,7 +496,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked( rb_erase(best_fit, &alloc->free_buffers); buffer->free = 0; - buffer->free_in_progress = 0; + buffer->allow_user_free = 0; binder_insert_allocated_buffer_locked(alloc, buffer); binder_alloc_
[PATCH] binder: fix sparse warnings on locking context
Add __acquire()/__release() annnotations to fix warnings in sparse context checking There is one case where the warning was due to a lack of a "default:" case in a switch statement where a lock was being released in each of the cases, so the default case was added. Signed-off-by: Todd Kjos --- drivers/android/binder.c | 43 +- drivers/android/binder_alloc.c | 1 + 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9f1000d2a40c7..9f2059d24ae2d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -660,6 +660,7 @@ struct binder_transaction { #define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__) static void _binder_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -675,6 +676,7 @@ _binder_proc_lock(struct binder_proc *proc, int line) #define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__) static void _binder_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->outer_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -690,6 +692,7 @@ _binder_proc_unlock(struct binder_proc *proc, int line) #define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__) static void _binder_inner_proc_lock(struct binder_proc *proc, int line) + __acquires(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -705,6 +708,7 @@ _binder_inner_proc_lock(struct binder_proc *proc, int line) #define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__) static void _binder_inner_proc_unlock(struct binder_proc *proc, int line) + __releases(&proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -720,6 +724,7 @@ _binder_inner_proc_unlock(struct binder_proc *proc, int line) #define binder_node_lock(node) _binder_node_lock(node, __LINE__) static void _binder_node_lock(struct binder_node *node, int line) + __acquires(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -735,6 +740,7 @@ _binder_node_lock(struct binder_node *node, int line) #define binder_node_unlock(node) _binder_node_unlock(node, __LINE__) static void _binder_node_unlock(struct binder_node *node, int line) + __releases(&node->lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); @@ -751,12 +757,16 @@ _binder_node_unlock(struct binder_node *node, int line) #define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__) static void _binder_node_inner_lock(struct binder_node *node, int line) + __acquires(&node->lock) __acquires(&node->proc->inner_lock) { binder_debug(BINDER_DEBUG_SPINLOCKS, "%s: line=%d\n", __func__, line); spin_lock(&node->lock); if (node->proc) binder_inner_proc_lock(node->proc); + else + /* annotation for sparse */ + __acquire(&node->proc->inner_lock); } /** @@ -768,6 +778,7 @@ _binder_node_inner_lock(struct binder_node *node, int line) #define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__) static void _binder_node_inner_unlock(struct binder_node *node, int line) + __releases(&node->lock) __releases(&node->proc->inner_lock) { struct binder_proc *proc = node->proc; @@ -775,6 +786,9 @@ _binder_node_inner_unlock(struct binder_node *node, int line) "%s: line=%d\n", __func__, line); if (proc) binder_inner_proc_unlock(proc); + else + /* annotation for sparse */ + __release(&node->proc->inner_lock); spin_unlock(&node->lock); } @@ -1384,10 +1398,14 @@ static void binder_dec_node_tmpref(struct binder_node *node) binder_node_inner_lock(node); if (!node->proc) spin_lock(&binder_dead_nodes_lock); + else + __acquire(&binder_dead_nodes_lock); node->tmp_refs--; BUG_ON(node->tmp_refs < 0); if (!node->proc) spin_unlock(&binder_dead_nodes_lock); + else + __release(&binder_dead_nodes_lock); /* * Call binder_dec_node() to check if all refcounts are 0 * and cleanup is needed. Calling with strong=0 and internal=1 @@ -1890,18 +1908,22 @@ static str
Re: [PATCH] binder: ipc namespace support for android binder
On Fri, Oct 26, 2018 at 2:20 AM chouryzhou(周威) wrote: > > Hi > We are working for running android in container, but we found that binder is > not isolated by ipc namespace. Since binder is a form of IPC and therefore > should > be tied to ipc namespace. With this patch, we can run more than one android > container on one host. > This patch move "binder_procs" and "binder_context" into ipc_namespace, > driver will find the context from it when opening. Althought statistics in > debugfs > remain global. > > Signed-off-by: choury zhou > --- > drivers/android/Kconfig | 2 +- > drivers/android/binder.c | 126 +- > include/linux/ipc_namespace.h | 14 > ipc/namespace.c | 4 ++ > 4 files changed, 111 insertions(+), 35 deletions(-) > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig > index 432e9ad77070..09883443b2da 100644 > --- a/drivers/android/Kconfig > +++ b/drivers/android/Kconfig > @@ -10,7 +10,7 @@ if ANDROID > > config ANDROID_BINDER_IPC > bool "Android Binder IPC Driver" > - depends on MMU > + depends on MMU && SYSVIPC NAK. We can't force SYSVIPC on for Android. The notion of running binder in a container is reasonable, but needs to be done without explicit requirement for SYSVIPC. binder-in-container is a topic in the android microconf at Linux plumbers -- are you going to be at LPC? It's not obvious from this patch where this dependency comes from...why is SYSVIPC required? I'd like to not have to require IPC_NS either for devices. > default n > ---help--- > Binder is used in Android for both communication between processes, > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index d58763b6b009..e061dba9b8b3 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -68,6 +68,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -79,13 +80,12 @@ > #include "binder_alloc.h" > #include "binder_trace.h" > > +#define ipcns (current->nsproxy->ipc_ns) > + > static HLIST_HEAD(binder_deferred_list); > static DEFINE_MUTEX(binder_deferred_lock); > > static HLIST_HEAD(binder_devices); > -static HLIST_HEAD(binder_procs); > -static DEFINE_MUTEX(binder_procs_lock); > - > static HLIST_HEAD(binder_dead_nodes); > static DEFINE_SPINLOCK(binder_dead_nodes_lock); > > @@ -231,7 +231,7 @@ struct binder_transaction_log_entry { > int return_error_line; > uint32_t return_error; > uint32_t return_error_param; > - const char *context_name; > + int context_device; > }; > struct binder_transaction_log { > atomic_t cur; > @@ -262,19 +262,66 @@ static struct binder_transaction_log_entry > *binder_transaction_log_add( > } > > struct binder_context { > + struct hlist_node hlist; > struct binder_node *binder_context_mgr_node; > struct mutex context_mgr_node_lock; > > kuid_t binder_context_mgr_uid; > - const char *name; > + intdevice; > }; > > struct binder_device { > struct hlist_node hlist; > struct miscdevice miscdev; > - struct binder_context context; > }; > > +void binder_exit_ns(struct ipc_namespace *ns) > +{ > + struct binder_context *context; > + struct hlist_node *tmp; > + > + mutex_destroy(&ns->binder_procs_lock); > + mutex_destroy(&ns->binder_contexts_lock); > + hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) { > + mutex_destroy(&context->context_mgr_node_lock); > + hlist_del(&context->hlist); > + kfree(context); > + } > +} > + > +int binder_init_ns(struct ipc_namespace *ns) > +{ > + int ret; > + struct binder_device *device; > + > + mutex_init(&ns->binder_procs_lock); > + INIT_HLIST_HEAD(&ns->binder_procs); > + mutex_init(&ns->binder_contexts_lock); > + INIT_HLIST_HEAD(&ns->binder_contexts); > + > + hlist_for_each_entry(device, &binder_devices, hlist) { > + struct binder_context *context; > + > + context = kzalloc(sizeof(*context), GFP_KERNEL); > + if (!context) { > + ret = -ENOMEM; > + goto err; > + } > + > + context->device = device->miscdev.minor; > + context->binder_context_mgr_uid = INVALID_UID; > + mutex_init(&context->context_mgr_node_lock); > + > + hlist_add_head(&context->hlist, &ns->binder_contexts); > + } > + > + return 0; > +err: > + binder_exit_ns(ns); > + return ret; > +} > + > + > /** > * struct binder_work - work enqueued on a worklist > * @entry: node enqueued on list > @@ -2748,7 +2795,7 @@ static void binder_transaction(struct binder_proc *proc, > e->target_handle = tr->target.handle; > e->data_size = tr->data_size
Re: [PATCH v4] binder: tell userspace to dump current backtrace when detecting oneway spamming
On Tue, Apr 6, 2021 at 9:15 PM Hang Lu wrote: > > When async binder buffer got exhausted, some normal oneway transactions > will also be discarded and may cause system or application failures. By > that time, the binder debug information we dump may not be relevant to > the root cause. And this issue is difficult to debug if without the > backtrace of the thread sending spam. > > This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway > spamming is detected, request to dump current backtrace. Oneway spamming > will be reported only once when exceeding the threshold (target process > dips below 80% of its oneway space, and current process is responsible for > either more than 50 transactions, or more than 50% of the oneway space). > And the detection will restart when the async buffer has returned to a > healthy state. > > Signed-off-by: Hang Lu > --- > v4: add placeholder for BR_FROZEN_REPLY in binder_return_strings for not > triggering BUG_ON in print_binder_stats Instead of a placeholder, please rebase this series onto Greg's char-misc-next branch in git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git and add a new patch that fixes the missing "BR_FROZEN_REPLY". > > v3: add BR_ONEWAY_SPAM_SUSPECT to binder_return_strings > > v2: make the detection on/off switch to be per-proc > > drivers/android/binder.c| 31 +++ > drivers/android/binder_alloc.c | 15 --- > drivers/android/binder_alloc.h | 8 +++- > drivers/android/binder_internal.h | 6 +- > include/uapi/linux/android/binder.h | 8 > 5 files changed, 59 insertions(+), 9 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c119736..7046af90 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc > *proc, > goto err_bad_object_type; > } > } > - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > + if (t->buffer->oneway_spam_suspect) > + tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; > + else > + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > t->work.type = BINDER_WORK_TRANSACTION; > > if (reply) { > @@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc, > > binder_stat_br(proc, thread, cmd); > } break; > - case BINDER_WORK_TRANSACTION_COMPLETE: { > + case BINDER_WORK_TRANSACTION_COMPLETE: > + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: { > + if (proc->oneway_spam_detection_enabled && > + w->type == > BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT) > + cmd = BR_ONEWAY_SPAM_SUSPECT; > + else > + cmd = BR_TRANSACTION_COMPLETE; > binder_inner_proc_unlock(proc); > - cmd = BR_TRANSACTION_COMPLETE; > kfree(w); > > binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); > if (put_user(cmd, (uint32_t __user *)ptr)) > @@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > } > break; > } > + case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { > + uint32_t enable; > + > + if (copy_from_user(&enable, ubuf, sizeof(enable))) { > + ret = -EINVAL; > + goto err; > + } > + binder_inner_proc_lock(proc); > + proc->oneway_spam_detection_enabled = (bool)enable; > + binder_inner_proc_unlock(proc); > + break; > + } > default: > ret = -EINVAL; > goto err; > @@ -5385,7 +5405,10 @@ static const char * const binder_return_strings[] = { > "BR_FINISHED", > "BR_DEAD_BINDER", > "BR_CLEAR_DEATH_NOTIFICATION_DONE", > - "BR_FAILED_REPLY" > + "BR_FAILED_REPLY", > + /* set placeholder for BR_FROZEN_REPLY */ > + "PLACEHOLDER", This should be in a new patch that fixes the issue for the previous patch. > + "BR_ONEWAY_SPAM_SUSPECT" > }; > > static const char * const binder_command_strings[] = { > diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c > index 7caf74a..340515f 100644 > --- a/drivers/android/binder_alloc.c > +++ b/drivers/android/binder_alloc.c > @@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma( > return vma; > } > > -static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > +static bool debug_low_async_space_locked(struct binder_alloc
Re: [PATCH v2 1/4] ARM: amba: Make driver_override output consistent with other buses
Reviewed-by: Todd Kjos On Tue, Apr 10, 2018 at 6:21 AM, Geert Uytterhoeven wrote: > For AMBA devices with unconfigured driver override, the > "driver_override" sysfs virtual file is empty, while it contains > "(null)" for platform and PCI devices. > > Make AMBA consistent with other buses by dropping the test for a NULL > pointer. > > Note that contrary to popular belief, sprintf() handles NULL pointers > fine; they are printed as "(null)". > > Signed-off-by: Geert Uytterhoeven > --- > drivers/amba/bus.c | 3 --- > 1 file changed, 3 deletions(-) > > diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c > index 594c228d2f021123..6ffd778352e6d953 100644 > --- a/drivers/amba/bus.c > +++ b/drivers/amba/bus.c > @@ -70,9 +70,6 @@ static ssize_t driver_override_show(struct device *_dev, > { > struct amba_device *dev = to_amba_device(_dev); > > - if (!dev->driver_override) > - return 0; > - > return sprintf(buf, "%s\n", dev->driver_override); > } > > -- > 2.7.4 >
Re: [PATCH v2 2/4] ARM: amba: Fix race condition with driver_override
Reviewed-by: Todd Kjos On Tue, Apr 10, 2018 at 6:21 AM, Geert Uytterhoeven wrote: > The driver_override implementation is susceptible to a race condition > when different threads are reading vs storing a different driver > override. Add locking to avoid this race condition. > > Cfr. commits 6265539776a0810b ("driver core: platform: fix race > condition with driver_override") and 9561475db680f714 ("PCI: Fix race > condition with driver_override"). > > Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path > 'driver_override'") > Signed-off-by: Geert Uytterhoeven > --- > drivers/amba/bus.c | 11 +-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c > index 6ffd778352e6d953..36c5653ced5742b7 100644 > --- a/drivers/amba/bus.c > +++ b/drivers/amba/bus.c > @@ -69,8 +69,12 @@ static ssize_t driver_override_show(struct device *_dev, > struct device_attribute *attr, char *buf) > { > struct amba_device *dev = to_amba_device(_dev); > + ssize_t len; > > - return sprintf(buf, "%s\n", dev->driver_override); > + device_lock(_dev); > + len = sprintf(buf, "%s\n", dev->driver_override); > + device_unlock(_dev); > + return len; > } > > static ssize_t driver_override_store(struct device *_dev, > @@ -78,7 +82,7 @@ static ssize_t driver_override_store(struct device *_dev, > const char *buf, size_t count) > { > struct amba_device *dev = to_amba_device(_dev); > - char *driver_override, *old = dev->driver_override, *cp; > + char *driver_override, *old, *cp; > > if (count > PATH_MAX) > return -EINVAL; > @@ -91,12 +95,15 @@ static ssize_t driver_override_store(struct device *_dev, > if (cp) > *cp = '\0'; > > + device_lock(_dev); > + old = dev->driver_override; > if (strlen(driver_override)) { > dev->driver_override = driver_override; > } else { >kfree(driver_override); >dev->driver_override = NULL; > } > + device_unlock(_dev); > > kfree(old); > > -- > 2.7.4 >
Re: [PATCH v2 3/4] ARM: amba: Don't read past the end of sysfs "driver_override" buffer
Reviewed-by: Todd Kjos On Tue, Apr 10, 2018 at 6:21 AM, Geert Uytterhoeven wrote: > When printing the driver_override parameter when it is 4095 and 4094 > bytes long, the printing code would access invalid memory because we > need count + 1 bytes for printing. > > Cfr. commits 4efe874aace57dba ("PCI: Don't read past the end of sysfs > "driver_override" buffer") and bf563b01c2895a4b ("driver core: platform: > Don't read past the end of "driver_override" buffer"). > > Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path > 'driver_override'") > Signed-off-by: Geert Uytterhoeven > --- > drivers/amba/bus.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c > index 36c5653ced5742b7..4a3ac31c07d0ee49 100644 > --- a/drivers/amba/bus.c > +++ b/drivers/amba/bus.c > @@ -84,7 +84,8 @@ static ssize_t driver_override_store(struct device *_dev, > struct amba_device *dev = to_amba_device(_dev); > char *driver_override, *old, *cp; > > - if (count > PATH_MAX) > + /* We need to keep extra room for a newline */ > + if (count >= (PAGE_SIZE - 1)) > return -EINVAL; > > driver_override = kstrndup(buf, count, GFP_KERNEL); > -- > 2.7.4 >
Re: [PATCH v2 4/4] ARM: amba: Fix wrong indentation in driver_override_store()
Reviewed-by: Todd Kjos On Tue, Apr 10, 2018 at 6:21 AM, Geert Uytterhoeven wrote: > Indentation is one TAB and 7 spaces instead of 2 TABs. > > Fixes: 3cf385713460eb2b ("ARM: 8256/1: driver coamba: add device binding path > 'driver_override'") > Signed-off-by: Geert Uytterhoeven > --- > drivers/amba/bus.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c > index 4a3ac31c07d0ee49..842314a439fdd4eb 100644 > --- a/drivers/amba/bus.c > +++ b/drivers/amba/bus.c > @@ -101,8 +101,8 @@ static ssize_t driver_override_store(struct device *_dev, > if (strlen(driver_override)) { > dev->driver_override = driver_override; > } else { > - kfree(driver_override); > - dev->driver_override = NULL; > + kfree(driver_override); > + dev->driver_override = NULL; > } > device_unlock(_dev); > > -- > 2.7.4 >
[PATCH] binder: fix race between munmap() and direct reclaim
An munmap() on a binder device causes binder_vma_close() to be called which clears the alloc->vma pointer. If direct reclaim causes binder_alloc_free_page() to be called, there is a race where alloc->vma is read into a local vma pointer and then used later after the mm->mmap_sem is acquired. This can result in calling zap_page_range() with an invalid vma which manifests as a use-after-free in zap_page_range(). The fix is to check alloc->vma after acquiring the mmap_sem (which we were acquiring anyway) and skip zap_page_range() if it has changed to NULL. Signed-off-by: Todd Kjos --- drivers/android/binder_alloc.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 6389467670a0b..195f120c4e8c9 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -927,14 +927,13 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; + + mm = alloc->vma_vm_mm; + if (!mmget_not_zero(mm)) + goto err_mmget; + if (!down_write_trylock(&mm->mmap_sem)) + goto err_down_write_mmap_sem_failed; vma = binder_alloc_get_vma(alloc); - if (vma) { - if (!mmget_not_zero(alloc->vma_vm_mm)) - goto err_mmget; - mm = alloc->vma_vm_mm; - if (!down_read_trylock(&mm->mmap_sem)) - goto err_down_write_mmap_sem_failed; - } list_lru_isolate(lru, item); spin_unlock(lock); @@ -945,10 +944,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item, zap_page_range(vma, page_addr, PAGE_SIZE); trace_binder_unmap_user_end(alloc, index); - - up_read(&mm->mmap_sem); - mmput(mm); } + up_write(&mm->mmap_sem); + mmput(mm); trace_binder_unmap_kernel_start(alloc, index); -- 2.21.0.352.gf09ad66450-goog
Re: [PATCH] binder: fix race between munmap() and direct reclaim
On Fri, Mar 1, 2019 at 11:57 PM Greg KH wrote: > > On Fri, Mar 01, 2019 at 03:06:06PM -0800, Todd Kjos wrote: > > An munmap() on a binder device causes binder_vma_close() to be called > > which clears the alloc->vma pointer. > > > > If direct reclaim causes binder_alloc_free_page() to be called, there > > is a race where alloc->vma is read into a local vma pointer and then > > used later after the mm->mmap_sem is acquired. This can result in > > calling zap_page_range() with an invalid vma which manifests as a > > use-after-free in zap_page_range(). > > > > The fix is to check alloc->vma after acquiring the mmap_sem (which we > > were acquiring anyway) and skip zap_page_range() if it has changed > > to NULL. > > > > Signed-off-by: Todd Kjos > > --- > > Any specific commit that this fixes? No, it's been there a long time. > And should it be marked for stable releases? It is needed in stable (back to 4.4), but will need to be backported. Should I post backported versions targeting the specific releases now? I was thinking we'd wait for this one to land. I think we'll need 1 patch for 4.4/4.9 and a second one for 4.14/4.19 (and some of those backported patches will have conflicts when merged down to android-4.X -- I think the 4.14/4.19 version will apply to all the android branches). Let me know how you want to handle this. > > thanks, > > greg k-h
[PATCH] binder: check for overflow when alloc for security context
When allocating space in the target buffer for the security context, make sure the extra_buffers_size doesn't overflow. This can only happen if the given size is invalid, but an overflow can turn it into a valid size. Fail the transaction if an overflow is detected. Signed-off-by: Todd Kjos --- drivers/android/binder.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 4b9c7ca492e6d..6f0712f0767c5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3121,6 +3121,7 @@ static void binder_transaction(struct binder_proc *proc, if (target_node && target_node->txn_security_ctx) { u32 secid; + size_t added_size; security_task_getsecid(proc->tsk, &secid); ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); @@ -3130,7 +3131,15 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_get_secctx_failed; } - extra_buffers_size += ALIGN(secctx_sz, sizeof(u64)); + added_size = ALIGN(secctx_sz, sizeof(u64)); + extra_buffers_size += added_size; + if (extra_buffers_size < added_size) { + /* integer overflow of extra_buffers_size */ + return_error = BR_FAILED_REPLY; + return_error_param = EINVAL; + return_error_line = __LINE__; + goto err_bad_extra_size; + } } trace_binder_transaction(reply, t, target_node); @@ -3480,6 +3489,7 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: +err_bad_extra_size: if (secctx) security_release_secctx(secctx, secctx_sz); err_get_secctx_failed: -- 2.21.0.593.g511ec345e18-goog
Re: [PATCH v1] binderfs: remove separate device_initcall()
On Wed, Jan 30, 2019 at 4:25 PM Christian Brauner wrote: > > binderfs should not have a separate device_initcall(). When a kernel is > compiled with CONFIG_ANDROID_BINDERFS register the filesystem alongside > CONFIG_ANDROID_IPC. This use-case is especially sensible when users specify > CONFIG_ANDROID_IPC=y, CONFIG_ANDROID_BINDERFS=y and > ANDROID_BINDER_DEVICES="". > When CONFIG_ANDROID_BINDERFS=n then this always succeeds so there's no > regression potential for legacy workloads. > > Signed-off-by: Christian Brauner Acked-by: Todd Kjos > > --- > /* Changelog */ > - ensure that device_name is set to NULL so kfree() doesn't freak out > --- > drivers/android/binder.c | 7 ++- > drivers/android/binder_internal.h | 9 + > drivers/android/binderfs.c| 4 +--- > 3 files changed, 16 insertions(+), 4 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 57cf259de600..4d2b2ad1ee0e 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -5854,9 +5854,10 @@ static int __init init_binder_device(const char *name) > static int __init binder_init(void) > { > int ret; > - char *device_name, *device_names, *device_tmp; > + char *device_name, *device_tmp; > struct binder_device *device; > struct hlist_node *tmp; > + char *device_names = NULL; > > ret = binder_alloc_shrinker_init(); > if (ret) > @@ -5917,6 +5918,10 @@ static int __init binder_init(void) > } > } > > + ret = init_binderfs(); > + if (ret) > + goto err_init_binder_device_failed; > + > return ret; > > err_init_binder_device_failed: > diff --git a/drivers/android/binder_internal.h > b/drivers/android/binder_internal.h > index 7fb97f503ef2..045b3e42d98b 100644 > --- a/drivers/android/binder_internal.h > +++ b/drivers/android/binder_internal.h > @@ -46,4 +46,13 @@ static inline bool is_binderfs_device(const struct inode > *inode) > } > #endif > > +#ifdef CONFIG_ANDROID_BINDERFS > +extern int __init init_binderfs(void); > +#else > +static inline int __init init_binderfs(void) > +{ > + return 0; > +} > +#endif > + > #endif /* _LINUX_BINDER_INTERNAL_H */ > diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c > index 7a550104a722..e773f45d19d9 100644 > --- a/drivers/android/binderfs.c > +++ b/drivers/android/binderfs.c > @@ -550,7 +550,7 @@ static struct file_system_type binder_fs_type = { > .fs_flags = FS_USERNS_MOUNT, > }; > > -static int __init init_binderfs(void) > +int __init init_binderfs(void) > { > int ret; > > @@ -568,5 +568,3 @@ static int __init init_binderfs(void) > > return ret; > } > - > -device_initcall(init_binderfs); > -- > 2.20.1 >
Re: [PATCH] binder: fix CONFIG_ANDROID_BINDER_DEVICES
On Sat, Jan 26, 2019 at 2:23 AM Christian Brauner wrote: > > Several users have tried to only rely on binderfs to provide binder devices > and set CONFIG_ANDROID_BINDER_DEVICES="" empty. This is a great use-case of > binderfs and one that was always intended to work. However, this is > currently not possible since setting CONFIG_ANDROID_BINDER_DEVICES="" emtpy > will simply panic the kernel: > > kobject: (028c2f79): attempted to be registered with empty name! > WARNING: CPU: 7 PID: 1703 at lib/kobject.c:228 > kobject_add_internal+0x288/0x2b0 > Modules linked in: binder_linux(+) bridge stp llc ipmi_ssif gpio_ich dcdbas > coretemp kvm_intel kvm irqbypass serio_raw input_leds lpc_ich i5100_edac > mac_hid ipmi_si ipmi_devintf ipmi_msghandler sch_fq_codel ib_i > CPU: 7 PID: 1703 Comm: modprobe Not tainted 5.0.0-rc2-brauner-binderfs #263 > Hardware name: Dell DCS XS24-SC2 /XS24-SC2 , BIOS > S59_3C20 04/07/2011 > RIP: 0010:kobject_add_internal+0x288/0x2b0 > Code: 12 95 48 c7 c7 78 63 3b 95 e8 77 35 71 ff e9 91 fe ff ff 0f 0b eb a7 0f > 0b eb 9a 48 89 de 48 c7 c7 00 63 3b 95 e8 f8 95 6a ff <0f> 0b 41 bc ea ff ff > ff e9 6d fe ff ff 41 bc fe ff ff ff e9 62 fe > RSP: 0018:973f84237a30 EFLAGS: 00010282 > RAX: RBX: 8b53e2472010 RCX: 0006 > RDX: 0007 RSI: 0086 RDI: 8b53edbd63a0 > RBP: 973f84237a60 R08: 0342 R09: 0004 > R10: 973f84237af0 R11: 0001 R12: > R13: 8b53e9f1a1e0 R14: e9f1a1e0 R15: 00a00037 > FS: 7fbac36f7540() GS:8b53edbc() knlGS: > CS: 0010 DS: ES: CR0: 80050033 > CR2: 7fbac364cfa7 CR3: 0004a6d48000 CR4: 000406e0 > Call Trace: > kobject_add+0x71/0xd0 > ? _cond_resched+0x19/0x40 > ? mutex_lock+0x12/0x40 > device_add+0x12e/0x6b0 > device_create_groups_vargs+0xe4/0xf0 > device_create_with_groups+0x3f/0x60 > ? _cond_resched+0x19/0x40 > misc_register+0x140/0x180 > binder_init+0x1ed/0x2d4 [binder_linux] > ? trace_event_define_fields_binder_transaction_fd_send+0x8e/0x8e > [binder_linux] > do_one_initcall+0x4a/0x1c9 > ? _cond_resched+0x19/0x40 > ? kmem_cache_alloc_trace+0x151/0x1c0 > do_init_module+0x5f/0x216 > load_module+0x223d/0x2b20 > __do_sys_finit_module+0xfc/0x120 > ? __do_sys_finit_module+0xfc/0x120 > __x64_sys_finit_module+0x1a/0x20 > do_syscall_64+0x5a/0x120 > entry_SYSCALL_64_after_hwframe+0x44/0xa9 > RIP: 0033:0x7fbac3202839 > Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 > 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 > 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 > RSP: 002b:7ffd1494a908 EFLAGS: 0246 ORIG_RAX: 0139 > RAX: ffda RBX: 55b629ebec60 RCX: 7fbac3202839 > RDX: RSI: 55b629c20d2e RDI: 0003 > RBP: 55b629c20d2e R08: R09: 55b629ec2310 > R10: 0003 R11: 0246 R12: > R13: 55b629ebed70 R14: 0004 R15: 55b629ebec60 > > So check for the empty string since strsep() will otherwise return the > emtpy string which will cause kobject_add_internal() to panic when trying > to add a kobject with an emtpy name. > > Fixes: ac4812c5ffbb ("binder: Support multiple /dev instances") > Cc: Martijn Coenen > Signed-off-by: Christian Brauner Acked-by: Todd Kjos > --- > drivers/android/binder.c | 30 -- > 1 file changed, 16 insertions(+), 14 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 751d76173f81..5f7d6fe06eec 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -5898,21 +5898,23 @@ static int __init binder_init(void) > &transaction_log_fops); > } > > - /* > -* Copy the module_parameter string, because we don't want to > -* tokenize it in-place. > -*/ > - device_names = kstrdup(binder_devices_param, GFP_KERNEL); > - if (!device_names) { > - ret = -ENOMEM; > - goto err_alloc_device_names_failed; > - } > + if (strcmp(binder_devices_param, "") != 0) { > + /* > + * Copy the module_parameter string, because we don't want to > + * tokenize it in-place. > +*/ > + device_names = kstrdup(binder_devices_param, GFP_KERNEL); > + if (!device_names) { > +
Re: [PATCH v6 2/3] binder: add trace at free transaction.
On Mon, Jul 27, 2020 at 8:28 PM Frankie Chang wrote: > > From: "Frankie.Chang" > > Since the original trace_binder_transaction_received cannot > precisely present the real finished time of transaction, adding a > trace_binder_txn_latency_free at the point of free transaction > may be more close to it. > > Signed-off-by: Frankie.Chang > --- > drivers/android/binder.c |6 ++ > drivers/android/binder_trace.h | 27 +++ > 2 files changed, 33 insertions(+) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 2df146f..1e6fc40 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -1522,6 +1522,9 @@ static void binder_free_transaction(struct > binder_transaction *t) > * If the transaction has no target_proc, then > * t->buffer->transaction has already been cleared. > */ > + spin_lock(&t->lock); > + trace_binder_txn_latency_free(t); > + spin_unlock(&t->lock); Hmm. I don't prefer taking the lock just to call a trace. It doesn't make clear why the lock has to be taken. I'd prefer something like: if (trace_binder_txn_latency_free_enabled()) { int to_proc, to_thread; spin_lock(&t->lock); to_proc = t->to_proc ? t->to_proc->pid : 0; to_thread = t->to_thread ? t->to_thread->pid : 0; spin_unlock(&t->lock); trace_binder_txn_latency_free(t, to_proc, to_pid); } And then the trace would use the passed-in values instead of accessing via t->to_proc/to_thread. > binder_free_txn_fixups(t); > kfree(t); > binder_stats_deleted(BINDER_STAT_TRANSACTION); > @@ -3093,6 +3096,9 @@ static void binder_transaction(struct binder_proc *proc, > kfree(tcomplete); > binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); > err_alloc_tcomplete_failed: > + spin_lock(&t->lock); > + trace_binder_txn_latency_free(t); > + spin_unlock(&t->lock); > kfree(t); > binder_stats_deleted(BINDER_STAT_TRANSACTION); > err_alloc_t_failed: > diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h > index 6731c3c..8ac87d1 100644 > --- a/drivers/android/binder_trace.h > +++ b/drivers/android/binder_trace.h > @@ -95,6 +95,33 @@ > __entry->thread_todo) > ); > > +TRACE_EVENT(binder_txn_latency_free, > + TP_PROTO(struct binder_transaction *t), > + TP_ARGS(t), > + TP_STRUCT__entry( > + __field(int, debug_id) > + __field(int, from_proc) > + __field(int, from_thread) > + __field(int, to_proc) > + __field(int, to_thread) > + __field(unsigned int, code) > + __field(unsigned int, flags) > + ), > + TP_fast_assign( > + __entry->debug_id = t->debug_id; > + __entry->from_proc = t->from ? t->from->proc->pid : 0; > + __entry->from_thread = t->from ? t->from->pid : 0; > + __entry->to_proc = t->to_proc ? t->to_proc->pid : 0; > + __entry->to_thread = t->to_thread ? t->to_thread->pid : 0; > + __entry->code = t->code; > + __entry->flags = t->flags; > + ), > + TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x", > + __entry->debug_id, __entry->from_proc, __entry->from_thread, > + __entry->to_proc, __entry->to_thread, __entry->code, > + __entry->flags) > +); > + > TRACE_EVENT(binder_transaction, > TP_PROTO(bool reply, struct binder_transaction *t, > struct binder_node *target_node), > -- > 1.7.9.5
Re: [PATCH v6 2/3] binder: add trace at free transaction.
On Sun, Aug 2, 2020 at 8:11 PM Frankie Chang wrote: > > On Fri, 2020-07-31 at 11:50 -0700, Todd Kjos wrote: > > On Mon, Jul 27, 2020 at 8:28 PM Frankie Chang > > wrote: > > > > > > From: "Frankie.Chang" > > > > > > Since the original trace_binder_transaction_received cannot > > > precisely present the real finished time of transaction, adding a > > > trace_binder_txn_latency_free at the point of free transaction > > > may be more close to it. > > > > > > Signed-off-by: Frankie.Chang > > > --- > > > drivers/android/binder.c |6 ++ > > > drivers/android/binder_trace.h | 27 +++ > > > 2 files changed, 33 insertions(+) > > > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > > > index 2df146f..1e6fc40 100644 > > > --- a/drivers/android/binder.c > > > +++ b/drivers/android/binder.c > > > @@ -1522,6 +1522,9 @@ static void binder_free_transaction(struct > > > binder_transaction *t) > > > * If the transaction has no target_proc, then > > > * t->buffer->transaction has already been cleared. > > > */ > > > + spin_lock(&t->lock); > > > + trace_binder_txn_latency_free(t); > > > + spin_unlock(&t->lock); > > > > Hmm. I don't prefer taking the lock just to call a trace. It doesn't > > make clear why the lock has to be taken. I'd prefer something like: > > > > if (trace_binder_txn_latency_free_enabled()) { > c > > } > > > > And then the trace would use the passed-in values instead of accessing > > via t->to_proc/to_thread. > > > Then we still add lock protection in the hook function, when trace is > disable ? I don't understand... in the example I gave, the trace doesn't get called if disabled. What do you mean to "add lock protection when the trace is disabled()"? > > Or we also pass these to hook function, no matter the trace is enable or What do you mean by "hook" function? If something has attached to the trace, then xxx_enabled() will return true. > not.I think this way is more clear that the lock protects @from, > @to_proc and @to_thread.Then, there is no need to add the lock in hook > function. Why is it clearer (other than the fact that I missed including t->from under the lock)? > > int from_proc, from_thread, to_proc, to_thread; > > spin_lock(&t->lock); > from_proc = t->from ? t->from->proc->pid : 0; > from_thread = t->from ? t->from->pid :0; > to_proc = t->to_proc ? t->to_proc->pid : 0; > to_thread = t->to_thread ? t->to_thread->pid : 0; > spin_unlock(&t->lock); > trace_binder_txn_latency_free(t, from_proc, from_thread, to_proc, > to_pid); The main feedback is I'd like to see the fields dereferenced in the same context as the lock acquisition instead of acquiring the lock and calling the trace function, so this code would be fine. There will be very little contention for t->lock so using xxx_enabled() is optional. Since trace_binder_txn_latency_free() is called twice, it would make sense to have a helper function to do the above. > > > > binder_free_txn_fixups(t); > > > kfree(t); > > > binder_stats_deleted(BINDER_STAT_TRANSACTION); > > > @@ -3093,6 +3096,9 @@ static void binder_transaction(struct binder_proc > > > *proc, > > > kfree(tcomplete); > > > binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); > > > err_alloc_tcomplete_failed: > > > + spin_lock(&t->lock); > > > + trace_binder_txn_latency_free(t); > > > + spin_unlock(&t->lock); > > > kfree(t); > > > binder_stats_deleted(BINDER_STAT_TRANSACTION); > > > err_alloc_t_failed: > > > diff --git a/drivers/android/binder_trace.h > > > b/drivers/android/binder_trace.h > > > index 6731c3c..8ac87d1 100644 > > > --- a/drivers/android/binder_trace.h > > > +++ b/drivers/android/binder_trace.h > > > @@ -95,6 +95,33 @@ > > > __entry->thread_todo) > > > ); > > > > > > +TRACE_EVENT(binder_txn_latency_free, > > > + TP_PROTO(struct binder_transaction *t), > > > + TP_ARGS(t), > > > + TP_STRUCT__entry( > > > + __field(int, debug_id) > > > + __field(int, from_proc) > > > + __field(int, from_thread
Re: [PATCH v2] ANDROID: binder: print warnings when detecting oneway spamming.
On Thu, Aug 20, 2020 at 4:50 AM Martijn Coenen wrote: > > The most common cause of the binder transaction buffer filling up is a > client rapidly firing oneway transactions into a process, before it has > a chance to handle them. Yet the root cause of this is often hard to > debug, because either the system or the app will stop, and by that time > binder debug information we dump in bugreports is no longer relevant. > > This change warns as soon as a process dips below 80% of its oneway > space (less than 100kB available in the configuration), when any one > process is responsible for either more than 50 transactions, or more > than 50% of the oneway space. > > Signed-off-by: Martijn Coenen A few minor comment issues below. When resolved: Acked-by: Todd Kjos > --- > v2: fixed call-site in binder_alloc_selftest > > drivers/android/binder.c| 2 +- > drivers/android/binder_alloc.c | 49 +++-- > drivers/android/binder_alloc.h | 5 ++- > drivers/android/binder_alloc_selftest.c | 2 +- > 4 files changed, 52 insertions(+), 6 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index f936530a19b0..946332bc871a 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3136,7 +3136,7 @@ static void binder_transaction(struct binder_proc *proc, > > t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, > tr->offsets_size, extra_buffers_size, > - !reply && (t->flags & TF_ONE_WAY)); > + !reply && (t->flags & TF_ONE_WAY), current->tgid); > if (IS_ERR(t->buffer)) { > /* > * -ESRCH indicates VMA cleared. The target is dying. > diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c > index 69609696a843..76e8e633dbd4 100644 > --- a/drivers/android/binder_alloc.c > +++ b/drivers/android/binder_alloc.c > @@ -338,12 +338,48 @@ static inline struct vm_area_struct > *binder_alloc_get_vma( > return vma; > } > > +static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > +{ > + /* > +* Find the amount and size of buffers allocated by the current > caller; > +* The idea is that once we cross the threshold, whoever is > responsible > +* for the low async space is likely to try to send another async txn, > +* and at some point we'll catch them in the act. This is more > efficient > +* than keeping a map per pid. > +*/ > + struct rb_node *n = alloc->free_buffers.rb_node; > + struct binder_buffer *buffer; > + size_t buffer_size; > + size_t total_alloc_size = 0; > + size_t num_buffers = 0; > + > + for (n = rb_first(&alloc->allocated_buffers); n != NULL; > +n = rb_next(n)) { > + buffer = rb_entry(n, struct binder_buffer, rb_node); > + if (buffer->pid != pid) > + continue; > + if (!buffer->async_transaction) > + continue; > + buffer_size = binder_alloc_buffer_size(alloc, buffer); > + total_alloc_size += buffer_size; > + num_buffers++; > + } > + > + // Warn if this pid has more than 50% of async space, or more than 50 > txns /* .. */ Folks might be confused by 50% being calculated as (alloc->buffer_size /4) which on the surface looks like 25%. Maybe either change the comment to "25% of buffer space" or point out that async space is 50% of the buffer. > + if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) { > + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, > +"%d: pid %d spamming oneway? %zd buffers > allocated for a total size of %zd\n", > + alloc->pid, pid, num_buffers, total_alloc_size); > + } > +} > + > static struct binder_buffer *binder_alloc_new_buf_locked( > struct binder_alloc *alloc, > size_t data_size, > size_t offsets_size, > size_t extra_buffers_size, > - int is_async) > + int is_async, > + int pid) > { > struct rb_node *n = alloc->free_buffers.rb_node; > struct binder_buffer *buffer; > @@ -486,11 +522,16 @@ static struct binder_buffer > *binder_alloc_new_buf_locked( > buffer
Re: WARNING in binder_transaction_buffer_release (2)
On Thu, Aug 6, 2020 at 9:09 AM Jann Horn wrote: > > On Thu, Aug 6, 2020 at 1:19 PM syzbot > wrote: > > syzbot suspects this issue was fixed by commit: > > > > commit 4b836a1426cb0f1ef2a6e211d7e553221594f8fc > > Author: Jann Horn > > Date: Mon Jul 27 12:04:24 2020 + > > > > binder: Prevent context manager from incrementing ref 0 > [...] > > dashboard link: https://syzkaller.appspot.com/bug?extid=e113a0b970b7b3f394ba > [...] > > If the result looks correct, please mark the issue as fixed by replying > > with: > > > > #syz fix: binder: Prevent context manager from incrementing ref 0 > > I think this issue still exists, syzbot probably just hit it in a > weird way that doesn't work anymore. > > This warning: > > case BINDER_TYPE_FD: { > /* > * No need to close the file here since user-space > * closes it for for successfully delivered > * transactions. For transactions that weren't > * delivered, the new fd was never allocated so > * there is no need to close and the fput on the > * file is done when the transaction is torn > * down. > */ > WARN_ON(failed_at && > proc->tsk == current->group_leader); > } break; > > can be false-positive if the sender and recipient of the transaction > are associated with the same task_struct. But there isn't really any > reason why you wouldn't be able to have sender and recipient in the > same process, as long as the binder_proc is different. > (binder_transaction() has a weird check that refuses transactions to > handle 0 based on task_struct equality - which IMO doesn't really make > sense -, but transactions to other handles can happen just fine even > if both ends are in the same task_struct.) > > Maybe the best fix is just to rip out that WARN_ON()? Yes, probably so.
Re: [PATCH] binder: Remove bogus warning on failed same-process transaction
On Thu, Aug 6, 2020 at 9:54 AM Jann Horn wrote: > > While binder transactions with the same binder_proc as sender and recipient > are forbidden, transactions with the same task_struct as sender and > recipient are possible (even though currently there is a weird check in > binder_transaction() that rejects them in the target==0 case). > Therefore, task_struct identities can't be used to distinguish whether > the caller is running in the context of the sender or the recipient. > > Since I see no easy way to make this WARN_ON() useful and correct, let's > just remove it. > > Fixes: 44d8047f1d87 ("binder: use standard functions to allocate fds") > Reported-by: syzbot+e113a0b970b7b3f39...@syzkaller.appspotmail.com > Signed-off-by: Jann Horn Acked-by: Todd Kjos > --- > drivers/android/binder.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index f936530a19b0..5b0376344dbe 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -2344,8 +2344,6 @@ static void binder_transaction_buffer_release(struct > binder_proc *proc, > * file is done when the transaction is torn > * down. > */ > - WARN_ON(failed_at && > - proc->tsk == current->group_leader); > } break; > case BINDER_TYPE_PTR: > /* > > base-commit: 47ec5303d73ea344e84f46660fff693c57641386 > -- > 2.28.0.163.g6104cc2f0b6-goog >
Re: [PATCH] driver: staging: count ashmem_range into SLAB_RECLAIMBLE
+Hridya Valsaraju +Suren Baghdasaryan On Thu, Dec 17, 2020 at 11:48 PM Huangzhaoyang wrote: > > From: Zhaoyang Huang > > Add SLAB_RECLAIM_ACCOUNT to ashmem_range cache since it has registered > shrinker, which make memAvailable more presiced. > > Signed-off-by: Zhaoyang Huang Acked-by: Todd Kjos > --- > drivers/staging/android/ashmem.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/staging/android/ashmem.c > b/drivers/staging/android/ashmem.c > index 74d497d..b79301f 100644 > --- a/drivers/staging/android/ashmem.c > +++ b/drivers/staging/android/ashmem.c > @@ -893,7 +893,7 @@ static int __init ashmem_init(void) > > ashmem_range_cachep = kmem_cache_create("ashmem_range_cache", > sizeof(struct ashmem_range), > - 0, 0, NULL); > + 0, SLAB_RECLAIM_ACCOUNT, > NULL); > if (!ashmem_range_cachep) { > pr_err("failed to create slab cache\n"); > goto out_free1; > -- > 1.9.1 >
Re: [PATCH v3 0/3] Binder: Enable App Freezing Capability
On Wed, Mar 17, 2021 at 1:17 PM Jann Horn wrote: > > On Wed, Mar 17, 2021 at 7:00 PM Christian Brauner > wrote: > > On Mon, Mar 15, 2021 at 06:16:27PM -0700, Li Li wrote: > > > To improve the user experience when switching between recently used > > > applications, the background applications which are not currently needed > > > are cached in the memory. Normally, a well designed application will not > > > consume valuable CPU resources in the background. However, it's possible > > > some applications are not able or willing to behave as expected, wasting > > > energy even after being cached. > > > > > > It is a good idea to freeze those applications when they're only being > > > kept alive for the sake of faster startup and energy saving. These kernel > > > patches will provide the necessary infrastructure for user space framework > > > to freeze and thaw a cached process, check the current freezing status and > > > correctly deal with outstanding binder transactions to frozen processes. > > I just have some comments on the overall design: > > This seems a bit convoluted to me; and I'm not sure whether this is > really something the kernel should get involved in, or whether this > patchset is operating at the right layer. The issue is that there is lot's of per-process state in the binder driver that needs to be quiesced prior to freezing the process (using the standard freeze mechanism of Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst). That's all this series does... quiesces binder state prior to freeze and then re-enable transactions when the process is thawed. > > If there are non-binder threads that are misbehaving, could you > instead stop all those threads in pure userspace code (e.g. by sending > a thread-directed signal to all of them and letting the signal handler > sleep on a futex); and if the binder thread receives a transaction > that should be handled, wake up those threads again? It is not an issue of stopping threads. It's an issue of quiescing binder for a process so clients aren't blocked waiting for a response from a frozen process that may never handle the transaction. This series causes the soon-to-be-frozen process to reject new transactions and allows user-space to detect when the transactions have drained from the queues prior to freezing the process. > > Or alternatively you could detect that the application is being woken > up frequently even though it's supposed to be idle (e.g. using > information from procfs), and kill it since you consider it to be > misbehaving? > > Or if there are specific usage patterns you see frequently that you > consider to be wasting CPU resources (e.g. setting an interval timer > that fires in short intervals), you could try to delay such timers. > > > With your current approach, you're baking the assumption that all IPC > goes through binder into the kernel API; things like passing a file > descriptor to a pipe through binder or using shared futexes are no No, we're dealing with an issue that is particular to binder IPC when freezing a process. I suspect that other IPC mechanisms do not have this issue -- and if any do for Android, then they would need equivalent pre-freeze/post-freeze mechanisms. So far in the testing of freezing in Android R, there haven't been issues with pipes or futexes that required this kind of explicit quiescing (at least none that I know of -- dualli@, please comment if there have been these kinds of issues). > longer usable for cross-process communication without making more > kernel changes. I'm not sure whether that's a good idea. On top of > that, if you freeze a process while it is in the middle of some > operation, resources associated with the operation will probably stay > in use for quite some time; for example, if an app is in the middle of > downloading some data over HTTP, and you freeze it, this may cause the > TCP connection to remain active and consume resources for send/receive > buffers on both the device and the server.
Re: [PATCH v3 1/3] binder: BINDER_FREEZE ioctl
On Mon, Mar 15, 2021 at 6:16 PM Li Li wrote: > > From: Marco Ballesio > > Frozen tasks can't process binder transactions, so a way is required to > inform transmitting ends of communication failures due to the frozen > state of their receiving counterparts. Additionally, races are possible > between transitions to frozen state and binder transactions enqueued to > a specific process. > > Implement BINDER_FREEZE ioctl for user space to inform the binder driver > about the intention to freeze or unfreeze a process. When the ioctl is > called, block the caller until any pending binder transactions toward > the target process are flushed. Return an error to transactions to > processes marked as frozen. > > Signed-off-by: Marco Ballesio > Co-developed-by: Todd Kjos > Signed-off-by: Todd Kjos > Signed-off-by: Li Li For the series, you can add Acked-by: Todd Kjos > --- > drivers/android/binder.c| 139 ++-- > drivers/android/binder_internal.h | 12 +++ > include/uapi/linux/android/binder.h | 13 +++ > 3 files changed, 154 insertions(+), 10 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c119736ca56a..b93ca53bb90f 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -1506,6 +1506,12 @@ static void binder_free_transaction(struct > binder_transaction *t) > > if (target_proc) { > binder_inner_proc_lock(target_proc); > + target_proc->outstanding_txns--; > + if (target_proc->outstanding_txns < 0) > + pr_warn("%s: Unexpected outstanding_txns %d\n", > + __func__, target_proc->outstanding_txns); > + if (!target_proc->outstanding_txns && target_proc->is_frozen) > + wake_up_interruptible_all(&target_proc->freeze_wait); > if (t->buffer) > t->buffer->transaction = NULL; > binder_inner_proc_unlock(target_proc); > @@ -2331,10 +2337,11 @@ static int binder_fixup_parent(struct > binder_transaction *t, > * If the @thread parameter is not NULL, the transaction is always queued > * to the waitlist of that specific thread. > * > - * Return: true if the transactions was successfully queued > - * false if the target process or thread is dead > + * Return: 0 if the transaction was successfully queued > + * BR_DEAD_REPLY if the target process or thread is dead > + * BR_FROZEN_REPLY if the target process or thread is frozen > */ > -static bool binder_proc_transaction(struct binder_transaction *t, > +static int binder_proc_transaction(struct binder_transaction *t, > struct binder_proc *proc, > struct binder_thread *thread) > { > @@ -2354,10 +2361,11 @@ static bool binder_proc_transaction(struct > binder_transaction *t, > > binder_inner_proc_lock(proc); > > - if (proc->is_dead || (thread && thread->is_dead)) { > + if ((proc->is_frozen && !oneway) || proc->is_dead || > + (thread && thread->is_dead)) { > binder_inner_proc_unlock(proc); > binder_node_unlock(node); > - return false; > + return proc->is_frozen ? BR_FROZEN_REPLY : BR_DEAD_REPLY; > } > > if (!thread && !pending_async) > @@ -2373,10 +2381,11 @@ static bool binder_proc_transaction(struct > binder_transaction *t, > if (!pending_async) > binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync > */); > > + proc->outstanding_txns++; > binder_inner_proc_unlock(proc); > binder_node_unlock(node); > > - return true; > + return 0; > } > > /** > @@ -3013,13 +3022,16 @@ static void binder_transaction(struct binder_proc > *proc, > if (reply) { > binder_enqueue_thread_work(thread, tcomplete); > binder_inner_proc_lock(target_proc); > - if (target_thread->is_dead) { > + if (target_thread->is_dead || target_proc->is_frozen) { > + return_error = target_thread->is_dead ? > + BR_DEAD_REPLY : BR_FROZEN_REPLY; > binder_inner_proc_unlock(target_proc); > goto err_dead_proc_or_thread; > } > BUG_ON(t->buffer->async_transaction != 0); > binder_pop_tr
Re: [PATCH 02/57] staging: android: ashmem: Supply description for 'new_range'
On Wed, Apr 14, 2021 at 11:11 AM Lee Jones wrote: > > Fixes the following W=1 kernel build warning(s): > > drivers/staging/android/ashmem.c:189: warning: Function parameter or member > 'new_range' not described in 'range_alloc' > > Cc: Greg Kroah-Hartman > Cc: "Arve Hjønnevåg" > Cc: Todd Kjos > Cc: Martijn Coenen > Cc: Joel Fernandes > Cc: Christian Brauner > Cc: Hridya Valsaraju > Cc: Suren Baghdasaryan > Cc: Robert Love > Cc: linux-stag...@lists.linux.dev > Signed-off-by: Lee Jones Acked-by: Todd Kjos > --- > drivers/staging/android/ashmem.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/staging/android/ashmem.c > b/drivers/staging/android/ashmem.c > index d66a64e42273a..8ee4320a5dc6d 100644 > --- a/drivers/staging/android/ashmem.c > +++ b/drivers/staging/android/ashmem.c > @@ -179,6 +179,7 @@ static inline void lru_del(struct ashmem_range *range) > * @purged: Initial purge status (ASMEM_NOT_PURGED or > ASHMEM_WAS_PURGED) > * @start:The starting page (inclusive) > * @end: The ending page (inclusive) > + * @new_range:The placeholder for the new range > * > * This function is protected by ashmem_mutex. > */ > -- > 2.27.0 >
Re: [PATCH v2] binder: tell userspace to dump current backtrace when detecting oneway spamming
On Thu, Apr 1, 2021 at 1:29 AM Hang Lu wrote: > > When async binder buffer got exhausted, some normal oneway transaction > will also be discarded and finally caused system/app stop. "...be discarded and may cause system or application failures" ? > By that time, > the binder debug information we dump may not relevant to the root cause. "may not be relevant" > And this issue is difficult to debug if without the backtrace of thread > sending spam. "...backtrace of the thread..." > > This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting > oneway spamming, request to dump current backtrace. "to userspace when oneway spamming is detected > The detection will > happened only once when exceeding the threshold (target process dips "Oneway spamming will be reported only once..." > below 80% of its oneway space, and current process is responsible for > either more than 50 transactions, or more than 50% of the oneway space). > And the detection will restart when the async buffer has returned to a > healthy state. > > Signed-off-by: Hang Lu > --- > v2: make the detection on/off switch to be per-proc > > drivers/android/binder.c| 26 +++--- > drivers/android/binder_alloc.c | 15 --- > drivers/android/binder_alloc.h | 8 +++- > drivers/android/binder_internal.h | 4 > include/uapi/linux/android/binder.h | 8 > 5 files changed, 54 insertions(+), 7 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c119736..93964d1 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc > *proc, > goto err_bad_object_type; > } > } > - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > + if (t->buffer->oneway_spam_suspect) > + tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; > + else > + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > t->work.type = BINDER_WORK_TRANSACTION; > > if (reply) { > @@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc, > > binder_stat_br(proc, thread, cmd); > } break; > - case BINDER_WORK_TRANSACTION_COMPLETE: { > + case BINDER_WORK_TRANSACTION_COMPLETE: > + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: { > + if (proc->oneway_spam_detection_enabled && > + w->type == > BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT) > + cmd = BR_ONEWAY_SPAM_SUSPECT; Doesn't "BR_ONEWAY_SPAM_SUSPECT" need to be added to binder_return_strings[]? > + else > + cmd = BR_TRANSACTION_COMPLETE; > binder_inner_proc_unlock(proc); > - cmd = BR_TRANSACTION_COMPLETE; > kfree(w); > > binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); > if (put_user(cmd, (uint32_t __user *)ptr)) > @@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > } > break; > } > + case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { > + uint32_t enable; > + > + if (copy_from_user(&enable, ubuf, sizeof(enable))) { > + ret = -EINVAL; > + goto err; > + } > + binder_inner_proc_lock(proc); > + proc->oneway_spam_detection_enabled = (bool)enable; > + binder_inner_proc_unlock(proc); > + break; > + } > default: > ret = -EINVAL; > goto err; > diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c > index 7caf74a..a09872b 100644 > --- a/drivers/android/binder_alloc.c > +++ b/drivers/android/binder_alloc.c > @@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma( > return vma; > } > > -static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > +static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > { > /* > * Find the amount and size of buffers allocated by the current > caller; > @@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct > binder_alloc *alloc, int pid) > > /* > * Warn if this pid has more than 50 transactions, or more than 50% of > -* async space (which is 25% of total buffer size). > +* async space (which is 25% of total buffer size). Oneway spam only > +* detect once when exceed the threshold. "Oneway spam is only detected when the threshold is exceeded" > */ > if (num_buff
Re: [PATCH 1/2] binder: fix the missing BR_FROZEN_REPLY in binder_return_strings
+Li Li On Fri, Apr 9, 2021 at 2:42 AM Hang Lu wrote: > > Add BR_FROZEN_REPLY in binder_return_strings to support stat function. > > Fixes: ae28c1be1e54 ("binder: BINDER_GET_FROZEN_INFO ioctl") > Signed-off-by: Hang Lu Acked-by: Todd Kjos > --- > drivers/android/binder.c | 3 ++- > drivers/android/binder_internal.h | 2 +- > 2 files changed, 3 insertions(+), 2 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index e1a484a..be34da3 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -5559,7 +5559,8 @@ static const char * const binder_return_strings[] = { > "BR_FINISHED", > "BR_DEAD_BINDER", > "BR_CLEAR_DEATH_NOTIFICATION_DONE", > - "BR_FAILED_REPLY" > + "BR_FAILED_REPLY", > + "BR_FROZEN_REPLY", > }; > > static const char * const binder_command_strings[] = { > diff --git a/drivers/android/binder_internal.h > b/drivers/android/binder_internal.h > index 2872a7d..a507166 100644 > --- a/drivers/android/binder_internal.h > +++ b/drivers/android/binder_internal.h > @@ -155,7 +155,7 @@ enum binder_stat_types { > }; > > struct binder_stats { > - atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1]; > + atomic_t br[_IOC_NR(BR_FROZEN_REPLY) + 1]; > atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1]; > atomic_t obj_created[BINDER_STAT_COUNT]; > atomic_t obj_deleted[BINDER_STAT_COUNT]; > -- > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, > a Linux Foundation Collaborative Project >
Re: [PATCH 2/2] binder: tell userspace to dump current backtrace when detected oneway spamming
On Fri, Apr 9, 2021 at 2:42 AM Hang Lu wrote: > > When async binder buffer got exhausted, some normal oneway transactions > will also be discarded and may cause system or application failures. By > that time, the binder debug information we dump may not be relevant to > the root cause. And this issue is difficult to debug if without the > backtrace of the thread sending spam. > > This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway > spamming is detected, request to dump current backtrace. Oneway spamming > will be reported only once when exceeding the threshold (target process > dips below 80% of its oneway space, and current process is responsible for > either more than 50 transactions, or more than 50% of the oneway space). > And the detection will restart when the async buffer has returned to a > healthy state. > > Signed-off-by: Hang Lu Acked-by: Todd Kjos > --- > drivers/android/binder.c| 27 --- > drivers/android/binder_alloc.c | 15 --- > drivers/android/binder_alloc.h | 8 +++- > drivers/android/binder_internal.h | 6 +- > include/uapi/linux/android/binder.h | 8 > 5 files changed, 56 insertions(+), 8 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index be34da3..63d2c43 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3020,7 +3020,10 @@ static void binder_transaction(struct binder_proc > *proc, > goto err_bad_object_type; > } > } > - tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > + if (t->buffer->oneway_spam_suspect) > + tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; > + else > + tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; > t->work.type = BINDER_WORK_TRANSACTION; > > if (reply) { > @@ -3893,9 +3896,14 @@ static int binder_thread_read(struct binder_proc *proc, > > binder_stat_br(proc, thread, cmd); > } break; > - case BINDER_WORK_TRANSACTION_COMPLETE: { > + case BINDER_WORK_TRANSACTION_COMPLETE: > + case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: { > + if (proc->oneway_spam_detection_enabled && > + w->type == > BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT) > + cmd = BR_ONEWAY_SPAM_SUSPECT; > + else > + cmd = BR_TRANSACTION_COMPLETE; > binder_inner_proc_unlock(proc); > - cmd = BR_TRANSACTION_COMPLETE; > kfree(w); > > binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); > if (put_user(cmd, (uint32_t __user *)ptr)) > @@ -4897,6 +4905,18 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > } > break; > } > + case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { > + uint32_t enable; > + > + if (copy_from_user(&enable, ubuf, sizeof(enable))) { > + ret = -EINVAL; > + goto err; > + } > + binder_inner_proc_lock(proc); > + proc->oneway_spam_detection_enabled = (bool)enable; > + binder_inner_proc_unlock(proc); > + break; > + } > default: > ret = -EINVAL; > goto err; > @@ -5561,6 +5581,7 @@ static const char * const binder_return_strings[] = { > "BR_CLEAR_DEATH_NOTIFICATION_DONE", > "BR_FAILED_REPLY", > "BR_FROZEN_REPLY", > + "BR_ONEWAY_SPAM_SUSPECT", > }; > > static const char * const binder_command_strings[] = { > diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c > index 7caf74a..340515f 100644 > --- a/drivers/android/binder_alloc.c > +++ b/drivers/android/binder_alloc.c > @@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma( > return vma; > } > > -static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > +static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid) > { > /* > * Find the amount and size of buffers allocated by the current > caller; > @@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct > binder_alloc *alloc, int
Re: [PATCH v2 1/3] binder: BINDER_FREEZE ioctl
On Thu, Mar 11, 2021 at 10:46 AM Li Li wrote: > > From: Marco Ballesio > > Frozen tasks can't process binder transactions, so a way is required to > inform transmitting ends of communication failures due to the frozen > state of their receiving counterparts. Additionally, races are possible > between transitions to frozen state and binder transactions enqueued to > a specific process. > > Implement BINDER_FREEZE ioctl for user space to inform the binder driver > about the intention to freeze or unfreeze a process. When the ioctl is > called, block the caller until any pending binder transactions toward > the target process are flushed. Return an error to transactions to > processes marked as frozen. > > Signed-off-by: Marco Ballesio > Co-developed-by: Todd Kjos > Signed-off-by: Todd Kjos > Signed-off-by: Li Li > --- > drivers/android/binder.c| 141 ++-- > drivers/android/binder_internal.h | 12 +++ > include/uapi/linux/android/binder.h | 13 +++ > 3 files changed, 156 insertions(+), 10 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c119736ca56a..76ea558df03e 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -1506,6 +1506,12 @@ static void binder_free_transaction(struct > binder_transaction *t) > > if (target_proc) { > binder_inner_proc_lock(target_proc); > + target_proc->outstanding_txns--; > + if (target_proc->outstanding_txns < 0) > + pr_warn("%s: Unexpected outstanding_txns %d\n", > + __func__, target_proc->outstanding_txns); Shouldn't this be something like "outstanding_txns is negative"? If we ever see one of these, is this enough information to be useful? Should we at least print the target proc's pid so someone can figure out what process had the messed up count? > + if (!target_proc->outstanding_txns && target_proc->is_frozen) > + wake_up_interruptible_all(&target_proc->freeze_wait); > if (t->buffer) > t->buffer->transaction = NULL; > binder_inner_proc_unlock(target_proc); > @@ -2331,10 +2337,11 @@ static int binder_fixup_parent(struct > binder_transaction *t, > * If the @thread parameter is not NULL, the transaction is always queued > * to the waitlist of that specific thread. > * > - * Return: true if the transactions was successfully queued > - * false if the target process or thread is dead > + * Return: 0 if the transaction was successfully queued > + * BR_DEAD_REPLY if the target process or thread is dead > + * BR_FROZEN_REPLY if the target process or thread is frozen > */ > -static bool binder_proc_transaction(struct binder_transaction *t, > +static int binder_proc_transaction(struct binder_transaction *t, > struct binder_proc *proc, > struct binder_thread *thread) > { > @@ -2354,10 +2361,13 @@ static bool binder_proc_transaction(struct > binder_transaction *t, > > binder_inner_proc_lock(proc); > > - if (proc->is_dead || (thread && thread->is_dead)) { > + if ((proc->is_frozen && !oneway) || proc->is_dead || > + (thread && thread->is_dead)) { > + bool proc_is_dead = proc->is_dead > + || (thread && thread->is_dead); > binder_inner_proc_unlock(proc); > binder_node_unlock(node); > - return false; > + return proc_is_dead ? BR_DEAD_REPLY : BR_FROZEN_REPLY; Do we need the proc_is_dead local? This could be: return proc->is_frozen ? BR_FROZEN_REPLY : BR_DEAD_REPLY > } > > if (!thread && !pending_async) > @@ -2373,10 +2383,11 @@ static bool binder_proc_transaction(struct > binder_transaction *t, > if (!pending_async) > binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync > */); > > + proc->outstanding_txns++; > binder_inner_proc_unlock(proc); > binder_node_unlock(node); > > - return true; > + return 0; > } > > /** > @@ -3013,13 +3024,16 @@ static void binder_transaction(struct binder_proc > *proc, > if (reply) { > binder_enqueue_thread_work(thread, tcomplete); > binder_inner_proc_lock(target_proc); > - if (target_thread->is_dead) { > +
Re: [PATCH v2 2/3] binder: use EINTR for interrupted wait for work
On Thu, Mar 11, 2021 at 10:46 AM Li Li wrote: > > From: Marco Ballesio > > when interrupted by a signal, binder_wait_for_work currently returns > -ERESTARTSYS. This error code isn't propagated to user space, but a way > to handle interruption due to signals must be provided to code using > this API. > > Replace this instance of -ERESTARTSYS with -EINTR, which is propagated > to user space. > > Test: built, booted, interrupted a worker thread within > binder_wait_for_work > Signed-off-by: Marco Ballesio > Signed-off-by: Li Li Acked-by: Todd Kjos > --- > drivers/android/binder.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 76ea558df03e..38bbf9a4ce99 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3712,7 +3712,7 @@ static int binder_wait_for_work(struct binder_thread > *thread, > binder_inner_proc_lock(proc); > list_del_init(&thread->waiting_thread_node); > if (signal_pending(current)) { > - ret = -ERESTARTSYS; > + ret = -EINTR; > break; > } > } > @@ -4855,7 +4855,7 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > if (thread) > thread->looper_need_return = false; > wait_event_interruptible(binder_user_error_wait, > binder_stop_on_user_error < 2); > - if (ret && ret != -ERESTARTSYS) > + if (ret && ret != -EINTR) > pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, > current->pid, cmd, arg, ret); > err_unlocked: > trace_binder_ioctl_done(ret); > -- > 2.31.0.rc2.261.g7f71774620-goog >
Re: [PATCH v2 3/3] binder: BINDER_GET_FROZEN_INFO ioctl
On Thu, Mar 11, 2021 at 10:46 AM Li Li wrote: > > From: Marco Ballesio > > User space needs to know if binder transactions occurred to frozen > processes. Introduce a new BINDER_GET_FROZEN ioctl and keep track of > transactions occurring to frozen proceses. > > Signed-off-by: Marco Ballesio > Signed-off-by: Li Li Acked-by: Todd Kjos > --- > drivers/android/binder.c| 55 + > drivers/android/binder_internal.h | 6 > include/uapi/linux/android/binder.h | 7 > 3 files changed, 68 insertions(+) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 38bbf9a4ce99..b4999ed04b2e 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -2360,6 +2360,10 @@ static int binder_proc_transaction(struct > binder_transaction *t, > } > > binder_inner_proc_lock(proc); > + if (proc->is_frozen) { > + proc->sync_recv |= !oneway; > + proc->async_recv |= oneway; > + } > > if ((proc->is_frozen && !oneway) || proc->is_dead || > (thread && thread->is_dead)) { > @@ -4636,6 +4640,8 @@ static int binder_ioctl_freeze(struct > binder_freeze_info *info, > > if (!info->enable) { > binder_inner_proc_lock(target_proc); > + target_proc->sync_recv = false; > + target_proc->async_recv = false; > target_proc->is_frozen = false; > binder_inner_proc_unlock(target_proc); > return 0; > @@ -4647,6 +4653,8 @@ static int binder_ioctl_freeze(struct > binder_freeze_info *info, > * for transactions to drain. > */ > binder_inner_proc_lock(target_proc); > + target_proc->sync_recv = false; > + target_proc->async_recv = false; > target_proc->is_frozen = true; > binder_inner_proc_unlock(target_proc); > > @@ -4668,6 +4676,33 @@ static int binder_ioctl_freeze(struct > binder_freeze_info *info, > return ret; > } > > +static int binder_ioctl_get_freezer_info( > + struct binder_frozen_status_info *info) > +{ > + struct binder_proc *target_proc; > + bool found = false; > + > + info->sync_recv = 0; > + info->async_recv = 0; > + > + mutex_lock(&binder_procs_lock); > + hlist_for_each_entry(target_proc, &binder_procs, proc_node) { > + if (target_proc->pid == info->pid) { > + found = true; > + binder_inner_proc_lock(target_proc); > + info->sync_recv |= target_proc->sync_recv; > + info->async_recv |= target_proc->async_recv; > + binder_inner_proc_unlock(target_proc); > + } > + } > + mutex_unlock(&binder_procs_lock); > + > + if (!found) > + return -EINVAL; > + > + return 0; > +} > + > static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long > arg) > { > int ret; > @@ -4846,6 +4881,24 @@ static long binder_ioctl(struct file *filp, unsigned > int cmd, unsigned long arg) > goto err; > break; > } > + case BINDER_GET_FROZEN_INFO: { > + struct binder_frozen_status_info info; > + > + if (copy_from_user(&info, ubuf, sizeof(info))) { > + ret = -EFAULT; > + goto err; > + } > + > + ret = binder_ioctl_get_freezer_info(&info); > + if (ret < 0) > + goto err; > + > + if (copy_to_user(ubuf, &info, sizeof(info))) { > + ret = -EFAULT; > + goto err; > + } > + break; > + } > default: > ret = -EINVAL; > goto err; > @@ -5156,6 +5209,8 @@ static void binder_deferred_release(struct binder_proc > *proc) > > proc->is_dead = true; > proc->is_frozen = false; > + proc->sync_recv = false; > + proc->async_recv = false; > threads = 0; > active_transactions = 0; > while ((n = rb_first(&proc->threads))) { > diff --git a/drivers/android/binder_internal.h > b/drivers/android/binder_internal.h > index e6a53e98c6da..2872a7de68e1 100644 > --- a/drivers/android/binder_internal.h > +++ b/drivers/android/binder_
[PATCH] binder: add flag to clear buffer on txn complete
Add a per-transaction flag to indicate that the buffer must be cleared when the transaction is complete to prevent copies of sensitive data from being preserved in memory. Signed-off-by: Todd Kjos --- drivers/android/binder.c| 1 + drivers/android/binder_alloc.c | 48 + drivers/android/binder_alloc.h | 4 ++- include/uapi/linux/android/binder.h | 1 + 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b5117576792b..2a3952925855 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3146,6 +3146,7 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; + t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); if (binder_alloc_copy_user_to_buffer( diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 2f846b7ae8b8..7caf74ad2405 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -696,6 +696,8 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, binder_insert_free_buffer(alloc, buffer); } +static void binder_alloc_clear_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer); /** * binder_alloc_free_buf() - free a binder buffer * @alloc: binder_alloc for this proc @@ -706,6 +708,18 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, void binder_alloc_free_buf(struct binder_alloc *alloc, struct binder_buffer *buffer) { + /* +* We could eliminate the call to binder_alloc_clear_buf() +* from binder_alloc_deferred_release() by moving this to +* binder_alloc_free_buf_locked(). However, that could +* increase contention for the alloc mutex if clear_on_free +* is used frequently for large buffers. The mutex is not +* needed for correctness here. +*/ + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } mutex_lock(&alloc->mutex); binder_free_buf_locked(alloc, buffer); mutex_unlock(&alloc->mutex); @@ -802,6 +816,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) /* Transaction should already have been freed */ BUG_ON(buffer->transaction); + if (buffer->clear_on_free) { + binder_alloc_clear_buf(alloc, buffer); + buffer->clear_on_free = false; + } binder_free_buf_locked(alloc, buffer); buffers++; } @@ -1135,6 +1153,36 @@ static struct page *binder_alloc_get_page(struct binder_alloc *alloc, return lru_page->page_ptr; } +/** + * binder_alloc_clear_buf() - zero out buffer + * @alloc: binder_alloc for this proc + * @buffer: binder buffer to be cleared + * + * memset the given buffer to 0 + */ +static void binder_alloc_clear_buf(struct binder_alloc *alloc, + struct binder_buffer *buffer) +{ + size_t bytes = binder_alloc_buffer_size(alloc, buffer); + binder_size_t buffer_offset = 0; + + while (bytes) { + unsigned long size; + struct page *page; + pgoff_t pgoff; + void *kptr; + + page = binder_alloc_get_page(alloc, buffer, +buffer_offset, &pgoff); + size = min_t(size_t, bytes, PAGE_SIZE - pgoff); + kptr = kmap(page) + pgoff; + memset(kptr, 0, size); + kunmap(page); + bytes -= size; + buffer_offset += size; + } +} + /** * binder_alloc_copy_user_to_buffer() - copy src user to tgt user * @alloc: binder_alloc for this proc diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 55d8b4106766..6e8e001381af 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -23,6 +23,7 @@ struct binder_transaction; * @entry: entry alloc->buffers * @rb_node:node for allocated_buffers/free_buffers rb trees * @free: %true if buffer is free + * @clear_on_free: %true if buffer must be zeroed after use * @allow_user_free:%true if user is allowed to free buffer * @async_transaction: %true if buffer is in use for an async txn * @debug_id: unique ID for debugging @@ -41,9 +42,10 @@ struct binder_buffer { struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */
Re: [PATCH] binder: add flag to clear buffer on txn complete
On Fri, Nov 20, 2020 at 11:14 PM Greg KH wrote: > > On Fri, Nov 20, 2020 at 03:37:43PM -0800, Todd Kjos wrote: > > Add a per-transaction flag to indicate that the buffer > > must be cleared when the transaction is complete to > > prevent copies of sensitive data from being preserved > > in memory. > > > > Signed-off-by: Todd Kjos > > --- > > DOes this need to be backported to stable kernels as well? It doesn't technically qualify for stable since it is a new feature -- not a bug fix. We will want it for Android S launch devices (5.4+), so it would be handy to have it in 5.4.y and later. > > thanks, > > greg k-h > > -- > To unsubscribe from this group and stop receiving emails from it, send an > email to kernel-team+unsubscr...@android.com. >
Re: [PATCH v3 0/7] binder: eliminate use of vmalloc space for binder buffers
On Mon, Feb 11, 2019 at 8:57 AM Christoph Hellwig wrote: > > On Fri, Feb 08, 2019 at 10:35:13AM -0800, Todd Kjos wrote: > > Binder buffers have always been mapped into kernel space > > via map_kernel_range_noflush() to allow the binder driver > > to modify the buffer before posting to userspace for > > processing. > > > > In recent Android releases, the number of long-running > > binder processes has increased to the point that for > > 32-bit systems, there is a risk of running out of > > vmalloc space. > > > > This patch set removes the persistent mapping of the > > binder buffers into kernel space. Instead, the binder > > driver creates temporary mappings with kmap() or > > kmap_atomic() to copy to or from the buffer only when > > necessary. > > Is there any good reason to actually map the user memory to kernel > space instead of just using copy_{to,from}_user? Yes, the mappings are needed for cases where we are accessing binder buffers of the target while in sender context. For example, we copy the message from the sender to the target with 1 copy while in the sender's context. For this we use copy_from_user(), but use these temporary mappings for the destination (target process). -Todd
Re: [PATCH] staging: android: vsoc: Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT
+Alistair Strachan On Mon, Feb 11, 2019 at 9:11 AM Greg KH wrote: > > On Mon, Feb 11, 2019 at 10:15:18PM +0530, Souptick Joarder wrote: > > On Mon, Feb 11, 2019 at 9:27 PM Greg KH wrote: > > > > > > On Mon, Feb 11, 2019 at 09:21:19PM +0530, Souptick Joarder wrote: > > > > On Mon, Feb 11, 2019 at 9:10 PM Greg KH > > > > wrote: > > > > > > > > > > On Mon, Feb 11, 2019 at 08:56:02PM +0530, Souptick Joarder wrote: > > > > > > As mentioned in TODO list, Removed VSOC_WAIT_FOR_INCOMING_INTERRUPT > > > > > > ioctl. This functionality has been superseded by the futex and is > > > > > > there for legacy reasons. > > > > > > > > > > > > Signed-off-by: Souptick Joarder > > > > > > --- > > > > > > drivers/staging/android/uapi/vsoc_shm.h | 7 --- > > > > > > drivers/staging/android/vsoc.c | 5 - > > > > > > 2 files changed, 12 deletions(-) > > > > > > > > > > So userspace is all fixed up now and this ioctl can be dropped? Any > > > > > pointers to the userspace commit that did this? > > > > > > > > I am not sure about user space part. > > > > > > Then we can not just delete the ioctl :) > > > > Agree, but where to verify the user space commit ? > > Any pointer to the source code path ? > > Please work with the android developers to solve this. It should be in > AOSP "somewhere" :( > > good luck, > > greg k-h
Re: [PATCH 3/4] binder: Make transaction_log available in binderfs
On Wed, Aug 28, 2019 at 5:59 AM Christian Brauner wrote: > > On Tue, Aug 27, 2019 at 01:41:51PM -0700, Hridya Valsaraju wrote: > > Currently, the binder transaction log files 'transaction_log' > > and 'failed_transaction_log' live in debugfs at the following locations: > > > > /sys/kernel/debug/binder/failed_transaction_log > > /sys/kernel/debug/binder/transaction_log > > > > This patch makes these files also available in a binderfs instance > > mounted with the mount option "stats=global". > > It does not affect the presence of these files in debugfs. > > If a binderfs instance is mounted at path /dev/binderfs, the location of > > these files will be as follows: > > > > /dev/binderfs/binder_logs/failed_transaction_log > > /dev/binderfs/binder_logs/transaction_log > > > > This change provides an alternate option to access these files when > > debugfs is not mounted. > > > > Signed-off-by: Hridya Valsaraju > > Acked-by: Christian Brauner Acked-by: Todd Kjos > > > --- > > drivers/android/binder.c | 34 +-- > > drivers/android/binder_internal.h | 30 +++ > > drivers/android/binderfs.c| 19 + > > 3 files changed, 54 insertions(+), 29 deletions(-) > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > > index de795bd229c4..bed217310197 100644 > > --- a/drivers/android/binder.c > > +++ b/drivers/android/binder.c > > @@ -197,30 +197,8 @@ static inline void binder_stats_created(enum > > binder_stat_types type) > > atomic_inc(&binder_stats.obj_created[type]); > > } > > > > -struct binder_transaction_log_entry { > > - int debug_id; > > - int debug_id_done; > > - int call_type; > > - int from_proc; > > - int from_thread; > > - int target_handle; > > - int to_proc; > > - int to_thread; > > - int to_node; > > - int data_size; > > - int offsets_size; > > - int return_error_line; > > - uint32_t return_error; > > - uint32_t return_error_param; > > - const char *context_name; > > -}; > > -struct binder_transaction_log { > > - atomic_t cur; > > - bool full; > > - struct binder_transaction_log_entry entry[32]; > > -}; > > -static struct binder_transaction_log binder_transaction_log; > > -static struct binder_transaction_log binder_transaction_log_failed; > > +struct binder_transaction_log binder_transaction_log; > > +struct binder_transaction_log binder_transaction_log_failed; > > > > static struct binder_transaction_log_entry *binder_transaction_log_add( > > struct binder_transaction_log *log) > > @@ -6166,7 +6144,7 @@ static void print_binder_transaction_log_entry(struct > > seq_file *m, > > "\n" : " (incomplete)\n"); > > } > > > > -static int transaction_log_show(struct seq_file *m, void *unused) > > +int binder_transaction_log_show(struct seq_file *m, void *unused) > > { > > struct binder_transaction_log *log = m->private; > > unsigned int log_cur = atomic_read(&log->cur); > > @@ -6198,8 +6176,6 @@ const struct file_operations binder_fops = { > > .release = binder_release, > > }; > > > > -DEFINE_SHOW_ATTRIBUTE(transaction_log); > > - > > static int __init init_binder_device(const char *name) > > { > > int ret; > > @@ -6268,12 +6244,12 @@ static int __init binder_init(void) > > 0444, > > binder_debugfs_dir_entry_root, > > &binder_transaction_log, > > - &transaction_log_fops); > > + &binder_transaction_log_fops); > > debugfs_create_file("failed_transaction_log", > > 0444, > > binder_debugfs_dir_entry_root, > > &binder_transaction_log_failed, > > - &transaction_log_fops); > > + &binder_transaction_log_fops); > > } > > > > if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && > > diff --git a/drivers/android/binder_internal.h > > b/drivers/android/binder_internal.h > > index 12ef96f256c6..b9be42d9464c 100644 > >
Re: [PATCH 4/4] binder: Add binder_proc logging to binderfs
On Wed, Aug 28, 2019 at 6:08 AM Christian Brauner wrote: > > On Tue, Aug 27, 2019 at 01:41:52PM -0700, Hridya Valsaraju wrote: > > Currently /sys/kernel/debug/binder/proc contains > > the debug data for every binder_proc instance. > > This patch makes this information also available > > in a binderfs instance mounted with a mount option > > "stats=global" in addition to debugfs. The patch does > > not affect the presence of the file in debugfs. > > > > If a binderfs instance is mounted at path /dev/binderfs, > > this file would be present at /dev/binderfs/binder_logs/proc. > > This change provides an alternate way to access this file when debugfs > > is not mounted. > > > > Signed-off-by: Hridya Valsaraju > > I'm still wondering whether there's a nicer way to create those debuf > files per-process without doing it in binder_open() but it has worked > fine for a long time with debugfs. > > Also, one minor question below. Otherwise > > Acked-by: Christian Brauner Acked-by: Todd Kjos > > > --- > > drivers/android/binder.c | 38 ++- > > drivers/android/binder_internal.h | 46 ++ > > drivers/android/binderfs.c| 63 ++- > > 3 files changed, 111 insertions(+), 36 deletions(-) > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > > index bed217310197..37189838f32a 100644 > > --- a/drivers/android/binder.c > > +++ b/drivers/android/binder.c > > @@ -481,6 +481,7 @@ struct binder_priority { > > * @inner_lock: can nest under outer_lock and/or node lock > > * @outer_lock: no nesting under innor or node lock > > *Lock order: 1) outer, 2) node, 3) inner > > + * @binderfs_entry: process-specific binderfs log file > > * > > * Bookkeeping structure for binder processes > > */ > > @@ -510,6 +511,7 @@ struct binder_proc { > > struct binder_context *context; > > spinlock_t inner_lock; > > spinlock_t outer_lock; > > + struct dentry *binderfs_entry; > > }; > > > > enum { > > @@ -5347,6 +5349,8 @@ static int binder_open(struct inode *nodp, struct > > file *filp) > > { > > struct binder_proc *proc; > > struct binder_device *binder_dev; > > + struct binderfs_info *info; > > + struct dentry *binder_binderfs_dir_entry_proc = NULL; > > > > binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, > >current->group_leader->pid, current->pid); > > @@ -5368,11 +5372,14 @@ static int binder_open(struct inode *nodp, struct > > file *filp) > > } > > > > /* binderfs stashes devices in i_private */ > > - if (is_binderfs_device(nodp)) > > + if (is_binderfs_device(nodp)) { > > binder_dev = nodp->i_private; > > - else > > + info = nodp->i_sb->s_fs_info; > > + binder_binderfs_dir_entry_proc = info->proc_log_dir; > > + } else { > > binder_dev = container_of(filp->private_data, > > struct binder_device, miscdev); > > + } > > proc->context = &binder_dev->context; > > binder_alloc_init(&proc->alloc); > > > > @@ -5403,6 +5410,27 @@ static int binder_open(struct inode *nodp, struct > > file *filp) > > &proc_fops); > > } > > > > + if (binder_binderfs_dir_entry_proc) { > > + char strbuf[11]; > > + struct dentry *binderfs_entry; > > + > > + snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); > > + /* > > + * Similar to debugfs, the process specific log file is shared > > + * between contexts. If the file has already been created for > > a > > + * process, the following binderfs_create_file() call will > > + * fail if another context of the same process invoked > > + * binder_open(). This is ok since same as debugfs, > > + * the log file will contain information on all contexts of a > > + * given PID. > > + */ > > + binderfs_entry = > > binderfs_create_file(binder_binderfs_dir_entry_proc, > > + strbuf, &proc_fops, (void *)(unsigned long)proc->pid); > > + if (!IS_ERR(binderfs_entry)) > >
Re: UAF read in print_binder_transaction_log_entry() on ANDROID_BINDERFS kernels
+Hridya Valsaraju On Mon, Oct 7, 2019 at 1:50 PM Jann Horn wrote: > > Hi! > > There is a use-after-free read in print_binder_transaction_log_entry() > on ANDROID_BINDERFS kernels because > print_binder_transaction_log_entry() prints the char* e->context_name > as string, and if the transaction occurred on a binder device from > binderfs, e->context_name belongs to the binder device and is freed > when the inode disappears. > > Luckily this shouldn't have security implications, since: > > a) reading the binder transaction log is already a pretty privileged operation > b) I can't find any actual users of ANDROID_BINDERFS > > I guess there are three ways to fix it: > 1) Create a new shared global spinlock for binderfs_evict_inode() and > binder_transaction_log_show(), and let binderfs_evict_inode() scan the > transaction log for pointers to its name and replace them with > pointers to a statically-allocated string "{DELETED}" or something > like that. > 2) Let the transaction log contain non-reusable device identifiers > instead of name pointers, and let print_binder_transaction_log_entry() > look them up in something like a hashtable. > 3) Just copy the name into the transaction log every time. > > I'm not sure which one is better, or whether there's a nicer fourth > option, so I'm leaving writing a patch for this to y'all. > > > Trigger instructions (requires you to have some helpers that can > register a context manager and send some transaction to it): > == > root@test:/home/user# mkdir /tmp/binder > root@test:/home/user# mount -t binder -o stats=global /dev/null /tmp/binder > root@test:/home/user# ls -l /tmp/binder > total 0 > crw--- 1 root root 248, 1 Oct 7 20:34 binder > crw--- 1 root root 248, 0 Oct 7 20:34 binder-control > drwxr-xr-x 3 root root 0 Oct 7 20:34 binder_logs > crw--- 1 root root 248, 2 Oct 7 20:34 hwbinder > crw--- 1 root root 248, 3 Oct 7 20:34 vndbinder > root@test:/home/user# ln -s /tmp/binder/binder /dev/binder > [run some simple binder demo code to temporarily register a context > manager and send a binder transaction] > root@test:/home/user# rm /tmp/binder/binder > root@test:/home/user# cat /tmp/binder/binder_logs/transaction_log > 2: call from 2277:2277 to 2273:0 context @��� node 1 handle 0 > size 24:8 ret 0/0 l=0 > 5: call from 2273:2273 to 2277:2277 context @��� node 3 handle 1 > size 0:0 ret 0/0 l=0 > 6: reply from 2277:2277 to 2273:2273 context @��� node 0 handle 0 > size 4:0 ret 0/0 l=0 > 7: reply from 2273:2273 to 2277:2277 context @��� node 0 handle 0 > size 4:0 ret 0/0 l=0 > root@test:/home/user# > == > > ASAN splat: > [ 333.300753] > == > [ 333.303197] BUG: KASAN: use-after-free in string_nocheck+0x9d/0x160 > [ 333.305081] Read of size 1 at addr 8880b0981258 by task cat/2279 > > [ 333.307415] CPU: 1 PID: 2279 Comm: cat Not tainted 5.4.0-rc1+ #513 > [ 333.309304] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), > BIOS 1.12.0-1 04/01/2014 > [ 333.310987] Call Trace: > [ 333.312032] dump_stack+0x7c/0xc0 > [ 333.312581] ? string_nocheck+0x9d/0x160 > [ 333.313157] print_address_description.constprop.7+0x36/0x50 > [ 333.314030] ? string_nocheck+0x9d/0x160 > [ 333.314603] ? string_nocheck+0x9d/0x160 > [ 333.315236] __kasan_report.cold.10+0x1a/0x35 > [ 333.315972] ? string_nocheck+0x9d/0x160 > [ 333.316545] kasan_report+0xe/0x20 > [ 333.317104] string_nocheck+0x9d/0x160 > [ 333.317652] ? widen_string+0x160/0x160 > [ 333.318270] ? string_nocheck+0x160/0x160 > [ 333.318857] ? unwind_get_return_address+0x2a/0x40 > [ 333.319636] ? profile_setup.cold.9+0x96/0x96 > [ 333.320359] string+0xb6/0xc0 > [ 333.320800] ? hex_string+0x280/0x280 > [ 333.321398] vsnprintf+0x20c/0x780 > [ 333.321898] ? num_to_str+0x180/0x180 > [ 333.322503] ? __kasan_kmalloc.constprop.6+0xc1/0xd0 > [ 333.323235] ? vfs_read+0xbc/0x1e0 > [ 333.323814] ? ksys_read+0xb5/0x150 > [ 333.324323] ? do_syscall_64+0xb9/0x3b0 > [ 333.324948] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe > [ 333.325756] seq_vprintf+0x78/0xb0 > [ 333.326253] seq_printf+0x96/0xc0 > [ 333.327132] ? seq_vprintf+0xb0/0xb0 > [ 333.327678] ? match_held_lock+0x2e/0x240 > [ 333.328450] binder_transaction_log_show+0x237/0x2d0 > [ 333.329163] seq_read+0x266/0x690 > [ 333.329705] vfs_read+0xbc/0x1e0 > [ 333.330178] ksys_read+0xb5/0x150 > [ 333.330724] ? kernel_write+0xb0/0xb0 > [ 333.331257] ? trace_hardirqs_off_caller+0x57/0x130 > [ 333.332045] ? mark_held_locks+0x29/0xa0 > [ 333.332678] ? do_syscall_64+0x6b/0x3b0 > [ 333.333235] do_syscall_64+0xb9/0x3b0 > [ 333.333856] entry_SYSCALL_64_after_hwframe+0x49/0xbe > [ 333.334635] RIP: 0033:0x7fbbb95d4461 > [ 333.335153] Code: fe ff ff 50 48 8d 3d fe d0 09 00 e8 e9 03 02 00 > 66 0f 1f 84 00 00 00 00 00 48 8d 05 99 62 0d 00 8b 00 85 c0 75 13 31 > c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f
Re: [PATCH] binder: prevent UAF read in print_binder_transaction_log_entry()
On Tue, Oct 8, 2019 at 6:02 AM Christian Brauner wrote: > > When a binder transaction is initiated on a binder device coming from a > binderfs instance, a pointer to the name of the binder device is stashed > in the binder_transaction_log_entry's context_name member. Later on it > is used to print the name in print_binder_transaction_log_entry(). By > the time print_binder_transaction_log_entry() accesses context_name > binderfs_evict_inode() might have already freed the associated memory > thereby causing a UAF. Do the simple thing and prevent this by copying > the name of the binder device instead of stashing a pointer to it. > > Reported-by: Jann Horn > Fixes: 03e2e07e3814 ("binder: Make transaction_log available in binderfs") > Link: > https://lore.kernel.org/r/cag48ez14q0-f8lqsvcnbyr2o6gpw8shxsm4u5jmd9mpstem...@mail.gmail.com > Cc: Joel Fernandes > Cc: Todd Kjos > Cc: Hridya Valsaraju > Signed-off-by: Christian Brauner > --- > drivers/android/binder.c | 4 +++- > drivers/android/binder_internal.h | 2 +- > 2 files changed, 4 insertions(+), 2 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index c0a491277aca..5b9ac2122e89 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -57,6 +57,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -66,6 +67,7 @@ > #include > > #include > +#include > > #include > > @@ -2876,7 +2878,7 @@ static void binder_transaction(struct binder_proc *proc, > e->target_handle = tr->target.handle; > e->data_size = tr->data_size; > e->offsets_size = tr->offsets_size; > - e->context_name = proc->context->name; > + strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME); > > if (reply) { > binder_inner_proc_lock(proc); > diff --git a/drivers/android/binder_internal.h > b/drivers/android/binder_internal.h > index bd47f7f72075..ae991097d14d 100644 > --- a/drivers/android/binder_internal.h > +++ b/drivers/android/binder_internal.h > @@ -130,7 +130,7 @@ struct binder_transaction_log_entry { > int return_error_line; > uint32_t return_error; > uint32_t return_error_param; > - const char *context_name; > + char context_name[BINDERFS_MAX_NAME + 1]; > }; > > struct binder_transaction_log { > -- > 2.23.0 > Acked-by: Todd Kjos
Re: [PATCH] binder: prevent UAF read in print_binder_transaction_log_entry()
On Wed, Oct 9, 2019 at 3:40 AM Christian Brauner wrote: > > On Tue, Oct 08, 2019 at 02:05:16PM -0400, Joel Fernandes wrote: > > On Tue, Oct 08, 2019 at 03:01:59PM +0200, Christian Brauner wrote: [...] > > > > One more thought, this can be made dependent on CONFIG_BINDERFS since > > regular > > binder devices cannot be unregistered AFAICS and as Jann said, the problem > > is > > BINDERFS specific. That way we avoid the memcpy for _every_ transaction. > > These can be thundering when Android starts up. > > Unless Todd sees this as a real performance problem I'm weary to > introduce additional checking and record a pointer for non-binderfs and > a memcpy() for binderfs devices. :) > I don't see this as a real problem. In practice, memcpy will be moving < 10 bytes. Also, by the time this code is in an android device, CONFIG_BINDERFS will always be enabled since this is how we are removing binder's use of debugfs. So a micro-optimization of the !BINDERFS case will not be meaningful. [...]
Re: [PATCH] binder: prevent transactions to context manager from its own process.
On Mon, Jul 15, 2019 at 12:18 PM Hridya Valsaraju wrote: > > Currently, a transaction to context manager from its own process > is prevented by checking if its binder_proc struct is the same as > that of the sender. However, this would not catch cases where the > process opens the binder device again and uses the new fd to send > a transaction to the context manager. > > Reported-by: syzbot+8b3c354d33c4ac78b...@syzkaller.appspotmail.com > Signed-off-by: Hridya Valsaraju Acked-by: Todd Kjos > --- > drivers/android/binder.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index e4d25ebec5be..89b9cedae088 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -3138,7 +3138,7 @@ static void binder_transaction(struct binder_proc *proc, > else > return_error = BR_DEAD_REPLY; > mutex_unlock(&context->context_mgr_node_lock); > - if (target_node && target_proc == proc) { > + if (target_node && target_proc->pid == proc->pid) { > binder_user_error("%d:%d got transaction to > context manager from process owning it\n", > proc->pid, thread->pid); > return_error = BR_FAILED_REPLY; > -- > 2.22.0.510.g264f2c817a-goog >
Re: WARNING in binder_transaction_buffer_release
+Hridya Valsaraju Fix posted: https://lkml.kernel.org/lkml/20190715191804.112933-1-hri...@google.com/ On Wed, Jun 12, 2019 at 1:14 PM Todd Kjos wrote: > > On Wed, Jun 12, 2019 at 12:23 PM Eric Biggers wrote: > > > > On Mon, May 20, 2019 at 07:18:06AM -0700, syzbot wrote: > > > Hello, > > > > > > syzbot found the following crash on: > > > > > > HEAD commit:72cf0b07 Merge tag 'sound-fix-5.2-rc1' of > > > git://git.kernel.. > > > git tree: upstream > > > console output: https://syzkaller.appspot.com/x/log.txt?x=17c7d4bca0 > > > kernel config: https://syzkaller.appspot.com/x/.config?x=d103f114f9010324 > > > dashboard link: > > > https://syzkaller.appspot.com/bug?extid=8b3c354d33c4ac78bfad > > > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > > userspace arch: i386 > > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=15b99b44a0 > > > > > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > > > Reported-by: syzbot+8b3c354d33c4ac78b...@syzkaller.appspotmail.com > > > > > > WARNING: CPU: 1 PID: 8535 at drivers/android/binder.c:2368 > > > binder_transaction_buffer_release+0x673/0x8f0 > > > drivers/android/binder.c:2368 > > > Kernel panic - not syncing: panic_on_warn set ... > > > CPU: 1 PID: 8535 Comm: syz-executor.2 Not tainted 5.1.0+ #19 > > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > > > Google 01/01/2011 > > > Call Trace: > > > __dump_stack lib/dump_stack.c:77 [inline] > > > dump_stack+0x172/0x1f0 lib/dump_stack.c:113 > > > panic+0x2cb/0x715 kernel/panic.c:214 > > > __warn.cold+0x20/0x4c kernel/panic.c:571 > > > report_bug+0x263/0x2b0 lib/bug.c:186 > > > fixup_bug arch/x86/kernel/traps.c:179 [inline] > > > fixup_bug arch/x86/kernel/traps.c:174 [inline] > > > do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 > > > do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 > > > invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:986 > > > RIP: 0010:binder_transaction_buffer_release+0x673/0x8f0 > > > drivers/android/binder.c:2368 > > > Code: 31 ff 41 89 c5 89 c6 e8 7b 04 1f fc 45 85 ed 0f 85 1f 41 01 00 49 8d > > > 47 40 48 89 85 50 fe ff ff e9 9d fa ff ff e8 dd 02 1f fc <0f> 0b e9 7f fc > > > ff > > > ff e8 d1 02 1f fc 48 89 d8 45 31 c9 4c 89 fe 4c > > > RSP: 0018:88807b2775f0 EFLAGS: 00010293 > > > RAX: 888092b1e040 RBX: 0060 RCX: 111012563caa > > > RDX: RSI: 85519e13 RDI: 888097a2d248 > > > RBP: 88807b2777d8 R08: 888092b1e040 R09: ed100f64eee3 > > > R10: ed100f64eee2 R11: 88807b277717 R12: 88808fd2c340 > > > R13: 0068 R14: 88807b2777b0 R15: 88809f7ea580 > > > binder_transaction+0x153d/0x6620 drivers/android/binder.c:3484 > > > binder_thread_write+0x87e/0x2820 drivers/android/binder.c:3792 > > > binder_ioctl_write_read drivers/android/binder.c:4836 [inline] > > > binder_ioctl+0x102f/0x1833 drivers/android/binder.c:5013 > > > __do_compat_sys_ioctl fs/compat_ioctl.c:1052 [inline] > > > __se_compat_sys_ioctl fs/compat_ioctl.c:998 [inline] > > > __ia32_compat_sys_ioctl+0x195/0x620 fs/compat_ioctl.c:998 > > > do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline] > > > do_fast_syscall_32+0x27b/0xd7d arch/x86/entry/common.c:408 > > > entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 > > > RIP: 0023:0xf7f9e849 > > > Code: 85 d2 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 3c 24 c3 90 90 > > > 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 > > > 90 > > > 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 > > > RSP: 002b:f7f9a0cc EFLAGS: 0296 ORIG_RAX: 0036 > > > RAX: ffda RBX: 0004 RCX: c0306201 > > > RDX: 2140 RSI: RDI: > > > RBP: R08: R09: > > > R10: R11: R12: > > > R13: R14: R15: > > > Kernel Offset: disabled > > > Rebooting in 86400 seconds.. > > > > > > > > > --- > > > This bug is generated by a bot. It may contain errors. > > > See https://goo.gl/tpsmEJ for more information about syzbot. > > > syzbot engineers can be reached at syzkal...@googlegroups.com. > > > > > > syzbot will keep track of this bug report. See: > > > https://goo.gl/tpsmEJ#status for how to communicate with syzbot. > > > syzbot can test patches for this bug, for details see: > > > https://goo.gl/tpsmEJ#testing-patches > > > > > > > Are any of the binder maintainers planning to fix this? This seems to be > > the > > only open syzbot report for binder on the upstream kernel. > > Taking a look. > > > > > - Eric
Re: [PATCH 1/2] binder: Don't modify VMA bounds in ->mmap handler
On Wed, Oct 16, 2019 at 8:01 AM Jann Horn wrote: > > binder_mmap() tries to prevent the creation of overly big binder mappings > by silently truncating the size of the VMA to 4MiB. However, this violates > the API contract of mmap(). If userspace attempts to create a large binder > VMA, and later attempts to unmap that VMA, it will call munmap() on a range > beyond the end of the VMA, which may have been allocated to another VMA in > the meantime. This can lead to userspace memory corruption. > > The following sequence of calls leads to a segfault without this commit: > > int main(void) { > int binder_fd = open("/dev/binder", O_RDWR); > if (binder_fd == -1) err(1, "open binder"); > void *binder_mapping = mmap(NULL, 0x80UL, PROT_READ, MAP_SHARED, > binder_fd, 0); > if (binder_mapping == MAP_FAILED) err(1, "mmap binder"); > void *data_mapping = mmap(NULL, 0x40UL, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); > if (data_mapping == MAP_FAILED) err(1, "mmap data"); > munmap(binder_mapping, 0x80UL); > *(char*)data_mapping = 1; > return 0; > } > > Cc: sta...@vger.kernel.org > Signed-off-by: Jann Horn Acked-by: Todd Kjos > --- > drivers/android/binder.c | 7 --- > drivers/android/binder_alloc.c | 6 -- > 2 files changed, 4 insertions(+), 9 deletions(-) > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > index 5b9ac2122e89..265d9dd46a5e 100644 > --- a/drivers/android/binder.c > +++ b/drivers/android/binder.c > @@ -97,10 +97,6 @@ DEFINE_SHOW_ATTRIBUTE(proc); > #define SZ_1K 0x400 > #endif > > -#ifndef SZ_4M > -#define SZ_4M 0x40 > -#endif > - > #define FORBIDDEN_MMAP_FLAGS(VM_WRITE) > > enum { > @@ -5177,9 +5173,6 @@ static int binder_mmap(struct file *filp, struct > vm_area_struct *vma) > if (proc->tsk != current->group_leader) > return -EINVAL; > > - if ((vma->vm_end - vma->vm_start) > SZ_4M) > - vma->vm_end = vma->vm_start + SZ_4M; > - > binder_debug(BINDER_DEBUG_OPEN_CLOSE, > "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n", > __func__, proc->pid, vma->vm_start, vma->vm_end, > diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c > index d42a8b2f636a..eb76a823fbb2 100644 > --- a/drivers/android/binder_alloc.c > +++ b/drivers/android/binder_alloc.c > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > #include "binder_alloc.h" > #include "binder_trace.h" > > @@ -689,7 +690,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, > alloc->buffer = (void __user *)vma->vm_start; > mutex_unlock(&binder_alloc_mmap_lock); > > - alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, > + alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start, > + SZ_4M); > + alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE, >sizeof(alloc->pages[0]), >GFP_KERNEL); > if (alloc->pages == NULL) { > @@ -697,7 +700,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, > failure_string = "alloc page array"; > goto err_alloc_pages_failed; > } > - alloc->buffer_size = vma->vm_end - vma->vm_start; > > buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); > if (!buffer) { > -- > 2.23.0.700.g56cf767bdb-goog >
Re: [PATCH v3 1/7] binder: create userspace-to-binder-buffer copy function
On Thu, Feb 14, 2019 at 11:45 AM Joel Fernandes wrote: > > Hi Todd, > > One quick question: > > On Fri, Feb 08, 2019 at 10:35:14AM -0800, Todd Kjos wrote: > > The binder driver uses a vm_area to map the per-process > > binder buffer space. For 32-bit android devices, this is > > now taking too much vmalloc space. This patch removes > > the use of vm_area when copying the transaction data > > from the sender to the buffer space. Instead of using > > copy_from_user() for multi-page copies, it now uses > > binder_alloc_copy_user_to_buffer() which uses kmap() > > and kunmap() to map each page, and uses copy_from_user() > > for copying to that page. > > > > Signed-off-by: Todd Kjos > > --- > > v2: remove casts as suggested by Dan Carpenter > > > > drivers/android/binder.c | 29 +++-- > > drivers/android/binder_alloc.c | 113 + > > drivers/android/binder_alloc.h | 8 +++ > > 3 files changed, 143 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c > > index 5f6ef5e63b91e..ab0b3eec363bc 100644 > > --- a/drivers/android/binder.c > > +++ b/drivers/android/binder.c > > @@ -3078,8 +3078,12 @@ static void binder_transaction(struct binder_proc > > *proc, > > ALIGN(tr->data_size, sizeof(void *))); > > offp = off_start; > > > > - if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) > > -tr->data.ptr.buffer, tr->data_size)) { > > + if (binder_alloc_copy_user_to_buffer( > > + &target_proc->alloc, > > + t->buffer, 0, > > + (const void __user *) > > + (uintptr_t)tr->data.ptr.buffer, > > + tr->data_size)) { > > binder_user_error("%d:%d got transaction with invalid data > > ptr\n", > > proc->pid, thread->pid); > > return_error = BR_FAILED_REPLY; > > @@ -3087,8 +3091,13 @@ static void binder_transaction(struct binder_proc > > *proc, > > return_error_line = __LINE__; > > goto err_copy_data_failed; > > } > > - if (copy_from_user(offp, (const void __user *)(uintptr_t) > > -tr->data.ptr.offsets, tr->offsets_size)) { > > + if (binder_alloc_copy_user_to_buffer( > > + &target_proc->alloc, > > + t->buffer, > > + ALIGN(tr->data_size, sizeof(void *)), > > + (const void __user *) > > + (uintptr_t)tr->data.ptr.offsets, > > + tr->offsets_size)) { > > binder_user_error("%d:%d got transaction with invalid offsets > > ptr\n", > > proc->pid, thread->pid); > > return_error = BR_FAILED_REPLY; > > @@ -3217,6 +3226,8 @@ static void binder_transaction(struct binder_proc > > *proc, > > struct binder_buffer_object *bp = > > to_binder_buffer_object(hdr); > > size_t buf_left = sg_buf_end - sg_bufp; > > + binder_size_t sg_buf_offset = (uintptr_t)sg_bufp - > > + (uintptr_t)t->buffer->data; > > > > if (bp->length > buf_left) { > > binder_user_error("%d:%d got transaction with > > too large buffer\n", > > @@ -3226,9 +3237,13 @@ static void binder_transaction(struct binder_proc > > *proc, > > return_error_line = __LINE__; > > goto err_bad_offset; > > } > > - if (copy_from_user(sg_bufp, > > -(const void __user *)(uintptr_t) > > -bp->buffer, bp->length)) { > > + if (binder_alloc_copy_user_to_buffer( > > + &target_proc->alloc, > > + t->buffer, > > + sg_buf_offset, > > + (const void __user *) >
Re: [PATCH v3 1/7] binder: create userspace-to-binder-buffer copy function
On Thu, Feb 14, 2019 at 1:25 PM Joel Fernandes wrote: > > On Thu, Feb 14, 2019 at 03:53:54PM -0500, Joel Fernandes wrote: > > On Thu, Feb 14, 2019 at 3:42 PM Todd Kjos wrote: > > > > > > On Thu, Feb 14, 2019 at 11:45 AM Joel Fernandes wrote: > > [snip] > > > > > + * check_buffer() - verify that buffer/offset is safe to access > > > > > + * @alloc: binder_alloc for this proc > > > > > + * @buffer: binder buffer to be accessed > > > > > + * @offset: offset into @buffer data > > > > > + * @bytes: bytes to access from offset > > > > > + * > > > > > + * Check that the @offset/@bytes are within the size of the given > > > > > + * @buffer and that the buffer is currently active and not freeable. > > > > > + * Offsets must also be multiples of sizeof(u32). The kernel is > > > > > > > > In all callers of binder_alloc_copy_user_to_buffer, the alignment of > > > > offsets > > > > is set to sizeof(void *). Then shouldn't this function check for > > > > sizeof(void *) > > > > alignment instead of u32? > > > > > > But there are other callers of check_buffer() later in the series that > > > don't require pointer-size alignment. u32 alignment is consistent with > > > the alignment requirements of the binder driver before this change. > > > The copy functions don't actually need to insist on alignment, but > > > these binder buffer objects have always used u32 alignment which has > > > been checked in the driver. If user code misaligned it, then errors > > > are returned. The alignment checks are really to be consistent with > > > previous binder driver behavior. > > > > Got it, thanks. > > One more thing I wanted to ask is, kmap() will now cause global lock > contention because of using spin_lock due to kmap_high(). > > Previously the binder driver was made to not use global lock (as you had > done). Now these paths will start global locking on 32-bit architectures. > Would that degrade performance? There was a lot of concern about 32-bit performance both for lock-contention and the cost of map/unmap operations. Of course, 32-bit systems are also where the primary win is -- namely avoiding vmalloc space depletion. So there was a several months-long evaluation period on 32-bit devices by a silicon vendor who did a lot of testing across a broad set of benchmarks / workloads to verify the performance costs are acceptable. We also ran tests to try to exhaust the kmap space with multiple large buffers. The testing did find that there is some performance degradation for large buffer transfers, but there are no cases where this significantly impacted a meaningful user workload. > > Are we not using kmap_atomic() in this patch because of any concern that the > kmap fixmap space is limited and may run out? We're not using the atomic version here since we can't guarantee that the loop will be atomic since we are calling copy_from_user(). Later in the series, other cases do use kmap_atomic(). > > thanks, > > - Joel > >
Re: kernel BUG at drivers/android/binder_alloc.c:LINE! (2)
On Thu, Feb 14, 2019 at 3:35 AM syzbot wrote: > > syzbot has found a reproducer for the following crash on: > > HEAD commit:b3418f8bddf4 Add linux-next specific files for 20190214 > git tree: linux-next > console output: https://syzkaller.appspot.com/x/log.txt?x=161d2048c0 > kernel config: https://syzkaller.appspot.com/x/.config?x=8a3a37525a677c71 > dashboard link: https://syzkaller.appspot.com/bug?extid=55de1eb4975dec156d8f > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=11cd2f1f40 > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > Reported-by: syzbot+55de1eb4975dec156...@syzkaller.appspotmail.com > Testing a fix: #syz test: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git linux-next patch Description: Binary data
Re: kernel BUG at drivers/android/binder_alloc.c:LINE! (2)
Trying again with the correct branch spec... On Thu, Feb 14, 2019 at 2:34 PM Todd Kjos wrote: > > On Thu, Feb 14, 2019 at 3:35 AM syzbot > wrote: > > > > syzbot has found a reproducer for the following crash on: > > > > HEAD commit:b3418f8bddf4 Add linux-next specific files for 20190214 > > git tree: linux-next > > console output: https://syzkaller.appspot.com/x/log.txt?x=161d2048c0 > > kernel config: https://syzkaller.appspot.com/x/.config?x=8a3a37525a677c71 > > dashboard link: https://syzkaller.appspot.com/bug?extid=55de1eb4975dec156d8f > > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > syz repro: https://syzkaller.appspot.com/x/repro.syz?x=11cd2f1f40 > > > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > > Reported-by: syzbot+55de1eb4975dec156...@syzkaller.appspotmail.com > > > > Testing a fix: > #syz test: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master patch Description: Binary data
[PATCH] binder: fix handling of misaligned binder object
Fixes crash found by syzbot: kernel BUG at drivers/android/binder_alloc.c:LINE! (2) Reported-by: syzbot+55de1eb4975dec156...@syzkaller.appspotmail.com Signed-off-by: Todd Kjos --- Applies to linux-next drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2dba539eb792c..8685882da64cd 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2057,7 +2057,7 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (read_size < sizeof(*hdr)) + if (read_size < sizeof(*hdr) || !IS_ALIGNED(offset, sizeof(u32))) return 0; binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, offset, read_size); -- 2.21.0.rc0.258.g878e2cd30e-goog
Re: [PATCH] binder: reduce mmap_sem write-side lock
On Mon, Feb 18, 2019 at 2:47 AM Minchan Kim wrote: > > On Mon, Feb 18, 2019 at 09:32:08AM +0100, Greg KH wrote: > > On Mon, Feb 18, 2019 at 05:11:45PM +0900, Minchan Kim wrote: > > > binder has used write-side mmap_sem semaphore to release memory > > > mapped at address space of the process. However, right lock to > > > release pages is down_read, not down_write because page table lock > > > already protects the race for parallel freeing. > > > > > > Please do not use mmap_sem write-side lock which is well known > > > contented lock. > > > > > > Cc: Todd Kjos > > > Cc: Martijn Coenen > > > Cc: Arve Hjønnevåg > > > Cc: Greg Kroah-Hartman > > > Signed-off-by: Minchan Kim Acked-by: Todd Kjos > > > --- > > > drivers/android/binder_alloc.c | 4 ++-- > > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > Is this just needed for the recent binder changes that are in > > linux-next, or for older kernels as well? > > It has been there for several years but no need to fix older kernels > because down_write of mmap_sem is technically not a bug to stop the > working of binder. Rather than, it's just abuse of the lock so > it's okay to fix only recent kernel. > > Thanks.
Re: [PATCH v2 0/7] binder: eliminate use of vmalloc space for binder buffers
On Fri, Feb 8, 2019 at 3:26 AM Greg KH wrote: > > On Wed, Jan 30, 2019 at 02:46:48PM -0800, Todd Kjos wrote: > > Binder buffers have always been mapped into kernel space > > via map_kernel_range_noflush() to allow the binder driver > > to modify the buffer before posting to userspace for > > processing. > > > > In recent Android releases, the number of long-running > > binder processes has increased to the point that for > > 32-bit systems, there is a risk of running out of > > vmalloc space. > > > > This patch set removes the persistent mapping of the > > binder buffers into kernel space. Instead, the binder > > driver creates temporary mappings with kmap() or > > kmap_atomic() to copy to or from the buffer only when > > necessary. > > This patch series blows up when I apply it to my char-misc-next branch: > > drivers/android/binder_alloc_selftest.c: In function > ‘check_buffer_pages_allocated’: > drivers/android/binder_alloc_selftest.c:108:44: error: ‘struct binder_buffer’ > has no member named ‘data’ > end = (void *)PAGE_ALIGN((uintptr_t)buffer->data + size); > ^~ > Did you forget to enable CONFIG_ANDROID_BINDER_IPC_SELFTEST when doing your > builds? I did forget. Thanks for catching this. I'll repost with the fix. -Todd > > thanks, > > greg k-h
[PATCH v3 0/7] binder: eliminate use of vmalloc space for binder buffers
Binder buffers have always been mapped into kernel space via map_kernel_range_noflush() to allow the binder driver to modify the buffer before posting to userspace for processing. In recent Android releases, the number of long-running binder processes has increased to the point that for 32-bit systems, there is a risk of running out of vmalloc space. This patch set removes the persistent mapping of the binder buffers into kernel space. Instead, the binder driver creates temporary mappings with kmap() or kmap_atomic() to copy to or from the buffer only when necessary. Todd Kjos (7): binder: create userspace-to-binder-buffer copy function binder: add functions to copy to/from binder buffers binder: add function to copy binder object from buffer binder: avoid kernel vm_area for buffer fixups binder: remove kernel vm_area for buffer space binder: remove user_buffer_offset binder: use userspace pointer as base of buffer space v2: remove casts as suggested by Dan Carpenter v3: fix build-break when CONFIG_ANDROID_BINDER_IPC_SELFTEST enabled drivers/android/Kconfig | 2 +- drivers/android/binder.c| 460 ++ drivers/android/binder_alloc.c | 299 +-- drivers/android/binder_alloc.h | 47 drivers/android/binder_alloc_selftest.c | 4 +- drivers/android/binder_trace.h | 2 +- 6 files changed, 536 insertions(+), 278 deletions(-)
[PATCH v3 1/7] binder: create userspace-to-binder-buffer copy function
The binder driver uses a vm_area to map the per-process binder buffer space. For 32-bit android devices, this is now taking too much vmalloc space. This patch removes the use of vm_area when copying the transaction data from the sender to the buffer space. Instead of using copy_from_user() for multi-page copies, it now uses binder_alloc_copy_user_to_buffer() which uses kmap() and kunmap() to map each page, and uses copy_from_user() for copying to that page. Signed-off-by: Todd Kjos --- v2: remove casts as suggested by Dan Carpenter drivers/android/binder.c | 29 +++-- drivers/android/binder_alloc.c | 113 + drivers/android/binder_alloc.h | 8 +++ 3 files changed, 143 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5f6ef5e63b91e..ab0b3eec363bc 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3078,8 +3078,12 @@ static void binder_transaction(struct binder_proc *proc, ALIGN(tr->data_size, sizeof(void *))); offp = off_start; - if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) - tr->data.ptr.buffer, tr->data_size)) { + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, 0, + (const void __user *) + (uintptr_t)tr->data.ptr.buffer, + tr->data_size)) { binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; @@ -3087,8 +3091,13 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_copy_data_failed; } - if (copy_from_user(offp, (const void __user *)(uintptr_t) - tr->data.ptr.offsets, tr->offsets_size)) { + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, + ALIGN(tr->data_size, sizeof(void *)), + (const void __user *) + (uintptr_t)tr->data.ptr.offsets, + tr->offsets_size)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; @@ -3217,6 +3226,8 @@ static void binder_transaction(struct binder_proc *proc, struct binder_buffer_object *bp = to_binder_buffer_object(hdr); size_t buf_left = sg_buf_end - sg_bufp; + binder_size_t sg_buf_offset = (uintptr_t)sg_bufp - + (uintptr_t)t->buffer->data; if (bp->length > buf_left) { binder_user_error("%d:%d got transaction with too large buffer\n", @@ -3226,9 +3237,13 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - if (copy_from_user(sg_bufp, - (const void __user *)(uintptr_t) - bp->buffer, bp->length)) { + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, + sg_buf_offset, + (const void __user *) + (uintptr_t)bp->buffer, + bp->length)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error_param = -EFAULT; diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 022cd80e80cc3..94c0d85c4e75b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "binder_alloc.h" #include "binder_trace.h" @@ -1053,3 +1055,114 @@ int binder_alloc_shrinker_init(void) } return ret; } + +/** + * check_buffer() - verify that buffer/offset is safe
[PATCH v3 5/7] binder: remove kernel vm_area for buffer space
Remove the kernel's vm_area and the code that maps buffer pages into it. Signed-off-by: Todd Kjos --- drivers/android/binder_alloc.c | 40 ++ 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 2eebff4be83e0..d4cbe4b3947a6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -265,16 +265,6 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, page->alloc = alloc; INIT_LIST_HEAD(&page->lru); - ret = map_kernel_range_noflush((unsigned long)page_addr, - PAGE_SIZE, PAGE_KERNEL, - &page->page_ptr); - flush_cache_vmap((unsigned long)page_addr, - (unsigned long)page_addr + PAGE_SIZE); - if (ret != 1) { - pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", - alloc->pid, page_addr); - goto err_map_kernel_failed; - } user_page_addr = (uintptr_t)page_addr + alloc->user_buffer_offset; ret = vm_insert_page(vma, user_page_addr, page[0].page_ptr); @@ -314,8 +304,6 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, continue; err_vm_insert_page_failed: - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); -err_map_kernel_failed: __free_page(page->page_ptr); page->page_ptr = NULL; err_alloc_page_failed: @@ -695,7 +683,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, struct vm_area_struct *vma) { int ret; - struct vm_struct *area; const char *failure_string; struct binder_buffer *buffer; @@ -706,28 +693,10 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, goto err_already_mapped; } - area = get_vm_area(vma->vm_end - vma->vm_start, VM_ALLOC); - if (area == NULL) { - ret = -ENOMEM; - failure_string = "get_vm_area"; - goto err_get_vm_area_failed; - } - alloc->buffer = area->addr; - alloc->user_buffer_offset = - vma->vm_start - (uintptr_t)alloc->buffer; + alloc->buffer = (void *)vma->vm_start; + alloc->user_buffer_offset = 0; mutex_unlock(&binder_alloc_mmap_lock); -#ifdef CONFIG_CPU_CACHE_VIPT - if (cache_is_vipt_aliasing()) { - while (CACHE_COLOUR( - (vma->vm_start ^ (uint32_t)alloc->buffer))) { - pr_info("%s: %d %lx-%lx maps %pK bad alignment\n", - __func__, alloc->pid, vma->vm_start, - vma->vm_end, alloc->buffer); - vma->vm_start += PAGE_SIZE; - } - } -#endif alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, sizeof(alloc->pages[0]), GFP_KERNEL); @@ -760,9 +729,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, alloc->pages = NULL; err_alloc_pages_failed: mutex_lock(&binder_alloc_mmap_lock); - vfree(alloc->buffer); alloc->buffer = NULL; -err_get_vm_area_failed: err_already_mapped: mutex_unlock(&binder_alloc_mmap_lock); binder_alloc_debug(BINDER_DEBUG_USER_ERROR, @@ -821,12 +788,10 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) "%s: %d: page %d at %pK %s\n", __func__, alloc->pid, i, page_addr, on_lru ? "on lru" : "active"); - unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); __free_page(alloc->pages[i].page_ptr); page_count++; } kfree(alloc->pages); - vfree(alloc->buffer); } mutex_unlock(&alloc->mutex); if (alloc->vma_vm_mm) @@ -988,7 +953,6 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_start(alloc, index); - unmap_kernel_range(page_addr, PAGE_SIZE); __free_page(page->page_ptr); page->page_ptr = NULL; -- 2.20.1.791.gb4d0f1c61a-goog
[PATCH v3 4/7] binder: avoid kernel vm_area for buffer fixups
Refactor the functions to validate and fixup struct binder_buffer pointer objects to avoid using vm_area pointers. Instead copy to/from kernel space using binder_alloc_copy_to_buffer() and binder_alloc_copy_from_buffer(). The following functions were refactored: refactor binder_validate_ptr() binder_validate_fixup() binder_fixup_parent() Signed-off-by: Todd Kjos --- drivers/android/binder.c | 146 ++- 1 file changed, 97 insertions(+), 49 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8063b405e4fa1..98163bf5f35c7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2092,10 +2092,13 @@ static size_t binder_get_object(struct binder_proc *proc, /** * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer. + * @proc: binder_proc owning the buffer * @b: binder_buffer containing the object + * @object:struct binder_object to read into * @index: index in offset array at which the binder_buffer_object is * located - * @start: points to the start of the offset array + * @start_offset: points to the start of the offset array + * @object_offsetp: offset of @object read from @b * @num_valid: the number of valid offsets in the offset array * * Return: If @index is within the valid range of the offset array @@ -2106,34 +2109,46 @@ static size_t binder_get_object(struct binder_proc *proc, * Note that the offset found in index @index itself is not * verified; this function assumes that @num_valid elements * from @start were previously verified to have valid offsets. + * If @object_offsetp is non-NULL, then the offset within + * @b is written to it. */ -static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b, - binder_size_t index, - binder_size_t *start, - binder_size_t num_valid) +static struct binder_buffer_object *binder_validate_ptr( + struct binder_proc *proc, + struct binder_buffer *b, + struct binder_object *object, + binder_size_t index, + binder_size_t start_offset, + binder_size_t *object_offsetp, + binder_size_t num_valid) { - struct binder_buffer_object *buffer_obj; - binder_size_t *offp; + size_t object_size; + binder_size_t object_offset; + unsigned long buffer_offset; if (index >= num_valid) return NULL; - offp = start + index; - buffer_obj = (struct binder_buffer_object *)(b->data + *offp); - if (buffer_obj->hdr.type != BINDER_TYPE_PTR) + buffer_offset = start_offset + sizeof(binder_size_t) * index; + binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, + b, buffer_offset, sizeof(object_offset)); + object_size = binder_get_object(proc, b, object_offset, object); + if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; + if (object_offsetp) + *object_offsetp = object_offset; - return buffer_obj; + return &object->bbo; } /** * binder_validate_fixup() - validates pointer/fd fixups happen in order. + * @proc: binder_proc owning the buffer * @b: transaction buffer - * @objects_start start of objects buffer - * @buffer:binder_buffer_object in which to fix up - * @offset:start offset in @buffer to fix up - * @last_obj: last binder_buffer_object that we fixed up in - * @last_min_offset: minimum fixup offset in @last_obj + * @objects_start_offset: offset to start of objects buffer + * @buffer_obj_offset: offset to binder_buffer_object in which to fix up + * @fixup_offset: start offset in @buffer to fix up + * @last_obj_offset: offset to last binder_buffer_object that we fixed + * @last_min_offset: minimum fixup offset in object at @last_obj_offset * * Return: %true if a fixup in buffer @buffer at offset @offset is * allowed. @@ -2164,28 +2179,41 @@ static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b, * C (parent = A, offset = 16) * D (parent = B, offset = 0) // B is not A or any of A's parents */ -static bool binder_validate_fixup(struct binder_buffer *b, - binder_size_t *objects_start, -
[PATCH v3 7/7] binder: use userspace pointer as base of buffer space
Now that alloc->buffer points to the userspace vm_area rename buffer->data to buffer->user_data and rename local pointers that hold user addresses. Also use the "__user" tag to annotate all user pointers so sparse can flag cases where user pointer vaues are copied to kernel pointers. Refactor code to use offsets instead of user pointers. Signed-off-by: Todd Kjos --- v2: remove casts as suggested by Dan Carpenter v3: fix build-break when CONFIG_ANDROID_BINDER_IPC_SELFTEST enabled drivers/android/binder.c| 118 ++-- drivers/android/binder_alloc.c | 87 - drivers/android/binder_alloc.h | 6 +- drivers/android/binder_alloc_selftest.c | 4 +- drivers/android/binder_trace.h | 2 +- 5 files changed, 118 insertions(+), 99 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b3d609b5935a6..25491eceb7503 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2278,33 +2278,30 @@ static void binder_deferred_fd_close(int fd) static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, - binder_size_t *failed_at) + binder_size_t failed_at, + bool is_failure) { - binder_size_t *offp, *off_start, *off_end; int debug_id = buffer->debug_id; - binder_size_t off_start_offset; + binder_size_t off_start_offset, buffer_offset, off_end_offset; binder_debug(BINDER_DEBUG_TRANSACTION, -"%d buffer release %d, size %zd-%zd, failed at %pK\n", +"%d buffer release %d, size %zd-%zd, failed at %llx\n", proc->pid, buffer->debug_id, -buffer->data_size, buffer->offsets_size, failed_at); +buffer->data_size, buffer->offsets_size, +(unsigned long long)failed_at); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); - off_start = (binder_size_t *)(buffer->data + off_start_offset); - if (failed_at) - off_end = failed_at; - else - off_end = (void *)off_start + buffer->offsets_size; - for (offp = off_start; offp < off_end; offp++) { + off_end_offset = is_failure ? failed_at : + off_start_offset + buffer->offsets_size; + for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; +buffer_offset += sizeof(binder_size_t)) { struct binder_object_header *hdr; size_t object_size; struct binder_object object; binder_size_t object_offset; - binder_size_t buffer_offset = (uintptr_t)offp - - (uintptr_t)buffer->data; binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, @@ -2380,9 +2377,10 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_fd_array_object *fda; struct binder_buffer_object *parent; struct binder_object ptr_object; - u32 *fd_array; + binder_size_t fda_offset; size_t fd_index; binder_size_t fd_buf_size; + binder_size_t num_valid; if (proc->tsk != current->group_leader) { /* @@ -2393,12 +2391,14 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, continue; } + num_valid = (buffer_offset - off_start_offset) / + sizeof(binder_size_t); fda = to_binder_fd_array_object(hdr); parent = binder_validate_ptr(proc, buffer, &ptr_object, fda->parent, off_start_offset, NULL, -offp - off_start); +num_valid); if (!parent) { pr_err("transaction release %d bad parent offset\n", debug_id); @@ -2417,14 +2417,21 @@ static void binder_transaction_buffer_release(struct binder_