[lttng-dev] [PATCH lttng-tools] Fix: run-as thread deadlock on itself in restart error path

2019-01-16 Thread Jonathan Rajotte
The deadlock was found using this backtrace

Thread 5:
0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
1  0x7efc6b650023 in __GI___pthread_mutex_lock 
(mutex=mutex@entry=0x55fc37128400 ) at 
../nptl/pthread_mutex_lock.c:78
2  0x55fc36efbe05 in run_as_destroy_worker () at runas.c:1233
3  0x55fc36efc2e7 in run_as_restart_worker (worker=) at 
runas.c:998
4  run_as (cmd=cmd@entry=RUN_AS_UNLINK, data=data@entry=0x7efc5b7fa630, 
ret_value=ret_value@entry=0x7efc5b7fa510, uid=uid@entry=1000, 
gid=gid@entry=1000) at runas.c:1033
5  0x55fc36efc9ce in run_as_unlink (path=path@entry=0x7efc5b7fb690 
"/home/joraj/lttng-traces/auto-20190116-111518/20190116T111729-0500-33/kernel/index/channel0_3.idx",
 uid=uid@entry=1000, gid=gid@entry=1000) at runas.c :1120
6  0x55fc36ef7feb in utils_unlink_stream_file 
(path_name=path_name@entry=0x7efc5b7fc7e0 
"/home/joraj/lttng-traces/auto-20190116-111518/20190116T111729-0500-33/kernel/index",
 file_name=file_name@entry=0x7efc500085d4 "channel0_3", size=size@entry=0, 
count=count@entry=0, uid=uid@entry=1000, gid=gid@entry=1000, 
suffix=0x55fc36f19b26 ".idx") at utils.c:929
7  0x55fc36f01d4e in lttng_index_file_create 
(path_name=path_name@entry=0x7efc500087a0 
"/home/joraj/lttng-traces/auto-20190116-111518/20190116T111729-0500-33/kernel", 
stream_name=stream_name@entry=0x7efc500085d4 "channel0_3", uid=1000, gid=1000, 
size=0, count=0, major=1, minor=1) at index.c:79
8  0x55fc36ed9475 in rotate_local_stream (ctx=, 
stream=0x7efc50008460) at consumer.c:4105
9  0x55fc36ed98b5 in lttng_consumer_rotate_stream 
(ctx=ctx@entry=0x55fc37428d80, stream=stream@entry=0x7efc50008460, 
rotated=rotated@entry=0x7efc5b7fdb27) at consumer.c:4181
10 0x55fc36ee354e in lttng_kconsumer_read_subbuffer 
(stream=stream@entry=0x7efc50008460, ctx=ctx@entry=0x55fc37428d80, 
rotated=rotated@entry=0x7efc5b7fdb27) at kernel-consumer.c:1740
11 0x55fc36ed7a30 in lttng_consumer_read_subbuffer (stream=0x7efc50008460, 
ctx=0x55fc37428d80) at consumer.c:3383
12 0x55fc36ed4b74 in consumer_thread_data_poll (data=0x55fc37428d80) at 
consumer.c:2751
13 0x7efc6b64d6db in start_thread (arg=0x7efc5b7fe700) at 
pthread_create.c:463
14 0x7efc6af6488f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

The owner of the lock is itself:
  print worker_lock.__data.__owner
$2 = 25725
  thread find 25725
Thread 5 has target id 'Thread 0x7efc5b7fe700 (LWP 25725)'

The worker_lock is first taken in frame #4: run_as runas.c:1033

  pthread_mutex_lock(&worker_lock);
  if (use_clone()) {
...
/*
 * If the worker thread crashed the errno is set to EIO. we log
 * the error and  start a new worker process.
 */
if (ret == -1 && saved_errno == EIO) {
DBG("Socket closed unexpectedly... "
"Restarting the worker process");
->  ret = run_as_restart_worker(global_worker);
if (ret == -1) {
  ERR("Failed to restart worker process.");
  goto err;
}

Solution


Create run_as_restart_worker_no_lock which does not to take the lock on
execution.
Use run_as_restart_worker_no_lock at the run_as error path call site.
Use run_as_restart_worker_no_lock inside run_as_restart_worker while
holding the worker lock to provide identical behaviour to other call sites.

Signed-off-by: Jonathan Rajotte 
---
 src/common/runas.c | 89 --
 1 file changed, 47 insertions(+), 42 deletions(-)

diff --git a/src/common/runas.c b/src/common/runas.c
index d727cf417..9e6277078 100644
--- a/src/common/runas.c
+++ b/src/common/runas.c
@@ -986,6 +986,51 @@ error_procname_alloc:
return ret;
 }
 
+static
+void run_as_destroy_worker_no_lock(void)
+{
+   struct run_as_worker *worker = global_worker;
+
+   DBG("Destroying run_as worker");
+   if (!worker) {
+   return;
+   }
+   /* Close unix socket */
+   DBG("Closing run_as worker socket");
+   if (lttcomm_close_unix_sock(worker->sockpair[0])) {
+   PERROR("close");
+   }
+   worker->sockpair[0] = -1;
+   /* Wait for worker. */
+   for (;;) {
+   int status;
+   pid_t wait_ret;
+
+   wait_ret = waitpid(worker->pid, &status, 0);
+   if (wait_ret < 0) {
+   if (errno == EINTR) {
+   continue;
+   }
+   PERROR("waitpid");
+   break;
+   }
+
+   if (WIFEXITED(status)) {
+   LOG(WEXITSTATUS(status) == 0 ? PRINT_DBG : PRINT_ERR,
+   DEFAULT_RUN_AS_WORKER_NAME " terminated 
with status code %d",
+   WEX

Re: [lttng-dev] [PATCH lttng-modules 1/4] Fix: Remove 'type' argument from access_ok() function (v5.0)

2019-01-16 Thread Mathieu Desnoyers
All merged into master, stable-2.11, all but last patch merged into
stable-2.10 and stable-2.9,

Thanks!

Mathieu

- On Jan 9, 2019, at 2:59 PM, Michael Jeanson mjean...@efficios.com wrote:

> See upstream commit :
> 
>  commit 96d4f267e40f9509e8a66e2b39e8b95655617693
>  Author: Linus Torvalds 
>  Date:   Thu Jan 3 18:57:57 2019 -0800
> 
>Remove 'type' argument from access_ok() function
> 
>Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
>of the user address range verification function since we got rid of the
>old racy i386-only code to walk page tables by hand.
> 
>It existed because the original 80386 would not honor the write protect
>bit when in kernel mode, so you had to do COW by hand before doing any
>user access.  But we haven't supported that in a long time, and these
>days the 'type' argument is a purely historical artifact.
> 
>A discussion about extending 'user_access_begin()' to do the range
>checking resulted this patch, because there is no way we're going to
>move the old VERIFY_xyz interface to that model.  And it's best done at
>the end of the merge window when I've done most of my merges, so let's
>just get this done once and for all.
> 
>This patch was mostly done with a sed-script, with manual fix-ups for
>the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
> 
>There were a couple of notable cases:
> 
> - csky still had the old "verify_area()" name as an alias.
> 
> - the iter_iov code had magical hardcoded knowledge of the actual
>   values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
>   really used it)
> 
> - microblaze used the type argument for a debug printout
> 
>but other than those oddities this should be a total no-op patch.
> 
>I tried to fix up all architectures, did fairly extensive grepping for
>access_ok() uses, and the changes are trivial, but I may have missed
>something.  Any missed conversion should be trivially fixable, though.
> 
> Signed-off-by: Michael Jeanson 
> ---
> lib/ringbuffer/backend.h  |  8 
> lib/ringbuffer/ring_buffer_iterator.c |  3 ++-
> lttng-filter-interpreter.c|  4 ++--
> probes/lttng-probe-user.c |  3 ++-
> wrapper/uaccess.h | 28 +++
> 5 files changed, 38 insertions(+), 8 deletions(-)
> create mode 100644 wrapper/uaccess.h
> 
> diff --git a/lib/ringbuffer/backend.h b/lib/ringbuffer/backend.h
> index 501fad4..da937f2 100644
> --- a/lib/ringbuffer/backend.h
> +++ b/lib/ringbuffer/backend.h
> @@ -21,7 +21,7 @@
> #include 
> #include 
> #include 
> -#include 
> +#include 
> 
> /* Internal helpers */
> #include 
> @@ -289,7 +289,7 @@ void lib_ring_buffer_copy_from_user_inatomic(const struct
> lib_ring_buffer_config
> 
>   set_fs(KERNEL_DS);
>   pagefault_disable();
> - if (unlikely(!access_ok(VERIFY_READ, src, len)))
> + if (unlikely(!lttng_access_ok(VERIFY_READ, src, len)))
>   goto fill_buffer;
> 
>   if (likely(pagecpy == len)) {
> @@ -359,7 +359,7 @@ void lib_ring_buffer_strcpy_from_user_inatomic(const 
> struct
> lib_ring_buffer_conf
> 
>   set_fs(KERNEL_DS);
>   pagefault_disable();
> - if (unlikely(!access_ok(VERIFY_READ, src, len)))
> + if (unlikely(!lttng_access_ok(VERIFY_READ, src, len)))
>   goto fill_buffer;
> 
>   if (likely(pagecpy == len)) {
> @@ -449,7 +449,7 @@ unsigned long
> lib_ring_buffer_copy_from_user_check_nofault(void *dest,
>   unsigned long ret;
>   mm_segment_t old_fs;
> 
> - if (!access_ok(VERIFY_READ, src, len))
> + if (!lttng_access_ok(VERIFY_READ, src, len))
>   return 1;
>   old_fs = get_fs();
>   set_fs(KERNEL_DS);
> diff --git a/lib/ringbuffer/ring_buffer_iterator.c
> b/lib/ringbuffer/ring_buffer_iterator.c
> index 9efe491..d25db72 100644
> --- a/lib/ringbuffer/ring_buffer_iterator.c
> +++ b/lib/ringbuffer/ring_buffer_iterator.c
> @@ -11,6 +11,7 @@
> 
> #include 
> #include 
> +#include 
> #include 
> #include 
> #include 
> @@ -605,7 +606,7 @@ ssize_t channel_ring_buffer_file_read(struct file *filp,
>   ssize_t len;
> 
>   might_sleep();
> - if (!access_ok(VERIFY_WRITE, user_buf, count))
> + if (!lttng_access_ok(VERIFY_WRITE, user_buf, count))
>   return -EFAULT;
> 
>   /* Finish copy of previous record */
> diff --git a/lttng-filter-interpreter.c b/lttng-filter-interpreter.c
> index bf69549..3dad6cc 100644
> --- a/lttng-filter-interpreter.c
> +++ b/lttng-filter-interpreter.c
> @@ -7,7 +7,7 @@
>  * Copyright (C) 2010-2016 Mathieu Desnoyers 
>  */
> 
> -#include 
> +#include 
> #include 
> #include 
> #include 
> @@ -30,7 +30,7 @@ char get_char(struct estack_entry *reg, size_t offset)
>   char c;
> 
>   /* Handle invalid access as end of string. */
> - if (unlikely(!access_ok(VERIFY_READ,
> + if (

Re: [lttng-dev] [PATCH lttng-modules stable-2.10] Fix: btrfs: Remove fsid/metadata_fsid fields from btrfs_info

2019-01-16 Thread Mathieu Desnoyers
Merged into stable-2.10, stable-2.9, thanks!

Mathieu

- On Jan 10, 2019, at 2:56 PM, Michael Jeanson mjean...@efficios.com wrote:

> Introduced in v5.0.
> 
> See upstream commit :
> 
>  commit de37aa513105f864d3c21105bf5542d498f21ca2
>  Author: Nikolay Borisov 
>  Date:   Tue Oct 30 16:43:24 2018 +0200
> 
>btrfs: Remove fsid/metadata_fsid fields from btrfs_info
> 
>Currently btrfs_fs_info structure contains a copy of the
>fsid/metadata_uuid fields. Same values are also contained in the
>btrfs_fs_devices structure which fs_info has a reference to. Let's
>reduce duplication by removing the fields from fs_info and always refer
>to the ones in fs_devices. No functional changes.
> 
> Signed-off-by: Michael Jeanson 
> ---
> instrumentation/events/lttng-module/btrfs.h | 100 +++-
> 1 file changed, 53 insertions(+), 47 deletions(-)
> 
> diff --git a/instrumentation/events/lttng-module/btrfs.h
> b/instrumentation/events/lttng-module/btrfs.h
> index 4dfbf5b..ec45a1e 100644
> --- a/instrumentation/events/lttng-module/btrfs.h
> +++ b/instrumentation/events/lttng-module/btrfs.h
> @@ -32,6 +32,12 @@ struct extent_state;
> 
> #define BTRFS_UUID_SIZE 16
> 
> +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0))
> +#define lttng_fs_info_fsid fs_info->fs_devices->fsid
> +#else
> +#define lttng_fs_info_fsid fs_info->fsid
> +#endif
> +
> #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) || \
>   LTTNG_SLE_KERNEL_RANGE(4,4,73,5,0,0, 4,4,73,6,0,0) || \
>   LTTNG_SLE_KERNEL_RANGE(4,4,82,6,0,0, 4,4,82,7,0,0) || \
> @@ -629,7 +635,7 @@ LTTNG_TRACEPOINT_EVENT(btrfs_add_block_group,
>   TP_ARGS(fs_info, block_group, create),
> 
>   TP_FIELDS(
> - ctf_array(u8, fsid, fs_info->fsid, BTRFS_UUID_SIZE)
> + ctf_array(u8, fsid, lttng_fs_info_fsid, BTRFS_UUID_SIZE)
>   ctf_integer(u64, offset, block_group->key.objectid)
>   ctf_integer(u64, size, block_group->key.offset)
>   ctf_integer(u64, flags, block_group->flags)
> @@ -647,7 +653,7 @@ LTTNG_TRACEPOINT_EVENT(btrfs_add_block_group,
>   TP_ARGS(fs_info, block_group, create),
> 
>   TP_FIELDS(
> - ctf_array(u8, fsid, fs_info->fsid, BTRFS_UUID_SIZE)
> + ctf_array(u8, fsid, lttng_fs_info_fsid, BTRFS_UUID_SIZE)
>   ctf_integer(u64, offset, block_group->key.objectid)
>   ctf_integer(u64, size, block_group->key.offset)
>   ctf_integer(u64, flags, block_group->flags)
> @@ -1015,18 +1021,18 @@ LTTNG_TRACEPOINT_EVENT_CLASS(btrfs__chunk,
> 
> LTTNG_TRACEPOINT_EVENT_INSTANCE(btrfs__chunk,  btrfs_chunk_alloc,
> 
> - TP_PROTO(const struct btrfs_fs_info *info, const struct map_lookup *map,
> + TP_PROTO(const struct btrfs_fs_info *fs_info, const struct map_lookup 
> *map,
>u64 offset, u64 size),
> 
> - TP_ARGS(info, map, offset, size)
> + TP_ARGS(fs_info, map, offset, size)
> )
> 
> LTTNG_TRACEPOINT_EVENT_INSTANCE(btrfs__chunk,  btrfs_chunk_free,
> 
> - TP_PROTO(const struct btrfs_fs_info *info, const struct map_lookup *map,
> + TP_PROTO(const struct btrfs_fs_info *fs_info, const struct map_lookup 
> *map,
>u64 offset, u64 size),
> 
> - TP_ARGS(info, map, offset, size)
> + TP_ARGS(fs_info, map, offset, size)
> )
> 
> #elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
> @@ -1050,18 +1056,18 @@ LTTNG_TRACEPOINT_EVENT_CLASS(btrfs__chunk,
> 
> LTTNG_TRACEPOINT_EVENT_INSTANCE(btrfs__chunk,  btrfs_chunk_alloc,
> 
> - TP_PROTO(struct btrfs_fs_info *info, struct map_lookup *map,
> + TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map,
>u64 offset, u64 size),
> 
> - TP_ARGS(info, map, offset, size)
> + TP_ARGS(fs_info, map, offset, size)
> )
> 
> LTTNG_TRACEPOINT_EVENT_INSTANCE(btrfs__chunk,  btrfs_chunk_free,
> 
> - TP_PROTO(struct btrfs_fs_info *info, struct map_lookup *map,
> + TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map,
>u64 offset, u64 size),
> 
> - TP_ARGS(info, map, offset, size)
> + TP_ARGS(fs_info, map, offset, size)
> )
> 
> #elif (LTTNG_SLE_KERNEL_RANGE(4,4,73,5,0,0, 4,4,73,6,0,0) || \
> @@ -1192,7 +1198,7 @@ LTTNG_TRACEPOINT_EVENT(btrfs_space_reservation,
>   TP_ARGS(fs_info, type, val, bytes, reserve),
> 
>   TP_FIELDS(
> - ctf_array(u8, fsid, fs_info->fsid, BTRFS_UUID_SIZE)
> + ctf_array(u8, fsid, lttng_fs_info_fsid, BTRFS_UUID_SIZE)
>   ctf_string(type, type)
>   ctf_integer(u64, val, val)
>   ctf_integer(u64, bytes, bytes)
> @@ -1208,7 +1214,7 @@ LTTNG_TRACEPOINT_EVENT(btrfs_space_reservation,
>   TP_ARGS(fs_info, type, val, bytes, reserve),
> 
>   TP_FIELDS(
> - ctf_array(u8, fsid, fs_info->fsid, BTRFS_UUID_SIZE)
> + ctf_array(u8, fsid, lttng_fs_info_fsid, BTRFS_UUID_SIZE)
>   ctf_string(type, type)
>   ctf_integer(u64, val, val)
>