Bug #745
closedbase address dump triggers deadlocks
100%
Description
It appears that the daemon() test case of lttng-tools deadlocks when base address is dumped.
(lttng-tools tests/regression/ust/daemon)
From preliminary analysis, it seems to be a hang in the child process, likely caused by use of the dynamic linker lock within a lttng-ust thread without protection of ust_lock().
We'll need to figure out a way to rework locking to make use of the dynamic linked safe wrt fork/clone/daemon. It's possible that we need to disable the base address feature for 2.4, as it is late in the rc cycle.
MD Updated by Mathieu Desnoyers almost 12 years ago
Example lockup:
Thread 3 (Thread 0x7f71709bf700 (LWP 27007)):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
No locals.
#1 0x00007f71717ee354 in _L_lock_997 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#2 0x00007f71717ee1b7 in __pthread_mutex_lock (mutex=0x7f7172a35970) at pthread_mutex_lock.c:82
ignore1 = <optimized out>
ignore2 = <optimized out>
ignore3 = <optimized out>
__PRETTY_FUNCTION__ = "__pthread_mutex_lock"
type = 1923307888
#3 0x00007f7171f2110b in *__GI___dl_iterate_phdr (callback=callback@entry=0x7f71721ba1d0 <extract_soinfo_events>, data=0x80,
data@entry=0x7f71709be4a0) at dl-iteratephdr.c:42
l = <optimized out>
info = {dlpi_addr = 140125197297880, dlpi_name = 0x3 <Address 0x3 out of bounds>, dlpi_phdr = 0x7f71709be8d8, dlpi_phnum = 2,
dlpi_adds = 0, dlpi_subs = 140125219504425, dlpi_tls_modid = 0, dlpi_tls_data = 0x7f7171e453da}
ret = <optimized out>
#4 0x00007f71721ba574 in lttng_ust_baddr_statedump (owner=owner@entry=0x7f71723e2160) at lttng-ust-baddr.c:189
data = {owner = 0x7f71723e2160, exec_baddr = 0x0}
#5 0x00007f71721b4bb9 in lttng_handle_pending_statedump (owner=owner@entry=0x7f71723e2160) at lttng-events.c:680
session = <optimized out>
#6 0x00007f71721ad41d in handle_pending_statedump (sock_info=0x7f71723e2160) at lttng-ust-comm.c:452
ctor_passed = <optimized out>
#7 handle_message (lum=0x7f71709be8b0, sock=3, sock_info=0x7f71723e2160) at lttng-ust-comm.c:778
ret = 0
lur = {handle = 0, cmd = 68, ret_code = 0, ret_val = 0, padding = '\000' <repeats 31 times>, u = {channel = {memory_map_size = 0},
stream = {memory_map_size = 0}, version = {major = 0, minor = 0, patchlevel = 0}, tracepoint = {name = '\000' <repeats 255 times>,
loglevel = 0, padding = '\000' <repeats 15 times>}, padding = '\000' <repeats 31 times>}}
len = <optimized out>
ops = <optimized out>
args = {channel = {chan_data = 0xd0000000c, wakeup_fd = 5}, stream = {shm_fd = 12, wakeup_fd = 13}, field_list = {entry = {
event_name = "\f\000\000\000\r\000\000\000\005", '\000' <repeats 246 times>, field_name = '\000' <repeats 255 times>,
type = LTTNG_UST_FIELD_OTHER, loglevel = 0, nowrite = 0, padding = '\000' <repeats 283 times>}}}
#8 ust_listener_thread (arg=0x7f71723e2160) at lttng-ust-comm.c:1285
len = <optimized out>
lum = {handle = 0, cmd = 68, padding = '\000' <repeats 31 times>, u = {channel = {len = 0, type = LTTNG_UST_CHAN_PER_CPU,
padding = '\000' <repeats 287 times>, data = 0x7f71709bea04 ""}, stream = {len = 0, stream_nr = 0,
padding = '\000' <repeats 287 times>}, event = {instrumentation = LTTNG_UST_TRACEPOINT, name = '\000' <repeats 255 times>,
loglevel_type = LTTNG_UST_LOGLEVEL_ALL, loglevel = 0, padding = '\000' <repeats 15 times>, u = {
padding = '\000' <repeats 287 times>}}, context = {ctx = LTTNG_UST_CONTEXT_VTID, padding = '\000' <repeats 15 times>, u = {
padding = '\000' <repeats 287 times>}}, version = {major = 0, minor = 0, patchlevel = 0}, tracepoint = {
name = '\000' <repeats 255 times>, loglevel = 0, padding = '\000' <repeats 15 times>}, filter = {data_size = 0, reloc_offset = 0,
seqnum = 0}, exclusion = {count = 0}, padding = '\000' <repeats 31 times>}}
sock_info = 0x7f71723e2160
sock = 3
ret = <optimized out>
prev_connect_failed = 0
has_waited = 0
timeout = 3
__func__ = "ust_listener_thread"
__PRETTY_FUNCTION__ = "ust_listener_thread"
#9 0x00007f71717ebb50 in start_thread (arg=<optimized out>) at pthread_create.c:304
__res = <optimized out>
pd = 0x7f71709bf700
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140125197301504, -4603202139538584103, 140735518955104, 140125197302208, 140125231337536,
3, 4538719390909861337, 4538721291012672985}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0,
cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#10 0x00007f7171eeb0ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
No locals.
#11 0x0000000000000000 in ?? ()
No symbol table info available.
Thread 2 (Thread 0x7f71711c0700 (LWP 27008)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:39
No locals.
#1 0x00007f71721ad5fb in wait_for_sessiond (sock_info=0x7f71723e0120) at lttng-ust-comm.c:1053
ret = <optimized out>
#2 ust_listener_thread (arg=0x7f71723e0120) at lttng-ust-comm.c:1098
sock_info = 0x7f71723e0120
sock = <optimized out>
ret = <optimized out>
prev_connect_failed = <optimized out>
has_waited = 0
timeout = 140125231296512
__func__ = "ust_listener_thread"
__PRETTY_FUNCTION__ = "ust_listener_thread"
#3 0x00007f71717ebb50 in start_thread (arg=<optimized out>) at pthread_create.c:304
__res = <optimized out>
pd = 0x7f71711c0700
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140125205694208, -4603202139538584103, 140735518955104, 140125205694912, 140125231337536,
3, 4538720489884618201, 4538721291012672985}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0,
cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#4 0x00007f7171eeb0ed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
No locals.
#5 0x0000000000000000 in ?? ()
No symbol table info available.
Thread 1 (Thread 0x7f7172a2c760 (LWP 27006)):
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
No locals.
#1 0x00007f71717ee354 in _L_lock_997 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#2 0x00007f71717ee1b7 in __pthread_mutex_lock (mutex=0x7f7172a35970) at pthread_mutex_lock.c:82
ignore1 = <optimized out>
ignore2 = <optimized out>
ignore3 = <optimized out>
__PRETTY_FUNCTION__ = "__pthread_mutex_lock"
type = 1923307888
#3 0x00007f717281f9d9 in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#4 0x00007f717281b90c in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#5 0x00007f717281c95a in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#6 0x00007f717282710e in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#7 0x00007f7172822bd6 in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#8 0x00007f7172826b4a in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#9 0x00007f7171f21710 in do_dlopen (ptr=0x7fff8a9d5900) at dl-libc.c:86
No locals.
#10 0x00007f7172822bd6 in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#11 0x00007f7171f217af in dlerror_run (operate=0xffffffffffffffff, args=0x7f7172a35970) at dl-libc.c:47
objname = 0x697f <Address 0x697f out of bounds>
last_errstring = 0x0
malloced = false
result = <optimized out>
#12 0x00007f7171f218a7 in *__GI___libc_dlopen_mode (name=<optimized out>, mode=<optimized out>) at dl-libc.c:160
args = {name = 0x7f71717f676f "libgcc_s.so.1", mode = -2147483647, map = 0x697e}
#13 0x00007f71717f472c in pthread_cancel_init () at ../nptl/sysdeps/pthread/unwind-forcedunwind.c:53
resume = <optimized out>
personality = <optimized out>
forcedunwind = <optimized out>
getcfa = <optimized out>
handle = <optimized out>
#14 0x00007f71717f15f8 in pthread_cancel (th=140125231339888) at pthread_cancel.c:40
pd = 0x7f7172a35970
result = <optimized out>
oldval = <optimized out>
newval = <optimized out>
#15 0x00007f71721a5da8 in lttng_ust_exit () at lttng-ust-comm.c:1514
ret = <optimized out>
__func__ = "lttng_ust_exit"
#16 0x00007f71728234cc in ?? () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
#17 0x00007f7171e45df2 in __run_exit_handlers (status=0, listp=0x7f71721944c8, run_list_atexit=true) at exit.c:78
atfct = <optimized out>
onfct = <optimized out>
cxafct = 0xfffffffffffffe00
#18 0x00007f7171e45e45 in *__GI_exit (status=1923307888) at exit.c:100
No locals.
#19 0x00007f7171e2deb4 in __libc_start_main (main=<optimized out>, argc=<optimized out>, ubp_av=<optimized out>, init=<optimized out>,
fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7fff8a9d5db8) at libc-start.c:276
result = -512
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {0, -4603202139538584103, 4197860, 140735518956992, 0, 0, 4603014377495072217,
4538722217557233113}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x4013b0, 0x7fff8a9d5dc8}, data = {prev = 0x0,
cleanup = 0x0, canceltype = 4199344}}}
not_first_call = <optimized out>
#20 0x0000000000400e0d in _start ()
MD Updated by Mathieu Desnoyers almost 12 years ago
My guess is that the dynamic linker lock is in a stale state in the parent when doing the call to daemon().
MD Updated by Mathieu Desnoyers almost 12 years ago
Making the feature disabled by default:
commit b11abb674e50c67e3410ab2bd5d0a263e88b73ba
Author: Paul Woegerer <paul_woegerer@mentor.com>
Date: Fri Feb 28 10:02:00 2014 +0100
Turn base address dump into experimental feature
Instead of having base address state dump enabled by default make it
necessary to explicitly enable it with environment variable
LTTNG_UST_WITH_EXPERIMENTAL_BADDR_STATEDUMP.
Documentation is updated to reflect this change and explain about the
experimental nature of this feature.
Signed-off-by: Paul Woegerer <paul_woegerer@mentor.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
MD Updated by Mathieu Desnoyers almost 12 years ago
- Target version changed from 2.4 to 2.5
Target version 2.5, since the feature is now inactive by default.
DG Updated by David Goulet over 11 years ago
- Target version changed from 2.5 to 2.6
PW Updated by Paul Woegerer over 11 years ago
- Status changed from Confirmed to Resolved
- % Done changed from 0 to 100
Applied in changeset ust|commit:458d678c8e42caa810b80a99d5f6a69f24598a37.