Project

General

Profile

Bug #1134

lttng-consumerd: hashtable.c:383: lttng_ht_add_unique_u64: Assertion `node_ptr == &node->node' failed.

Added by Julien Desfossez about 3 years ago. Updated almost 3 years ago.

Status:
Resolved
Priority:
Normal
Assignee:
-
Target version:
-
Start date:
11/08/2017
Due date:
% Done:

100%

Estimated time:

Description

Under a stress test with the rotation session feature, this happened in the consumer and it does not seem related to the rotation feature:

Core was generated by `lttng-consumerd --verbose -k --consumerd-cmd-sock /var/run/lttng/kconsumerd/com'.
Program terminated with signal SIGABRT, Aborted.
#0  0x00007f998dbc6c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
56    ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt full
#0  0x00007f998dbc6c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
        resultvar = 0
        pid = 16889
        selftid = 16897
#1  0x00007f998dbca028 in __GI_abort () at abort.c:89
        save_stage = 2
        act = {__sigaction_handler = {sa_handler = 0x7ffeebe892fd, sa_sigaction = 0x7ffeebe892fd}, sa_mask = {
            __val = {140297486009668, 4534938, 383, 4294967295, 140297484637523, 0, 140297354835216, 10536098800, 
              140296985908080, 140296985828272, 0, 0, 0, 21474836480, 140297505804288, 140297486024728}}, 
          sa_flags = 4535110, sa_restorer = 0x453550 <__PRETTY_FUNCTION__.5549>}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x00007f998dbbfbf6 in __assert_fail_base (fmt=0x7f998dd14018 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", 
    assertion=assertion@entry=0x453346 "node_ptr == &node->node", file=file@entry=0x45329a "hashtable.c", 
    line=line@entry=383, function=function@entry=0x453550 <__PRETTY_FUNCTION__.5549> "lttng_ht_add_unique_u64")
    at assert.c:92
        str = 0x7f9970008ff0 "" 
        total = 4096
#3  0x00007f998dbbfca2 in __GI___assert_fail (assertion=0x453346 "node_ptr == &node->node", 
    file=0x45329a "hashtable.c", line=383, function=0x453550 <__PRETTY_FUNCTION__.5549> "lttng_ht_add_unique_u64")
    at assert.c:101
No locals.
#4  0x000000000042d3e9 in lttng_ht_add_unique_u64 (ht=0x1e56e40, node=0x7f9970021368) at hashtable.c:383
        node_ptr = 0x7f997000dbb0
        __PRETTY_FUNCTION__ = "lttng_ht_add_unique_u64" 
#5  0x0000000000410f37 in consumer_add_metadata_stream (stream=0x7f9970021340) at consumer.c:2223
        ht = 0x1e584c0
        ret = 0
        iter = {iter = {node = 0x0, next = 0x0}}
        node = 0x0
        __PRETTY_FUNCTION__ = "consumer_add_metadata_stream" 
        __func__ = "consumer_add_metadata_stream" 
#6  0x0000000000423592 in lttng_kconsumer_recv_cmd (ctx=0x1e59020, sock=46, consumer_sockpoll=0x7f9985ffabd0)
    at kernel-consumer.c:751
        fd = 49
        stream_pipe = 0x1e590d0
        channel = 0x7f997001e070
        alloc_ret = 0
        new_stream = 0x7f9970021340
        ret = 0
        ret_code = LTTCOMM_CONSUMERD_SUCCESS
        msg = {cmd_type = 1, u = {channel = {channel_key = 63, session_id = 62, 
              pathname = '\000' <repeats 4095 times>, uid = 0, gid = 0, relayd_id = 0, nb_init_streams = 0, 
              name = '\000' <repeats 255 times>, output = LTTNG_EVENT_SPLICE, type = 0, tracefile_size = 0, 
              tracefile_count = 0, monitor = 0, live_timer_interval = 0, monitor_timer_interval = 0}, stream = {
              stream_key = 63, channel_key = 62, cpu = 0, no_monitor = 0}, relayd_sock = {net_index = 63, 
              type = (unknown: 62), sock = {sock = {fd = 0, proto = LTTCOMM_SOCK_UDP, sockaddr = {
                    type = LTTCOMM_INET, addr = {sin = {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, 
                        sin_zero = "\000\000\000\000\000\000\000"}, sin6 = {sin6_family = 0, sin6_port = 0, 
                        sin6_flowinfo = 0, sin6_addr = {__in6_u = {__u6_addr8 = '\000' <repeats 15 times>, 
                            __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, 
                        sin6_scope_id = 0}}}, ops = 0x0}, major = 0, minor = 0}, session_id = 0, 
              relayd_session_id = 0}, destroy_relayd = {net_seq_idx = 63}, data_pending = {session_id = 63}, 
            ask_channel = {subbuf_size = 63, num_subbuf = 62, overwrite = 0, switch_timer_interval = 0, 
              read_timer_interval = 0, live_timer_interval = 0, monitor_timer_interval = 0, output = 0, type = 0, 
              session_id = 0, pathname = '\000' <repeats 4095 times>, name = '\000' <repeats 255 times>, uid = 0, 
              gid = 0, relayd_id = 0, key = 0, uuid = '\000' <repeats 15 times>, chan_id = 0, tracefile_size = 0, 
              tracefile_count = 0, session_id_per_pid = 0, monitor = 0, ust_app_uid = 0, blocking_timeout = 0, 
              root_shm_path = '\000' <repeats 4095 times>, shm_path = '\000' <repeats 4095 times>}, get_channel = {
              key = 63}, destroy_channel = {key = 63}, push_metadata = {key = 63, target_offset = 62, len = 0, 
              version = 0}, close_metadata = {key = 63}, setup_metadata = {key = 63}, flush_channel = {key = 63}, 
            clear_quiescent_channel = {key = 63}, snapshot_channel = {
              pathname = "?\000\000\000\000\000\000\000>", '\000' <repeats 4086 times>, use_relayd = 0, metadata = 0, 
              relayd_id = 0, key = 0, nb_packets_per_stream = 0}, sent_streams = {channel_key = 63, 
              net_seq_idx = 62}, discarded_events = {session_id = 63, channel_key = 62}, lost_packets = {
              session_id = 63, channel_key = 62}, regenerate_metadata = {session_id = 63}, rotate_channel = {
              pathname = "?\000\000\000\000\000\000\000>", '\000' <repeats 4086 times>, metadata = 0, relayd_id = 0, 
              key = 0, new_chunk_id = 0}, rotate_rename = {
              current_path = "?\000\000\000\000\000\000\000>", '\000' <repeats 4086 times>, 
              new_path = '\000' <repeats 4095 times>, relayd_id = 0, session_id = 0, uid = 0, gid = 0}, 
            rotate_pending_relay = {relayd_id = 63, session_id = 62, chunk_id = 0}, mkdir = {
              path = "?\000\000\000\000\000\000\000>", '\000' <repeats 4086 times>, relayd_id = 0, session_id = 0, 
              uid = 0, gid = 0}}}
        __PRETTY_FUNCTION__ = "lttng_kconsumer_recv_cmd" 
        __func__ = "lttng_kconsumer_recv_cmd" 
#7  0x00000000004108d6 in lttng_consumer_recv_cmd (ctx=0x1e59020, sock=46, consumer_sockpoll=0x7f9985ffabd0)
    at consumer.c:2074
        __PRETTY_FUNCTION__ = "lttng_consumer_recv_cmd" 
#8  0x00000000004152ea in consumer_thread_sessiond_poll (data=0x1e59020) at consumer.c:3429
        sock = 46
        client_socket = -1
        ret = 0
        err = -1
        consumer_sockpoll = {{fd = 28, events = 3, revents = 0}, {fd = 46, events = 3, revents = 1}}
        ctx = 0x1e59020
        __func__ = "consumer_thread_sessiond_poll" 
#9  0x00007f998df61184 in start_thread (arg=0x7f9985ffb700) at pthread_create.c:312
        __res = <optimized out>
        pd = 0x7f9985ffb700
        now = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140297354852096, -6366347262164855394, 0, 0, 140297354852800, 
                140297354852096, 6383073031335581086, 6383055364983100830}, mask_was_saved = 0}}, priv = {pad = {0x0, 
              0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <optimized out>
        pagesize_m1 = <optimized out>
        sp = <optimized out>
        freesize = <optimized out>
        __PRETTY_FUNCTION__ = "start_thread" 
#10 0x00007f998dc8dffd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
No locals.

The sessiond was there at the time this was triggered:

#4  0x000000000041516e in consumer_socket_recv (socket=0x7fef68003f70, msg=0x7fef7e7e83c0, len=4) at consumer.c:109
#5  0x00000000004151c3 in consumer_recv_status_reply (sock=0x7fef68003f70) at consumer.c:141
#6  0x0000000000416a00 in consumer_send_msg (sock=0x7fef68003f70, msg=0x7fef7e7e8500) at consumer.c:773
#7  0x0000000000416a6b in consumer_send_channel (sock=0x7fef68003f70, msg=0x7fef7e7e8500) at consumer.c:790
#8  0x000000000041c915 in kernel_consumer_add_channel (sock=0x7fef68003f70, channel=0x7fef68007ad0, 
    ksession=0x7fef6802b330, monitor=1) at kernel-consumer.c:154
#9  0x000000000041d24f in kernel_consumer_send_channel_stream (sock=0x7fef68003f70, channel=0x7fef68007ad0, 
    session=0x7fef6802b330, monitor=1) at kernel-consumer.c:367
#10 0x000000000041d469 in kernel_consumer_send_session (sock=0x7fef68003f70, session=0x7fef6802b330)
    at kernel-consumer.c:429
#11 0x00000000004201db in init_kernel_tracing (session=0x7fef6802b330) at cmd.c:829
#12 0x0000000000420c0b in start_kernel_session (ksess=0x7fef6802b330, wpipe=31) at cmd.c:1133
#13 0x0000000000423147 in cmd_start_trace (session=0x7fef6802eb00) at cmd.c:2513
#14 0x0000000000469699 in process_client_msg (cmd_ctx=0x7fef68006490, sock=57, sock_error=0x7fef7e7fbb28)
    at main.c:3853
#15 0x000000000046bcb8 in thread_manage_clients (data=0x247e5a0) at main.c:4728
#16 0x00007fef86d3d184 in start_thread (arg=0x7fef7e7fc700) at pthread_create.c:312
#17 0x00007fef86a69ffd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

I have the full log of sessiond and consumerd, but it is huge (9.5G). The core dumps and the logs are available on cloud06:/root/src/lttng-tools/bug-rcu-ht-add-unique/.

#1

Updated by Julien Desfossez about 3 years ago

The problem is with this:

consumer_add_metadata_stream
    lttng_ht_add_unique_u64(consumer_data.stream_per_chan_id_ht,                
        &stream->node_channel_id);  

consumer_add_data_stream
    lttng_ht_add_u64(consumer_data.stream_per_chan_id_ht,                       
            &stream->node_channel_id);

The node_channel_id is the channel_key which is not unique, we just happen to be lucky usually and the metadata_stream is added before the data_stream.

#2

Updated by Julien Desfossez almost 3 years ago

  • Status changed from New to Resolved
  • % Done changed from 0 to 100

Also available in: Atom PDF