Bug #433 » 0001-Fix-health-check-now-validates-deadlock-and-lock-wai.patch
src/bin/lttng-sessiond/cmd.c | ||
---|---|---|
if (session->kernel_session != NULL) {
|
||
cds_list_for_each_entry(kchan,
|
||
&session->kernel_session->channel_list.head, list) {
|
||
health_code_update();
|
||
/* Copy lttng_channel struct to array */
|
||
memcpy(&channels[i], kchan->channel, sizeof(struct lttng_channel));
|
||
channels[i].enabled = kchan->enabled;
|
||
... | ... | |
rcu_read_lock();
|
||
cds_lfht_for_each_entry(session->ust_session->domain_global.channels->ht,
|
||
&iter.iter, uchan, node.node) {
|
||
health_code_update();
|
||
strncpy(channels[i].name, uchan->name, LTTNG_SYMBOL_NAME_LEN);
|
||
channels[i].attr.overwrite = uchan->attr.overwrite;
|
||
channels[i].attr.subbuf_size = uchan->attr.subbuf_size;
|
||
... | ... | |
}
|
||
cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, node.node) {
|
||
health_code_update();
|
||
strncpy(tmp[i].name, uevent->attr.name, LTTNG_SYMBOL_NAME_LEN);
|
||
tmp[i].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
|
||
tmp[i].enabled = uevent->enabled;
|
||
... | ... | |
/* Kernel channels */
|
||
cds_list_for_each_entry(event, &kchan->events_list.head , list) {
|
||
health_code_update();
|
||
strncpy((*events)[i].name, event->event->name, LTTNG_SYMBOL_NAME_LEN);
|
||
(*events)[i].name[LTTNG_SYMBOL_NAME_LEN - 1] = '\0';
|
||
(*events)[i].enabled = event->enabled;
|
||
... | ... | |
assert(uri);
|
||
health_code_update();
|
||
if (consumer == NULL) {
|
||
DBG("No consumer detected. Don't add URI. Stopping.");
|
||
ret = LTTNG_ERR_NO_CONSUMER;
|
||
... | ... | |
}
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
/* Code flow error */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = kernel_consumer_send_session(socket, session);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret < 0) {
|
||
ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
|
||
goto error;
|
||
... | ... | |
int ret;
|
||
struct lttcomm_sock *sock;
|
||
health_code_update();
|
||
/* Create socket object from URI */
|
||
sock = lttcomm_alloc_sock_from_uri(uri);
|
||
if (sock == NULL) {
|
||
... | ... | |
}
|
||
/* Connect to relayd so we can proceed with a session creation. */
|
||
health_poll_entry();
|
||
ret = relayd_connect(sock);
|
||
health_poll_exit();
|
||
if (ret < 0) {
|
||
ERR("Unable to reach lttng-relayd");
|
||
ret = LTTNG_ERR_RELAYD_CONNECT_FAIL;
|
||
... | ... | |
DBG3("Creating relayd stream socket from URI");
|
||
/* Check relayd version */
|
||
health_poll_entry();
|
||
ret = relayd_version_check(sock, RELAYD_VERSION_COMM_MAJOR,
|
||
RELAYD_VERSION_COMM_MINOR);
|
||
health_poll_exit();
|
||
if (ret < 0) {
|
||
ret = LTTNG_ERR_RELAYD_VERSION_FAIL;
|
||
goto close_sock;
|
||
... | ... | |
*relayd_sock = sock;
|
||
health_code_update();
|
||
return LTTNG_OK;
|
||
close_sock:
|
||
... | ... | |
lttcomm_destroy_sock(sock);
|
||
}
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
struct lttcomm_sock *sock = NULL;
|
||
health_code_update();
|
||
/* Set the network sequence index if not set. */
|
||
if (consumer->net_seq_index == -1) {
|
||
/*
|
||
... | ... | |
lttcomm_destroy_sock(sock);
|
||
}
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
/* Code flow error */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = send_consumer_relayd_sockets(LTTNG_DOMAIN_UST, session,
|
||
usess->consumer, socket);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret != LTTNG_OK) {
|
||
goto error;
|
||
}
|
||
... | ... | |
/* Code flow error */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = send_consumer_relayd_sockets(LTTNG_DOMAIN_KERNEL, session,
|
||
ksess->consumer, socket);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret != LTTNG_OK) {
|
||
goto error;
|
||
}
|
||
... | ... | |
int ret;
|
||
struct ltt_kernel_channel *kchan;
|
||
health_code_update();
|
||
/* Open kernel metadata */
|
||
if (ksess->metadata == NULL) {
|
||
ret = kernel_open_metadata(ksess);
|
||
... | ... | |
/* For each channel */
|
||
cds_list_for_each_entry(kchan, &ksess->channel_list.head, list) {
|
||
health_code_update();
|
||
if (kchan->stream_count == 0) {
|
||
ret = kernel_open_channel_stream(kchan);
|
||
if (ret < 0) {
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
usess = session->ust_session;
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
DBG("Enabling channel %s for session %s", attr->name, session->name);
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
}
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
{
|
||
int ret;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
assert(session->kernel_session);
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(channel_name);
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(channel_name);
|
||
rcu_read_lock();
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
rcu_read_unlock();
|
||
return ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
ssize_t nb_events = 0;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
nb_events = kernel_list_events(kernel_tracer_fd, events);
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
return nb_events;
|
||
error:
|
||
health_code_update();
|
||
/* Return negative value to differentiate return code */
|
||
return -ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
ssize_t nb_fields = 0;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_UST:
|
||
nb_fields = ust_app_list_event_fields(fields);
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
return nb_fields;
|
||
error:
|
||
health_code_update();
|
||
/* Return negative value to differentiate return code */
|
||
return -ret;
|
||
}
|
||
... | ... | |
assert(session);
|
||
health_code_update();
|
||
/* Ease our life a bit ;) */
|
||
ksession = session->kernel_session;
|
||
usess = session->ust_session;
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(session);
|
||
health_code_update();
|
||
/* Short cut */
|
||
ksession = session->kernel_session;
|
||
usess = session->ust_session;
|
||
... | ... | |
/* Flush all buffers before stopping */
|
||
cds_list_for_each_entry(kchan, &ksession->channel_list.head, list) {
|
||
health_code_update();
|
||
ret = kernel_flush_buffer(kchan);
|
||
if (ret < 0) {
|
||
ERR("Kernel flush buffer error");
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(uris);
|
||
assert(nb_uri > 0);
|
||
health_code_update();
|
||
/* Can't enable consumer after session started. */
|
||
if (session->enabled) {
|
||
ret = LTTNG_ERR_TRACE_ALREADY_STARTED;
|
||
... | ... | |
/* A socket in the HT should never have a negative fd */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = send_consumer_relayd_socket(domain, session, &uris[i],
|
||
consumer, socket);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret != LTTNG_OK) {
|
||
rcu_read_unlock();
|
||
goto error;
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(name);
|
||
health_code_update();
|
||
/*
|
||
* Verify if the session already exist
|
||
*
|
||
... | ... | |
session->consumer->enabled = 1;
|
||
end:
|
||
health_code_update();
|
||
return LTTNG_OK;
|
||
consumer_error:
|
||
session_destroy(session);
|
||
session_error:
|
||
find_error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
/* Safety net */
|
||
assert(session);
|
||
health_code_update();
|
||
usess = session->ust_session;
|
||
ksess = session->kernel_session;
|
||
... | ... | |
ret = session_destroy(session);
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
{
|
||
int ret;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
{
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(cdata);
|
||
assert(sock_path);
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
{
|
||
... | ... | |
goto error;
|
||
}
|
||
health_poll_entry();
|
||
sock = lttcomm_connect_unix_sock(sock_path);
|
||
health_poll_exit();
|
||
if (sock < 0) {
|
||
ret = LTTNG_ERR_CONNECT_FAIL;
|
||
goto error;
|
||
... | ... | |
goto error;
|
||
}
|
||
socket->lock = zmalloc(sizeof(pthread_mutex_t));
|
||
if (socket->lock == NULL) {
|
||
socket->_lock = zmalloc(sizeof(pthread_mutex_t));
|
||
if (socket->_lock == NULL) {
|
||
PERROR("zmalloc pthread mutex");
|
||
ret = LTTNG_ERR_FATAL;
|
||
goto error;
|
||
}
|
||
pthread_mutex_init(socket->lock, NULL);
|
||
pthread_mutex_init(socket->_lock, NULL);
|
||
socket->registered = 1;
|
||
rcu_read_lock();
|
||
... | ... | |
ret = LTTNG_OK;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
int ret, index = 0;
|
||
ssize_t nb_dom = 0;
|
||
health_code_update();
|
||
if (session->kernel_session != NULL) {
|
||
DBG3("Listing domains found kernel domain");
|
||
nb_dom++;
|
||
... | ... | |
index++;
|
||
}
|
||
health_code_update();
|
||
return nb_dom;
|
||
error:
|
||
health_code_update();
|
||
/* Return negative value to differentiate return code */
|
||
return -ret;
|
||
}
|
||
... | ... | |
int ret;
|
||
ssize_t nb_chan = 0;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
if (session->kernel_session != NULL) {
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
return nb_chan;
|
||
error:
|
||
health_code_update();
|
||
/* Return negative value to differentiate return code */
|
||
return -ret;
|
||
}
|
||
... | ... | |
int ret = 0;
|
||
ssize_t nb_event = 0;
|
||
health_code_update();
|
||
switch (domain) {
|
||
case LTTNG_DOMAIN_KERNEL:
|
||
if (session->kernel_session != NULL) {
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
return nb_event;
|
||
error:
|
||
health_code_update();
|
||
/* Return negative value to differentiate return code */
|
||
return -ret;
|
||
}
|
||
... | ... | |
DBG("Getting all available session for UID %d GID %d",
|
||
uid, gid);
|
||
health_code_update();
|
||
/*
|
||
* Iterate over session list and append data after the control struct in
|
||
* the buffer.
|
||
*/
|
||
cds_list_for_each_entry(session, &list->head, list) {
|
||
health_code_update();
|
||
/*
|
||
* Only list the sessions the user can control.
|
||
*/
|
||
... | ... | |
assert(session);
|
||
health_code_update();
|
||
if (session->enabled) {
|
||
/* Can't disable consumer on an already started session */
|
||
ret = LTTNG_ERR_TRACE_ALREADY_STARTED;
|
||
... | ... | |
}
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(session);
|
||
health_code_update();
|
||
/* Can't enable consumer after session started. */
|
||
if (session->enabled) {
|
||
ret = LTTNG_ERR_TRACE_ALREADY_STARTED;
|
||
... | ... | |
}
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
assert(session);
|
||
health_code_update();
|
||
/* Session MUST be stopped to ask for data availability. */
|
||
if (session->enabled) {
|
||
ret = LTTNG_ERR_SESSION_STARTED;
|
||
... | ... | |
ret = 0;
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
src/bin/lttng-sessiond/consumer.c | ||
---|---|---|
#include <common/uri.h>
|
||
#include "consumer.h"
|
||
#include "health.h"
|
||
void _consumer_socket_lock(struct consumer_socket *socket,
|
||
const char *func, const char *file, unsigned int line)
|
||
{
|
||
health_lock_wait(&socket->lock_object, func, file, line);
|
||
pthread_mutex_lock(socket->_lock);
|
||
health_lock_take(&socket->lock_object, func, file, line);
|
||
}
|
||
void consumer_socket_unlock(struct consumer_socket *socket)
|
||
{
|
||
health_lock_release(&socket->lock_object);
|
||
pthread_mutex_unlock(socket->_lock);
|
||
}
|
||
/*
|
||
* Receive a reply command status message from the consumer. Consumer socket
|
||
... | ... | |
assert(sock);
|
||
health_poll_entry();
|
||
ret = lttcomm_recv_unix_sock(sock->fd, &reply, sizeof(reply));
|
||
health_poll_exit();
|
||
if (ret <= 0) {
|
||
if (ret == 0) {
|
||
/* Orderly shutdown. Don't return 0 which means success. */
|
||
... | ... | |
msg.cmd_type = LTTNG_CONSUMER_DESTROY_RELAYD;
|
||
msg.u.destroy_relayd.net_seq_idx = consumer->net_seq_index;
|
||
pthread_mutex_lock(sock->lock);
|
||
consumer_socket_lock(sock);
|
||
ret = lttcomm_send_unix_sock(sock->fd, &msg, sizeof(msg));
|
||
if (ret < 0) {
|
||
/* Indicate that the consumer is probably closing at this point. */
|
||
... | ... | |
DBG2("Consumer send destroy relayd command done");
|
||
error_send:
|
||
pthread_mutex_unlock(sock->lock);
|
||
consumer_socket_unlock(sock);
|
||
error:
|
||
return ret;
|
||
}
|
||
... | ... | |
}
|
||
socket->registered = 0;
|
||
socket->lock = &data->lock;
|
||
socket->_lock = &data->lock;
|
||
rcu_read_lock();
|
||
consumer_add_socket(socket, output);
|
||
rcu_read_unlock();
|
||
... | ... | |
}
|
||
copy_sock->registered = socket->registered;
|
||
copy_sock->lock = socket->lock;
|
||
copy_sock->_lock = socket->_lock;
|
||
consumer_add_socket(copy_sock, output);
|
||
}
|
||
rcu_read_unlock();
|
||
... | ... | |
assert(consumer);
|
||
assert(consumer_sock);
|
||
health_code_update();
|
||
/* Bail out if consumer is disabled */
|
||
if (!consumer->enabled) {
|
||
ret = LTTNG_OK;
|
||
... | ... | |
DBG2("Consumer relayd socket sent");
|
||
error:
|
||
health_code_update();
|
||
return ret;
|
||
}
|
||
... | ... | |
/* Code flow error */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = lttcomm_send_unix_sock(socket->fd, &msg, sizeof(msg));
|
||
if (ret < 0) {
|
||
/* The above call will print a PERROR on error. */
|
||
DBG("Error on consumer is data pending on sock %d", socket->fd);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
goto error_unlock;
|
||
}
|
||
... | ... | |
* the reply status message.
|
||
*/
|
||
health_poll_entry();
|
||
ret = lttcomm_recv_unix_sock(socket->fd, &ret_code, sizeof(ret_code));
|
||
health_poll_exit();
|
||
if (ret <= 0) {
|
||
if (ret == 0) {
|
||
/* Orderly shutdown. Don't return 0 which means success. */
|
||
... | ... | |
}
|
||
/* The above call will print a PERROR on error. */
|
||
DBG("Error on recv consumer is data pending on sock %d", socket->fd);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
goto error_unlock;
|
||
}
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret_code == 1) {
|
||
break;
|
src/bin/lttng-sessiond/consumer.h | ||
---|---|---|
int fd;
|
||
/*
|
||
* To use this socket (send/recv), this lock MUST be acquired.
|
||
* Use consumer_socket_lock()/unlock() to access lock.
|
||
*/
|
||
pthread_mutex_t *lock;
|
||
pthread_mutex_t *_lock;
|
||
/* Lock owner */
|
||
struct health_object lock_object;
|
||
/*
|
||
* Indicates if the socket was registered by a third party
|
||
... | ... | |
unsigned int nb_init_streams);
|
||
int consumer_is_data_pending(unsigned int id,
|
||
struct consumer_output *consumer);
|
||
void _consumer_socket_lock(struct consumer_socket *socket,
|
||
const char *func, const char *file, unsigned int line);
|
||
void consumer_socket_unlock(struct consumer_socket *socket);
|
||
#define consumer_socket_lock(socket) \
|
||
_consumer_socket_lock(socket, __func__, __FILE__, __LINE__)
|
||
#endif /* _CONSUMER_H */
|
src/bin/lttng-sessiond/health.c | ||
---|---|---|
return 0;
|
||
}
|
||
static void print_lock_wait_chain(struct health_state *state)
|
||
{
|
||
struct health_object *object;
|
||
object = state->wait.object;
|
||
for (;;) {
|
||
struct health_state *owner;
|
||
if (!object) {
|
||
break;
|
||
}
|
||
owner = object->owner;
|
||
if (!owner) {
|
||
break;
|
||
}
|
||
ERR("[deadlock chain] waiting at %s()@%s:%u on lock %p "
|
||
"held at %s()@%s:%u\n", owner->wait.func, owner->wait.file,
|
||
owner->wait.line, object, object->func, object->file,
|
||
object->line);
|
||
if (owner == state) {
|
||
break;
|
||
}
|
||
object = owner->wait.object;
|
||
}
|
||
}
|
||
/*
|
||
* Check if a given thread is in a deadlock situation.
|
||
* state_lock() needs to be held while calling this function.
|
||
*
|
||
* Return 0 if health is bad (deadlock), 1 if we are waiting for a lock (so our
|
||
* progress does not need to be checked), -1 if we are not waiting for a lock
|
||
* (so our progress needs to be checked).
|
||
*/
|
||
static int validate_state_no_deadlock(struct health_state *state)
|
||
{
|
||
struct health_object *object;
|
||
int retval = 1;
|
||
/*
|
||
* If thread is waiting for a lock, it means another thread has the lock.
|
||
* We will necessarily check for this other thread's progress (assuming it
|
||
* is properly registered), except in one case: if we have a circular
|
||
* dependency between locks (deadlock). Therefore, check for this case by
|
||
* following the lock chain and checking if we have a loop. If we are
|
||
* holding a lock, and if there is no circular dependency within lock
|
||
* chain, we can report health OK for ourself and let checks on the thread
|
||
* we are waiting for report progress. Iteration on this list is protected
|
||
* by state_lock()/unlock().
|
||
*/
|
||
object = state->wait.object;
|
||
if (!object) {
|
||
retval = -1;
|
||
goto end;
|
||
}
|
||
for (;;) {
|
||
struct health_state *owner;
|
||
/*
|
||
* If we (or any thread in our dependency chain) are waiting on a lock
|
||
* that has no owner, it means we are in a short race during which it
|
||
* can become taken or released very soon. Let the next check find out
|
||
* which it is.
|
||
*/
|
||
owner = object->owner;
|
||
if (!owner) {
|
||
break;
|
||
}
|
||
if (owner == state) {
|
||
ERR("Deadlock detected");
|
||
print_lock_wait_chain(state);
|
||
retval = 0;
|
||
break;
|
||
}
|
||
/* Check wait-state of next thread in chain. */
|
||
object = owner->wait.object;
|
||
/* NULL object: next thread not waiting on a lock. */
|
||
if (!object) {
|
||
break;
|
||
}
|
||
}
|
||
end:
|
||
return retval;
|
||
}
|
||
/*
|
||
* Validate health state. Checks for the error flag or health conditions.
|
||
* state_lock() needs to be held while calling this function.
|
||
*
|
||
* Return 0 if health is bad or else 1.
|
||
*/
|
||
... | ... | |
goto end;
|
||
}
|
||
ret = validate_state_no_deadlock(state);
|
||
switch (ret) {
|
||
case -1:
|
||
/* not waiting for lock */
|
||
break;
|
||
case 0:
|
||
retval = 0;
|
||
/* deadlock */
|
||
goto end;
|
||
case 1:
|
||
retval = 1;
|
||
/* skip progress check, we are waiting for a lock */
|
||
goto end;
|
||
}
|
||
/*
|
||
* Initial condition need to update the last counter and sample time, but
|
||
* should not check health in this initial case, because we don't know how
|
||
... | ... | |
cds_list_del(&URCU_TLS(health_state).node);
|
||
state_unlock();
|
||
}
|
||
/*
|
||
* Need to hold state lock to protect against list traversal.
|
||
*/
|
||
void health_lock_wait(struct health_object *object,
|
||
const char *func, const char *file, unsigned int line)
|
||
{
|
||
struct health_thread_wait *wait;
|
||
wait = &URCU_TLS(health_state).wait;
|
||
state_lock();
|
||
wait->object = object;
|
||
wait->func = func;
|
||
wait->file = file;
|
||
wait->line = line;
|
||
assert(validate_state_no_deadlock(&URCU_TLS(health_state)) > 0);
|
||
state_unlock();
|
||
}
|
||
/*
|
||
* Need to hold state lock to protect against list traversal.
|
||
*/
|
||
void health_lock_take(struct health_object *object,
|
||
const char *func, const char *file, unsigned int line)
|
||
{
|
||
struct health_thread_wait *wait;
|
||
wait = &URCU_TLS(health_state).wait;
|
||
state_lock();
|
||
wait->object = NULL;
|
||
wait->func = NULL;
|
||
wait->file = NULL;
|
||
wait->line = 0;
|
||
object->owner = &URCU_TLS(health_state);
|
||
object->func = func;
|
||
object->file = file;
|
||
object->line = line;
|
||
state_unlock();
|
||
}
|
||
/*
|
||
* Need to hold state lock to protect against list traversal.
|
||
*/
|
||
void health_lock_release(struct health_object *object)
|
||
{
|
||
state_lock();
|
||
object->owner = NULL;
|
||
object->func = NULL;
|
||
object->file = NULL;
|
||
object->line = 0;
|
||
state_unlock();
|
||
}
|
src/bin/lttng-sessiond/health.h | ||
---|---|---|
struct cds_list_head head;
|
||
};
|
||
/*
|
||
* The owner pointer is typically updated by a thread after it successfully
|
||
* acquires a lock providing mutual exclusion to a critical section associated
|
||
* with the struct health_object. Please note that if a health check is
|
||
* performed between the lock acquisition and setting the ownership of the
|
||
* object, it will see that the lock is not acquired while it actually is. This
|
||
* short race window does not matter, because the purpose of health check is to
|
||
* detect stalls over long periods of time.
|
||
*/
|
||
struct health_object {
	/* Thread currently owning the object, or NULL when it has no owner. */
	struct health_state *owner;

	/* Call site at which ownership was taken. */
	const char *func;	/* function name */
	const char *file;	/* source file name */
	unsigned int line;	/* line number */
};
|
||
struct health_thread_wait {
	/* Object the thread is waiting on, or NULL when it is not waiting. */
	struct health_object *object;

	/* Call site at which the thread started waiting. */
	const char *func;	/* function name */
	const char *file;	/* source file name */
	unsigned int line;	/* line number */
};
|
||
struct health_state {
|
||
/*
|
||
* last counter and last_time are only read and updated by the health_check
|
||
... | ... | |
unsigned long current; /* progress counter, updated atomically */
|
||
enum health_flags flags; /* other flags, updated atomically */
|
||
enum health_type type; /* Indicates the nature of the thread. */
|
||
struct health_thread_wait wait;
|
||
/* Node of the global TLS state list. */
|
||
struct cds_list_head node;
|
||
};
|
||
... | ... | |
uatomic_add(&URCU_TLS(health_state).current, HEALTH_CODE_VALUE);
|
||
}
|
||
void health_lock_wait(struct health_object *object,
|
||
const char *func, const char *file, unsigned int line);
|
||
void health_lock_take(struct health_object *object,
|
||
const char *func, const char *file, unsigned int line);
|
||
void health_lock_release(struct health_object *object);
|
||
/*
|
||
* Set health "error" flag.
|
||
*/
|
src/bin/lttng-sessiond/main.c | ||
---|---|---|
/* Code flow error */
|
||
assert(socket->fd >= 0);
|
||
pthread_mutex_lock(socket->lock);
|
||
consumer_socket_lock(socket);
|
||
ret = kernel_consumer_send_channel_stream(socket,
|
||
channel, ksess);
|
||
pthread_mutex_unlock(socket->lock);
|
||
consumer_socket_unlock(socket);
|
||
if (ret < 0) {
|
||
rcu_read_unlock();
|
||
goto error;
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
/* Setting up the consumer_data pid */
|
||
consumer_data->pid = ret;
|
||
DBG2("Consumer pid %d", consumer_data->pid);
|
||
... | ... | |
goto error;
|
||
}
|
||
health_code_update();
|
||
end:
|
||
return 0;
|
||
... | ... | |
/* Create directory(ies) on local filesystem. */
|
||
if (session->kernel_session->consumer->type == CONSUMER_DST_LOCAL &&
|
||
strlen(session->kernel_session->consumer->dst.trace_path) > 0) {
|
||
health_code_update();
|
||
ret = run_as_mkdir_recursive(
|
||
session->kernel_session->consumer->dst.trace_path,
|
||
S_IRWXU | S_IRWXG, session->uid, session->gid);
|
||
... | ... | |
}
|
||
skip_domain:
|
||
health_code_update();
|
||
/* Validate consumer daemon state when start/stop trace command */
|
||
if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE ||
|
||
cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) {
|
||
... | ... | |
/* Receive variable len data */
|
||
DBG("Receiving %zu URI(s) from client ...", nb_uri);
|
||
health_poll_entry();
|
||
ret = lttcomm_recv_unix_sock(sock, uris, len);
|
||
health_poll_exit();
|
||
if (ret <= 0) {
|
||
DBG("No URIs received from client... continuing");
|
||
*sock_error = 1;
|
||
... | ... | |
/* Receive variable len data */
|
||
DBG("Waiting for %zu URIs from client ...", nb_uri);
|
||
health_poll_entry();
|
||
ret = lttcomm_recv_unix_sock(sock, uris, len);
|
||
health_poll_exit();
|
||
if (ret <= 0) {
|
||
DBG("No URIs received from client... continuing");
|
||
*sock_error = 1;
|
||
... | ... | |
}
|
||
/* Receive var. len. data */
|
||
DBG("Receiving var len data from client ...");
|
||
health_poll_entry();
|
||
ret = lttcomm_recv_unix_sock(sock, bytecode,
|
||
cmd_ctx->lsm->u.enable.bytecode_len);
|
||
health_poll_exit();
|
||
if (ret <= 0) {
|
||
DBG("Nothing recv() from client var len data... continuing");
|
||
*sock_error = 1;
|
src/bin/lttng-sessiond/session.c | ||
---|---|---|
#include <common/common.h>
|
||
#include <common/sessiond-comm/sessiond-comm.h>
|
||
#include "health.h"
|
||
#include "session.h"
|
||
/*
|
||
... | ... | |
/*
|
||
* Acquire session list lock
|
||
*/
|
||
void session_lock_list(void)
|
||
void _session_lock_list(const char *func, const char *file, unsigned int line)
|
||
{
|
||
/*
|
||
* Trying to lock this session list can take an arbitrary amount of time so
|
||
* let's set the health poll execution state.
|
||
*/
|
||
health_lock_wait(<t_session_list.lock_object, func, file, line);
|
||
pthread_mutex_lock(<t_session_list.lock);
|
||
health_lock_take(<t_session_list.lock_object, func, file, line);
|
||
}
|
||
/*
|
||
... | ... | |
*/
|
||
void session_unlock_list(void)
|
||
{
|
||
health_lock_release(<t_session_list.lock_object);
|
||
pthread_mutex_unlock(<t_session_list.lock);
|
||
}
|
||
/*
|
||
* Acquire session lock
|
||
*/
|
||
void session_lock(struct ltt_session *session)
|
||
void _session_lock(struct ltt_session *session,
|
||
const char *func, const char *file, unsigned int line)
|
||
{
|
||
assert(session);
|
||
health_lock_wait(&session->lock_object, func, file, line);
|
||
pthread_mutex_lock(&session->lock);
|
||
health_lock_take(&session->lock_object, func, file, line);
|
||
}
|
||
/*
|
||
... | ... | |
*/
|
||
void session_unlock(struct ltt_session *session)
|
||
{
|
||
assert(session);
|
||
health_lock_release(&session->lock_object);
|
||
pthread_mutex_unlock(&session->lock);
|
||
}
|
||
src/bin/lttng-sessiond/session.h | ||
---|---|---|
#include "trace-kernel.h"
|
||
#include "trace-ust.h"
|
||
#include "health.h"
|
||
/*
|
||
* Tracing session list
|
||
... | ... | |
* iterate or/and do any actions on that list.
|
||
*/
|
||
pthread_mutex_t lock;
|
||
/* Lock owner */
|
||
struct health_object lock_object;
|
||
/*
|
||
* Session unique ID generator. The session list lock MUST be
|
||
... | ... | |
* session_lock() and session_unlock() for that.
|
||
*/
|
||
pthread_mutex_t lock;
|
||
/* Lock owner */
|
||
struct health_object lock_object;
|
||
struct cds_list_head list;
|
||
int enabled; /* enabled/started flag */
|
||
unsigned int id; /* session unique identifier */
|
||
... | ... | |
int session_create(char *name, char *path, uid_t uid, gid_t gid);
|
||
int session_destroy(struct ltt_session *session);
|
||
void session_lock(struct ltt_session *session);
|
||
void session_lock_list(void);
|
||
void _session_lock(struct ltt_session *session,
|
||
const char *func, const char *file, unsigned int line);
|
||
void _session_lock_list(const char *func, const char *file, unsigned int line);
|
||
void session_unlock(struct ltt_session *session);
|
||
void session_unlock_list(void);
|
||
#define session_lock(session) \
|
||
_session_lock(session, __func__, __FILE__, __LINE__)
|
||
#define session_lock_list() \
|
||
_session_lock_list(__func__, __FILE__, __LINE__)
|
||
struct ltt_session *session_find_by_name(char *name);
|
||
struct ltt_session_list *session_get_list(void);
|
||
src/bin/lttng-sessiond/ust-consumer.c | ||
---|---|---|
}
|
||
cds_list_for_each_entry_safe(stream, tmp, &uchan->streams.head, list) {
|
||
health_code_update();
|
||
if (!stream->obj->shm_fd) {
|
||
continue;
|
||
}
|
||
... | ... | |
DBG("Sending metadata stream fd to consumer on %d", sock->fd);
|
||
pthread_mutex_lock(sock->lock);
|
||
consumer_socket_lock(sock);
|
||
/* Sending metadata information to the consumer */
|
||
ret = send_metadata(sock, usess, consumer);
|
||
... | ... | |
ret = 0;
|
||
error:
|
||
pthread_mutex_unlock(sock->lock);
|
||
consumer_socket_unlock(sock);
|
||
return ret;
|
||
}
|
tests/tools/Makefile.am | ||
---|---|---|
UTILS=../utils.h
|
||
SESSIONS=$(top_srcdir)/src/bin/lttng-sessiond/session.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/consumer.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/health.c \
|
||
$(top_srcdir)/src/common/uri.c \
|
||
$(top_srcdir)/src/common/utils.c \
|
||
$(top_srcdir)/src/common/error.c
|
||
KERN_DATA_TRACE=$(top_srcdir)/src/bin/lttng-sessiond/trace-kernel.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/consumer.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/health.c \
|
||
$(top_srcdir)/src/common/uri.c \
|
||
$(top_srcdir)/src/common/utils.c
|
||
COMMON=$(top_builddir)/src/common/libcommon.la
|
||
COMMON=$(top_builddir)/src/common/libcommon.la -lrt
|
||
HASHTABLE=$(top_builddir)/src/common/hashtable/libhashtable.la
|
||
SESSIOND_COMM=$(top_builddir)/src/common/sessiond-comm/libsessiond-comm.la
|
||
... | ... | |
noinst_PROGRAMS += test_ust_data_trace
|
||
UST_DATA_TRACE=$(top_srcdir)/src/bin/lttng-sessiond/trace-ust.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/consumer.c \
|
||
$(top_srcdir)/src/bin/lttng-sessiond/health.c \
|
||
$(top_srcdir)/src/common/uri.c \
|
||
$(top_srcdir)/src/common/utils.c
|
||
# UST trace data unit tests
|