X-Git-Url: http://git.samba.org/?a=blobdiff_plain;f=server%2Fctdb_daemon.c;h=9c650a084f1207cd6bfda7fd99273a9285b75bb6;hb=c562874b9d5caff40f79b2857db8e9a5eb196627;hp=275a2071534ad1695f8de58e3e042c084722ce60;hpb=944434eb6420774e42e58984c6ddaa326a6853bd;p=sahlberg%2Fctdb.git diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c index 275a2071..9c650a08 100644 --- a/server/ctdb_daemon.c +++ b/server/ctdb_daemon.c @@ -20,12 +20,12 @@ #include "includes.h" #include "db_wrap.h" #include "lib/tdb/include/tdb.h" -#include "lib/events/events.h" +#include "lib/tevent/tevent.h" #include "lib/util/dlinklist.h" #include "system/network.h" #include "system/filesys.h" #include "system/wait.h" -#include "../include/ctdb.h" +#include "../include/ctdb_client.h" #include "../include/ctdb_private.h" #include @@ -43,6 +43,33 @@ static void print_exit_message(void) DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n")); } + + +static void ctdb_time_tick(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + + if (getpid() != ctdbd_pid) { + return; + } + + event_add_timed(ctdb->ev, ctdb, + timeval_current_ofs(1, 0), + ctdb_time_tick, ctdb); +} + +/* Used to trigger a dummy event once per second, to make + * detection of hangs more reliable. 
+ */ +static void ctdb_start_time_tickd(struct ctdb_context *ctdb) +{ + event_add_timed(ctdb->ev, ctdb, + timeval_current_ofs(1, 0), + ctdb_time_tick, ctdb); +} + + /* called when the "startup" event script has finished */ static void ctdb_start_transport(struct ctdb_context *ctdb) { @@ -77,6 +104,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb) /* start listening for recovery daemon pings */ ctdb_control_recd_ping(ctdb); + + /* start listening to timer ticks */ + ctdb_start_time_tickd(ctdb); } static void block_signal(int signum) @@ -97,7 +127,7 @@ static void block_signal(int signum) */ static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr) { - client->ctdb->statistics.client_packets_sent++; + CTDB_INCREMENT_STAT(client->ctdb, client_packets_sent); if (hdr->operation == CTDB_REQ_MESSAGE) { if (ctdb_queue_length(client->queue) > client->ctdb->tunable.max_queue_depth_drop_msg) { DEBUG(DEBUG_ERR,("CTDB_REQ_MESSAGE queue full - killing client connection.\n")); @@ -184,9 +214,7 @@ static int ctdb_client_destructor(struct ctdb_client *client) ctdb_takeover_client_destructor_hook(client); ctdb_reqid_remove(client->ctdb, client->client_id); - if (client->ctdb->statistics.num_clients) { - client->ctdb->statistics.num_clients--; - } + CTDB_DECREMENT_STAT(client->ctdb, num_clients); if (client->num_persistent_updates != 0) { DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates)); @@ -197,7 +225,16 @@ static int ctdb_client_destructor(struct ctdb_client *client) DEBUG(DEBUG_ERR, (__location__ " client exit while transaction " "commit active. Forcing recovery.\n")); client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; + + /* legacy trans2 transaction state: */ ctdb_db->transaction_active = false; + + /* + * trans3 transaction state: + * + * The destructor sets the pointer to NULL. 
+ */ + talloc_free(ctdb_db->persistent_state); } return 0; @@ -258,10 +295,9 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state) res = ctdb_daemon_call_recv(state, dstate->call); if (res != 0) { DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n")); - if (client->ctdb->statistics.pending_calls > 0) { - client->ctdb->statistics.pending_calls--; - } - ctdb_latency(ctdb_db, "call_from_client_cb 1", &client->ctdb->statistics.max_call_latency, dstate->start_time); + CTDB_DECREMENT_STAT(client->ctdb, pending_calls); + + CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time); return; } @@ -270,10 +306,8 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state) length, struct ctdb_reply_call); if (r == NULL) { DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n")); - if (client->ctdb->statistics.pending_calls > 0) { - client->ctdb->statistics.pending_calls--; - } - ctdb_latency(ctdb_db, "call_from_client_cb 2", &client->ctdb->statistics.max_call_latency, dstate->start_time); + CTDB_DECREMENT_STAT(client->ctdb, pending_calls); + CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time); return; } r->hdr.reqid = dstate->reqid; @@ -288,11 +322,9 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state) if (res != 0) { DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n")); } - ctdb_latency(ctdb_db, "call_from_client_cb 3", &client->ctdb->statistics.max_call_latency, dstate->start_time); + CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time); + CTDB_DECREMENT_STAT(client->ctdb, pending_calls); talloc_free(dstate); - if (client->ctdb->statistics.pending_calls > 0) { - client->ctdb->statistics.pending_calls--; - } } struct ctdb_daemon_packet_wrap { @@ -344,18 +376,14 @@ static void 
daemon_request_call_from_client(struct ctdb_client *client, struct ctdb_context *ctdb = client->ctdb; struct ctdb_daemon_packet_wrap *w; - ctdb->statistics.total_calls++; - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls++; - } + CTDB_INCREMENT_STAT(ctdb, total_calls); + CTDB_INCREMENT_STAT(ctdb, pending_calls); ctdb_db = find_ctdb_db(client->ctdb, c->db_id); if (!ctdb_db) { DEBUG(DEBUG_ERR, (__location__ " Unknown database in request. db_id==0x%08x", c->db_id)); - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; - } + CTDB_DECREMENT_STAT(ctdb, pending_calls); return; } @@ -383,9 +411,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client, daemon_incoming_packet_wrap, w, True); if (ret == -2) { /* will retry later */ - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; - } + CTDB_DECREMENT_STAT(ctdb, pending_calls); return; } @@ -393,19 +419,19 @@ static void daemon_request_call_from_client(struct ctdb_client *client, if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Unable to fetch record\n")); - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; - } + CTDB_DECREMENT_STAT(ctdb, pending_calls); return; } dstate = talloc(client, struct daemon_call_state); if (dstate == NULL) { - ctdb_ltdb_unlock(ctdb_db, key); - DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n")); - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; + ret = ctdb_ltdb_unlock(ctdb_db, key); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret)); } + + DEBUG(DEBUG_ERR,(__location__ " Unable to allocate dstate\n")); + CTDB_DECREMENT_STAT(ctdb, pending_calls); return; } dstate->start_time = timeval_current(); @@ -415,12 +441,14 @@ static void daemon_request_call_from_client(struct ctdb_client *client, call = dstate->call = talloc_zero(dstate, struct 
ctdb_call); if (call == NULL) { - ctdb_ltdb_unlock(ctdb_db, key); - DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n")); - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; + ret = ctdb_ltdb_unlock(ctdb_db, key); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret)); } - ctdb_latency(ctdb_db, "call_from_client 1", &ctdb->statistics.max_call_latency, dstate->start_time); + + DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n")); + CTDB_DECREMENT_STAT(ctdb, pending_calls); + CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time); return; } @@ -436,14 +464,15 @@ static void daemon_request_call_from_client(struct ctdb_client *client, state = ctdb_daemon_call_send_remote(ctdb_db, call, &header); } - ctdb_ltdb_unlock(ctdb_db, key); + ret = ctdb_ltdb_unlock(ctdb_db, key); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret)); + } if (state == NULL) { DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n")); - if (client->ctdb->statistics.pending_calls > 0) { - ctdb->statistics.pending_calls--; - } - ctdb_latency(ctdb_db, "call_from_client 2", &ctdb->statistics.max_call_latency, dstate->start_time); + CTDB_DECREMENT_STAT(ctdb, pending_calls); + CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time); return; } talloc_steal(state, dstate); @@ -483,17 +512,17 @@ static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr) switch (hdr->operation) { case CTDB_REQ_CALL: - ctdb->statistics.client.req_call++; + CTDB_INCREMENT_STAT(ctdb, client.req_call); daemon_request_call_from_client(client, (struct ctdb_req_call *)hdr); break; case CTDB_REQ_MESSAGE: - ctdb->statistics.client.req_message++; + CTDB_INCREMENT_STAT(ctdb, client.req_message); daemon_request_message_from_client(client, (struct ctdb_req_message *)hdr); break; case CTDB_REQ_CONTROL: - 
ctdb->statistics.client.req_control++; + CTDB_INCREMENT_STAT(ctdb, client.req_control); daemon_request_control_from_client(client, (struct ctdb_req_control *)hdr); break; @@ -519,7 +548,7 @@ static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args) return; } - client->ctdb->statistics.client_packets_recv++; + CTDB_INCREMENT_STAT(client->ctdb, client_packets_recv); if (cnt < sizeof(*hdr)) { ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n", @@ -619,11 +648,12 @@ static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde, DLIST_ADD(ctdb->client_pids, client_pid); client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT, - ctdb_daemon_read_cb, client); + ctdb_daemon_read_cb, client, + "client-%u", client->pid); talloc_set_destructor(client, ctdb_client_destructor); talloc_set_destructor(client_pid, ctdb_clientpid_destructor); - ctdb->statistics.num_clients++; + CTDB_INCREMENT_STAT(ctdb, num_clients); } @@ -713,7 +743,7 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status, /* start the protocol going as a daemon */ -int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog) +int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog, const char *public_address_list) { int res, ret = -1; struct fd_event *fde; @@ -751,7 +781,10 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog) DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid)); - ctdb_high_priority(ctdb); + if (ctdb->do_setsched) { + /* try to set us up as realtime */ + ctdb_set_scheduler(ctdb); + } /* ensure the socket is deleted on exit of the daemon */ domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name); @@ -761,9 +794,18 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog) } ctdb->ev = event_context_init(NULL); + tevent_loop_allow_nesting(ctdb->ev); + ret = ctdb_init_tevent_logging(ctdb); + if 
(ret != 0) { + DEBUG(DEBUG_ALERT,("Failed to initialize TEVENT logging\n")); + exit(1); + } ctdb_set_child_logging(ctdb); + /* initialize statistics collection */ + ctdb_statistics_init(ctdb); + /* force initial recovery for election */ ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; @@ -791,30 +833,41 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog) if (ctdb->methods->initialise(ctdb) != 0) { ctdb_fatal(ctdb, "transport failed to initialise"); } + if (public_address_list) { + ret = ctdb_set_public_addresses(ctdb, public_address_list); + if (ret == -1) { + DEBUG(DEBUG_ALERT,("Unable to setup public address list\n")); + exit(1); + } + } + /* attach to existing databases */ if (ctdb_attach_databases(ctdb) != 0) { ctdb_fatal(ctdb, "Failed to attach to databases\n"); } - /* start frozen, then let the first election sort things out */ - if (ctdb_blocking_freeze(ctdb)) { - ctdb_fatal(ctdb, "Failed to get initial freeze\n"); - } - ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT); if (ret != 0) { ctdb_fatal(ctdb, "Failed to run init event\n"); } ctdb_run_notification_script(ctdb, "init"); + /* start frozen, then let the first election sort things out */ + if (ctdb_blocking_freeze(ctdb)) { + ctdb_fatal(ctdb, "Failed to get initial freeze\n"); + } + /* now start accepting clients, only can do this once frozen */ fde = event_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd, - EVENT_FD_READ|EVENT_FD_AUTOCLOSE, + EVENT_FD_READ, ctdb_accept_client, ctdb); + tevent_fd_set_auto_close(fde); /* release any IPs we hold from previous runs of the daemon */ - ctdb_release_all_ips(ctdb); + if (ctdb->tunable.disable_ip_failover == 0) { + ctdb_release_all_ips(ctdb); + } /* start the transport going */ ctdb_start_transport(ctdb); @@ -873,7 +926,7 @@ struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb, size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1); if (ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Unable to allocate 
transport packet for operation %u of length %u. Transport is DOWN.\n", + DEBUG(DEBUG_INFO,(__location__ " Unable to allocate transport packet for operation %u of length %u. Transport is DOWN.\n", operation, (unsigned)length)); return NULL; } @@ -1098,7 +1151,7 @@ int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn, int len; if (ctdb->methods == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to send message. Transport is DOWN\n")); + DEBUG(DEBUG_INFO,(__location__ " Failed to send message. Transport is DOWN\n")); return -1; }