3 Utility functions to read/write blobs of data from a file descriptor
4 and handle the case where we might need multiple read/writes to get all the data.
7 Copyright (C) Andrew Tridgell 2006
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, see <http://www.gnu.org/licenses/>.
24 #include "lib/tdb/include/tdb.h"
25 #include "lib/events/events.h"
26 #include "lib/util/dlinklist.h"
27 #include "system/network.h"
28 #include "system/filesys.h"
29 #include "../include/ctdb_private.h"
30 #include "../include/ctdb.h"
32 /* structures for packet queueing - see common/ctdb_io.c */
/*
  Packet-queue data structures.

  NOTE(review): this listing is incomplete. The embedded numbering jumps
  from 39 to 46, so the rest of struct ctdb_queue_pkt and the opening line
  of the queue structure are not visible. Code later in this file reads
  pkt->data, pkt->length and pkt->full_length, and queue->fd, queue->fde,
  queue->alignment, queue->private_data and queue->destroyed, so those
  members are presumably declared on the missing lines - confirm against
  the complete file.
*/
38 struct ctdb_queue_pkt {
/* list links; presumably what the DLIST_* macros used in this file operate on */
39 struct ctdb_queue_pkt *next, *prev;
46 struct ctdb_context *ctdb;
47 struct ctdb_partial partial; /* partial input packet */
/* pending outbound packets; queue_io_write() sends from the head of out_queue.
   out_queue_tail is not referenced by any line visible in this listing. */
48 struct ctdb_queue_pkt *out_queue, *out_queue_tail;
/* incremented in ctdb_queue_send(), decremented in queue_io_write() */
49 uint32_t out_queue_length;
/* invoked with (data, pkt_size, private_data) for a complete inbound packet,
   and with (NULL, 0, private_data) from queue_dead() */
54 ctdb_queue_cb_fn_t callback;
/*
  Return queue->out_queue_length, the counter that ctdb_queue_send()
  increments and queue_io_write() decrements.

  The field is declared uint32_t while this function returns int, so the
  value is implicitly converted on return.
*/
60 int ctdb_queue_length(struct ctdb_queue *queue)
62 return queue->out_queue_length;
66 called when an incoming connection is readable
67 This function MUST be safe for reentry via the queue callback!
/*
  Read as much of the current inbound packet as is available on queue->fd.

  Visible flow:
   - FIONREAD is queried to learn how many bytes can be read (num_ready).
   - With no buffered data, a buffer of sizeof(pkt_size) bytes is allocated
     so that the length prefix can be collected first.
   - Once the length prefix is complete the buffer is reallocated and
     pkt_size is taken from the first 32 bits of the buffer.
   - When queue->partial.length reaches pkt_size the partial state is reset
     and the buffer is handed to queue->callback, which then owns it.

  NOTE(review): several lines of this function (declarations, braces,
  return statements and some call arguments) are missing from this listing,
  so the exact error paths cannot be confirmed from here.
*/
69 static void queue_io_read(struct ctdb_queue *queue)
72 uint32_t sz_bytes_req;
74 uint32_t pkt_bytes_remaining;
/* ask the kernel how many bytes are currently readable on the descriptor */
79 if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
83 /* the descriptor has been closed */
87 if (queue->partial.data == NULL) {
88 /* starting fresh, allocate buf for size bytes */
89 sz_bytes_req = sizeof(pkt_size);
90 queue->partial.data = talloc_size(queue, sz_bytes_req);
91 if (queue->partial.data == NULL) {
92 DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
96 } else if (queue->partial.length < sizeof(pkt_size)) {
97 /* yet to find out the packet length */
98 sz_bytes_req = sizeof(pkt_size) - queue->partial.length;
100 /* partial packet, length known, full buf allocated */
103 data = queue->partial.data;
/* sz_bytes_req > 0 means part of the length prefix is still outstanding */
105 if (sz_bytes_req > 0) {
/* never request more than FIONREAD reported as available */
106 to_read = MIN(sz_bytes_req, num_ready);
107 nread = read(queue->fd, data + queue->partial.length,
110 DEBUG(DEBUG_ERR,("read error nread=%d\n", (int)nread));
113 queue->partial.length += nread;
115 if (nread < sz_bytes_req) {
116 /* not enough to know the length */
117 DEBUG(DEBUG_DEBUG,("Partial packet length read\n"));
120 /* size now known, allocate buffer for the full packet */
121 queue->partial.data = talloc_realloc_size(queue, data,
123 if (queue->partial.data == NULL) {
124 DEBUG(DEBUG_ERR,("read error alloc failed for %u\n",
/* the buffer may have moved during the realloc, so refresh the local pointer */
128 data = queue->partial.data;
/* NOTE(review): the length prefix is read through a uint32_t cast, i.e. in
   host byte order - presumably both peers share endianness; confirm */
132 pkt_size = *(uint32_t *)data;
134 DEBUG(DEBUG_CRIT,("Invalid packet of length 0\n"));
/* NOTE(review): pkt_bytes_remaining is uint32_t; if pkt_size were smaller
   than the bytes already buffered this subtraction would wrap. Only a
   zero-length check is visible in this listing - confirm a lower bound on
   pkt_size is enforced. */
138 pkt_bytes_remaining = pkt_size - queue->partial.length;
139 to_read = MIN(pkt_bytes_remaining, num_ready);
140 nread = read(queue->fd, data + queue->partial.length,
143 DEBUG(DEBUG_ERR,("read error nread=%d\n",
147 queue->partial.length += nread;
149 if (queue->partial.length < pkt_size) {
150 DEBUG(DEBUG_DEBUG,("Partial packet data read\n"));
/* the partial state is cleared before the callback runs; presumably this is
   what keeps the function safe for re-entry from within the callback */
154 queue->partial.data = NULL;
155 queue->partial.length = 0;
156 /* it is the responsibility of the callback to free 'data' */
157 queue->callback(data, pkt_size, queue->private_data);
/* a NULL/0 callback, the same signal queue_dead() sends; presumably this is
   the failure path, but the lines leading here are not visible */
161 queue->callback(NULL, 0, queue->private_data);
165 /* used when an event triggers a dead queue */
/*
  Timed-event handler: recover the queue from private_data and invoke its
  callback with a NULL packet of length 0.

  ev, te and t are not used in the visible body. queue_io_write() and
  ctdb_queue_send() schedule a zero-delay timed event after a fatal write
  error; presumably this is the handler they register (the handler argument
  of those calls is not visible in this listing).
*/
166 static void queue_dead(struct event_context *ev, struct timed_event *te,
167 struct timeval t, void *private_data)
169 struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
170 queue->callback(NULL, 0, queue->private_data);
175 called when an incoming connection is writeable
/*
  Send queued packets on queue->fd, starting from the head of
  queue->out_queue, for as long as packets remain.

  A write failure other than EAGAIN/EWOULDBLOCK is treated as fatal: a
  packet that had already been partly sent is removed from the queue, the fd
  event is freed and a zero-delay timed event is scheduled. When the loop
  ends with the queue empty the fd event is marked not-writeable.

  NOTE(review): some lines (declaration of n, braces, returns, the handling
  of a short write) are missing from this listing.
*/
177 static void queue_io_write(struct ctdb_queue *queue)
179 while (queue->out_queue) {
180 struct ctdb_queue_pkt *pkt = queue->out_queue;
/* with CTDB_FLAG_TORTURE set only a single byte is written per call */
182 if (queue->ctdb->flags & CTDB_FLAG_TORTURE) {
183 n = write(queue->fd, pkt->data, 1);
185 n = write(queue->fd, pkt->data, pkt->length);
/* EAGAIN / EWOULDBLOCK are not treated as errors here */
188 if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
/* length differing from full_length means part of this packet already went out */
189 if (pkt->length != pkt->full_length) {
190 /* partial packet sent - we have to drop it */
191 DLIST_REMOVE(queue->out_queue, pkt);
192 queue->out_queue_length--;
195 talloc_free(queue->fde);
/* zero timeout: the event is due immediately, but runs from the event loop
   rather than from inside this function */
198 event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
/* short write; the lines that handle it are not visible in this listing */
204 if (n != pkt->length) {
/* whole packet written: unlink it and keep the counter in step */
210 DLIST_REMOVE(queue->out_queue, pkt);
211 queue->out_queue_length--;
/* nothing left to send, so stop asking for writeable notifications */
215 EVENT_FD_NOT_WRITEABLE(queue->fde);
219 called when an incoming connection is readable or writeable
/*
  fd-event handler registered by ctdb_queue_set_fd(): recovers the queue
  from private_data and dispatches to queue_io_read() or queue_io_write().

  ev and fde are not used in the visible body.

  NOTE(review): the line between the two calls is missing from this listing;
  presumably it is an else branch, so only one of the two runs per event -
  confirm against the complete file.
*/
221 static void queue_io_handler(struct event_context *ev, struct fd_event *fde,
222 uint16_t flags, void *private_data)
224 struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
226 if (flags & EVENT_FD_READ) {
227 queue_io_read(queue);
229 queue_io_write(queue);
235 queue a packet for sending
/*
  Queue a packet for sending on the queue's descriptor.

  queue  - the packet queue
  data   - packet bytes; when queue->alignment is non-zero the first 32 bits
           are overwritten with the rounded-up length and the bytes from
           length up to the rounded length are zeroed in place
  length - number of valid bytes in data

  If nothing is queued, the fd is valid and CTDB_FLAG_TORTURE is not set, an
  immediate write() is attempted first. Data that still has to be sent is
  copied into a newly allocated ctdb_queue_pkt appended to queue->out_queue.
  A fatal write error is reported as success, because the dead connection is
  signalled through a separately scheduled timed event.

  NOTE(review): some lines (the non-aligned branch, handling of a short
  immediate write, return statements, braces) are missing from this listing.
*/
237 int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
239 struct ctdb_queue_pkt *pkt;
240 uint32_t length2, full_length;
242 if (queue->alignment) {
243 /* enforce the length and alignment rules from the tcp packet allocator */
/* round length up to a multiple of alignment; the mask form is only correct
   when alignment is a power of two */
244 length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
/* store the padded length in the first 32 bits of the packet */
245 *(uint32_t *)data = length2;
/* NOTE(review): the padding is written into the caller's buffer, so the
   buffer must have room for length2 bytes - presumably guaranteed by the
   allocator mentioned above; confirm */
250 if (length2 != length) {
251 memset(data+length, 0, length2-length);
/* remember the total size; queue_io_write() compares pkt->length with
   pkt->full_length to detect a partly sent packet */
254 full_length = length2;
256 /* if the queue is empty then try an immediate write, avoiding
257 queue overhead. This relies on non-blocking sockets */
258 if (queue->out_queue == NULL && queue->fd != -1 &&
259 !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
260 ssize_t n = write(queue->fd, data, length2);
261 if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
262 talloc_free(queue->fde);
265 event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
267 /* yes, we report success, as the dead node is
268 handled via a separate event */
/* nothing left to queue; presumably length2 was reduced by the bytes already
   written on lines missing from this listing */
275 if (length2 == 0) return 0;
278 pkt = talloc(queue, struct ctdb_queue_pkt);
279 CTDB_NO_MEMORY(queue->ctdb, pkt);
/* the queue keeps its own copy, allocated as a talloc child of pkt */
281 pkt->data = talloc_memdup(pkt, data, length2);
282 CTDB_NO_MEMORY(queue->ctdb, pkt->data);
284 pkt->length = length2;
285 pkt->full_length = full_length;
/* first pending packet on a valid fd: start asking for writeable events */
287 if (queue->out_queue == NULL && queue->fd != -1) {
288 EVENT_FD_WRITEABLE(queue->fde);
291 DLIST_ADD_END(queue->out_queue, pkt, NULL);
293 queue->out_queue_length++;
/* optional: give the talloc chunk a name built from the packet header */
295 if (queue->ctdb->tunable.verbose_memory_names != 0) {
296 struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt->data;
297 switch (hdr->operation) {
298 case CTDB_REQ_CONTROL: {
299 struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
300 talloc_set_name(pkt, "ctdb_queue_pkt: control opcode=%u srvid=%llu datalen=%u",
301 (unsigned)c->opcode, (unsigned long long)c->srvid, (unsigned)c->datalen);
304 case CTDB_REQ_MESSAGE: {
305 struct ctdb_req_message *m = (struct ctdb_req_message *)hdr;
306 talloc_set_name(pkt, "ctdb_queue_pkt: message srvid=%llu datalen=%u",
307 (unsigned long long)m->srvid, (unsigned)m->datalen);
311 talloc_set_name(pkt, "ctdb_queue_pkt: operation=%u length=%u src=%u dest=%u",
312 (unsigned)hdr->operation, (unsigned)hdr->length,
313 (unsigned)hdr->srcnode, (unsigned)hdr->destnode);
323 setup the fd used by the queue
/*
  Attach the queue to file descriptor fd.

  Any existing fd event is freed, then a new one is registered for
  EVENT_FD_READ with EVENT_FD_AUTOCLOSE, using queue_io_handler as the
  handler and the queue as its private data. If packets are already waiting
  in out_queue the new event is also marked writeable.

  NOTE(review): the assignment of queue->fd, the handling of a failed
  event_add_fd() and the return statements are on lines missing from this
  listing.
*/
325 int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
328 talloc_free(queue->fde);
332 queue->fde = event_add_fd(queue->ctdb->ev, queue, fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
333 queue_io_handler, queue);
334 if (queue->fde == NULL) {
/* packets queued before the fd was set still need to be sent */
338 if (queue->out_queue) {
339 EVENT_FD_WRITEABLE(queue->fde);
346 /* If someone sets up this pointer, they want to know if the queue is freed */
/*
  Destructor registered on the queue by ctdb_queue_setup(): when
  queue->destroyed is non-NULL, the bool it points to is set to true.

  The return statement is on a line missing from this listing.
*/
347 static int queue_destructor(struct ctdb_queue *queue)
349 if (queue->destroyed != NULL)
350 *queue->destroyed = true;
355 setup a packet queue on a socket
/*
  Allocate and initialise a packet queue.

  The queue is zero-allocated as a talloc child of mem_ctx, its alignment,
  private_data and callback members are set from the arguments, the fd is
  attached via ctdb_queue_set_fd(), and queue_destructor is installed as the
  talloc destructor.

  NOTE(review): part of the parameter list, the remaining field assignments,
  the failure handling after ctdb_queue_set_fd() and the end of the function
  are not visible in this listing.
*/
357 struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
358 TALLOC_CTX *mem_ctx, int fd, int alignment,
360 ctdb_queue_cb_fn_t callback,
363 struct ctdb_queue *queue;
365 queue = talloc_zero(mem_ctx, struct ctdb_queue);
366 CTDB_NO_MEMORY_NULL(ctdb, queue);
370 queue->alignment = alignment;
371 queue->private_data = private_data;
372 queue->callback = callback;
374 if (ctdb_queue_set_fd(queue, fd) != 0) {
379 talloc_set_destructor(queue, queue_destructor);