eventscripts: Delete placeholder "recovered" and "shutdown" events
[ctdb.git] / common / system_linux.c
1 /* 
2    ctdb system specific code to manage raw sockets on linux
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include <netinet/if_ether.h>
27 #include <netinet/ip6.h>
28 #include <netinet/icmp6.h>
29 #include <net/if_arp.h>
30 #include <netpacket/packet.h>
31 #include <sys/prctl.h>
32
33 #ifndef ETHERTYPE_IP6
34 #define ETHERTYPE_IP6 0x86dd
35 #endif
36
/*
  checksum for a payload carried directly over ipv6

  data/n is the payload (n is its length in bytes), ip6 supplies the
  addresses and next-header value for the pseudo header. The partial
  sums are produced by uint16_checksum().
*/
static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	/* length and next-header words of the pseudo header */
	uint32_t pseudo[2] = { htonl(n), htonl(ip6->ip6_nxt) };
	uint32_t acc;
	uint16_t folded;

	acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint16_t *)pseudo, 8);
	acc += uint16_checksum(data, n);

	/* fold the carries back into the low 16 bits, twice */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	folded = htons(acc);
	folded = ~folded;

	/* never hand back an all-zero checksum */
	return (folded == 0) ? 0xFFFF : folded;
}
64
65 /*
66   send gratuitous arp reply after we have taken over an ip address
67
68   saddr is the address we are trying to claim
69   iface is the interface name we will be using to claim the address
70  */
71 int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
72 {
73         int s, ret;
74         struct sockaddr_ll sall;
75         struct ether_header *eh;
76         struct arphdr *ah;
77         struct ip6_hdr *ip6;
78         struct nd_neighbor_solicit *nd_ns;
79         struct ifreq if_hwaddr;
80         unsigned char buffer[78]; /* ipv6 neigh solicitation size */
81         char *ptr;
82         char bdcast[] = {0xff,0xff,0xff,0xff,0xff,0xff};
83         struct ifreq ifr;
84
85         ZERO_STRUCT(sall);
86
87         switch (addr->ip.sin_family) {
88         case AF_INET:
89                 s = socket(PF_PACKET, SOCK_RAW, htons(ETHERTYPE_ARP));
90                 if (s == -1){
91                         DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
92                         return -1;
93                 }
94
95                 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
96                 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
97                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
98                         DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
99                         close(s);
100                         return -1;
101                 }
102
103                 /* get the mac address */
104                 strcpy(if_hwaddr.ifr_name, iface);
105                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
106                 if ( ret < 0 ) {
107                         close(s);
108                         DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
109                         return -1;
110                 }
111                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
112                         DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
113                         close(s);
114                         return 0;
115                 }
116                 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
117                         close(s);
118                         errno = EINVAL;
119                         DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
120                                  if_hwaddr.ifr_hwaddr.sa_family));
121                         return -1;
122                 }
123
124
125                 memset(buffer, 0 , 64);
126                 eh = (struct ether_header *)buffer;
127                 memset(eh->ether_dhost, 0xff, ETH_ALEN);
128                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
129                 eh->ether_type = htons(ETHERTYPE_ARP);
130         
131                 ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
132                 ah->ar_hrd = htons(ARPHRD_ETHER);
133                 ah->ar_pro = htons(ETH_P_IP);
134                 ah->ar_hln = ETH_ALEN;
135                 ah->ar_pln = 4;
136
137                 /* send a gratious arp */
138                 ah->ar_op  = htons(ARPOP_REQUEST);
139                 ptr = (char *)&ah[1];
140                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
141                 ptr+=ETH_ALEN;
142                 memcpy(ptr, &addr->ip.sin_addr, 4);       
143                 ptr+=4;
144                 memset(ptr, 0, ETH_ALEN); 
145                 ptr+=ETH_ALEN;
146                 memcpy(ptr, &addr->ip.sin_addr, 4);       
147                 ptr+=4;
148         
149                 sall.sll_family = AF_PACKET;
150                 sall.sll_halen = 6;
151                 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
152                 sall.sll_protocol = htons(ETH_P_ALL);
153                 sall.sll_ifindex = ifr.ifr_ifindex;
154                 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
155                 if (ret < 0 ){
156                         close(s);
157                         DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
158                         return -1;
159                 }       
160
161                 /* send unsolicited arp reply broadcast */
162                 ah->ar_op  = htons(ARPOP_REPLY);
163                 ptr = (char *)&ah[1];
164                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
165                 ptr+=ETH_ALEN;
166                 memcpy(ptr, &addr->ip.sin_addr, 4);       
167                 ptr+=4;
168                 memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
169                 ptr+=ETH_ALEN;
170                 memcpy(ptr, &addr->ip.sin_addr, 4);       
171                 ptr+=4;
172
173                 ret = sendto(s, buffer, 64, 0, (struct sockaddr *)&sall, sizeof(sall));
174                 if (ret < 0 ){
175                         DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
176                         close(s);
177                         return -1;
178                 }
179
180                 close(s);
181                 break;
182         case AF_INET6:
183                 s = socket(PF_PACKET, SOCK_RAW, htons(ETHERTYPE_ARP));
184                 if (s == -1){
185                         DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
186                         return -1;
187                 }
188
189                 DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d for sending arp\n", s));
190                 strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
191                 if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
192                         DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
193                         close(s);
194                         return -1;
195                 }
196
197                 /* get the mac address */
198                 strcpy(if_hwaddr.ifr_name, iface);
199                 ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
200                 if ( ret < 0 ) {
201                         close(s);
202                         DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
203                         return -1;
204                 }
205                 if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
206                         DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
207                         close(s);
208                         return 0;
209                 }
210                 if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
211                         close(s);
212                         errno = EINVAL;
213                         DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
214                                  if_hwaddr.ifr_hwaddr.sa_family));
215                         return -1;
216                 }
217
218                 memset(buffer, 0 , sizeof(buffer));
219                 eh = (struct ether_header *)buffer;
220                 memset(eh->ether_dhost, 0xff, ETH_ALEN);
221                 memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
222                 eh->ether_type = htons(ETHERTYPE_IP6);
223
224                 ip6 = (struct ip6_hdr *)(eh+1);
225                 ip6->ip6_vfc  = 0x60;
226                 ip6->ip6_plen = htons(sizeof(*nd_ns));
227                 ip6->ip6_nxt  = IPPROTO_ICMPV6;
228                 ip6->ip6_hlim = 255;
229                 ip6->ip6_dst  = addr->ip6.sin6_addr;
230
231                 nd_ns = (struct nd_neighbor_solicit *)(ip6+1);
232                 nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
233                 nd_ns->nd_ns_code = 0;
234                 nd_ns->nd_ns_reserved = 0;
235                 nd_ns->nd_ns_target = addr->ip6.sin6_addr;
236
237                 nd_ns->nd_ns_cksum = tcp_checksum6((uint16_t *)nd_ns, ntohs(ip6->ip6_plen), ip6);
238
239                 sall.sll_family = AF_PACKET;
240                 sall.sll_halen = 6;
241                 memcpy(&sall.sll_addr[0], bdcast, sall.sll_halen);
242                 sall.sll_protocol = htons(ETH_P_ALL);
243                 sall.sll_ifindex = ifr.ifr_ifindex;
244                 ret = sendto(s, buffer, 78, 0, (struct sockaddr *)&sall, sizeof(sall));
245                 if (ret < 0 ){
246                         close(s);
247                         DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
248                         return -1;
249                 }       
250
251                 close(s);
252                 break;
253         default:
254                 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
255                 return -1;
256         }
257
258         return 0;
259 }
260
261
/*
  simple TCP checksum over ipv4 - assumes data is multiple of 2 bytes long

  data/n is the tcp segment (n is its length in bytes), ip supplies the
  addresses and protocol number that go into the pseudo header
 */
static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
{
	uint32_t acc;
	uint16_t folded;

	acc  = uint16_checksum(data, n);
	acc += uint16_checksum((uint16_t *)(void *)&ip->saddr,
			       sizeof(ip->saddr));
	acc += uint16_checksum((uint16_t *)(void *)&ip->daddr,
			       sizeof(ip->daddr));
	/* remaining pseudo header fields: protocol and segment length */
	acc += ip->protocol + n;

	/* fold the carries back into the low 16 bits, twice */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	folded = htons(acc);
	folded = ~folded;

	/* never hand back an all-zero checksum */
	return (folded == 0) ? 0xFFFF : folded;
}
283
284 /*
285   Send tcp segment from the specified IP/port to the specified
286   destination IP/port. 
287
288   This is used to trigger the receiving host into sending its own ACK,
289   which should trigger early detection of TCP reset by the client
290   after IP takeover
291
292   This can also be used to send RST segments (if rst is true) and also
293   if correct seq and ack numbers are provided.
294  */
295 int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, 
296                       const ctdb_sock_addr *src,
297                       uint32_t seq, uint32_t ack, int rst)
298 {
299         int s;
300         int ret;
301         uint32_t one = 1;
302         uint16_t tmpport;
303         ctdb_sock_addr *tmpdest;
304         struct {
305                 struct iphdr ip;
306                 struct tcphdr tcp;
307         } ip4pkt;
308         struct {
309                 struct ip6_hdr ip6;
310                 struct tcphdr tcp;
311         } ip6pkt;
312
313         switch (src->ip.sin_family) {
314         case AF_INET:
315                 ZERO_STRUCT(ip4pkt);
316                 ip4pkt.ip.version  = 4;
317                 ip4pkt.ip.ihl      = sizeof(ip4pkt.ip)/4;
318                 ip4pkt.ip.tot_len  = htons(sizeof(ip4pkt));
319                 ip4pkt.ip.ttl      = 255;
320                 ip4pkt.ip.protocol = IPPROTO_TCP;
321                 ip4pkt.ip.saddr    = src->ip.sin_addr.s_addr;
322                 ip4pkt.ip.daddr    = dest->ip.sin_addr.s_addr;
323                 ip4pkt.ip.check    = 0;
324
325                 ip4pkt.tcp.source   = src->ip.sin_port;
326                 ip4pkt.tcp.dest     = dest->ip.sin_port;
327                 ip4pkt.tcp.seq      = seq;
328                 ip4pkt.tcp.ack_seq  = ack;
329                 ip4pkt.tcp.ack      = 1;
330                 if (rst) {
331                         ip4pkt.tcp.rst      = 1;
332                 }
333                 ip4pkt.tcp.doff     = sizeof(ip4pkt.tcp)/4;
334                 /* this makes it easier to spot in a sniffer */
335                 ip4pkt.tcp.window   = htons(1234);
336                 ip4pkt.tcp.check    = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
337
338                 /* open a raw socket to send this segment from */
339                 s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW));
340                 if (s == -1) {
341                         DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket (%s)\n",
342                                  strerror(errno)));
343                         return -1;
344                 }
345
346                 ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
347                 if (ret != 0) {
348                         DEBUG(DEBUG_CRIT,(__location__ " failed to setup IP headers (%s)\n",
349                                  strerror(errno)));
350                         close(s);
351                         return -1;
352                 }
353
354                 set_nonblocking(s);
355                 set_close_on_exec(s);
356
357                 ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
358                              (const struct sockaddr *)&dest->ip,
359                              sizeof(dest->ip));
360                 close(s);
361                 if (ret != sizeof(ip4pkt)) {
362                         DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
363                         return -1;
364                 }
365                 break;
366         case AF_INET6:
367                 ZERO_STRUCT(ip6pkt);
368                 ip6pkt.ip6.ip6_vfc  = 0x60;
369                 ip6pkt.ip6.ip6_plen = htons(20);
370                 ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
371                 ip6pkt.ip6.ip6_hlim = 64;
372                 ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
373                 ip6pkt.ip6.ip6_dst  = dest->ip6.sin6_addr;
374
375                 ip6pkt.tcp.source   = src->ip6.sin6_port;
376                 ip6pkt.tcp.dest     = dest->ip6.sin6_port;
377                 ip6pkt.tcp.seq      = seq;
378                 ip6pkt.tcp.ack_seq  = ack;
379                 ip6pkt.tcp.ack      = 1;
380                 if (rst) {
381                         ip6pkt.tcp.rst      = 1;
382                 }
383                 ip6pkt.tcp.doff     = sizeof(ip6pkt.tcp)/4;
384                 /* this makes it easier to spot in a sniffer */
385                 ip6pkt.tcp.window   = htons(1234);
386                 ip6pkt.tcp.check    = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
387
388                 s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
389                 if (s == -1) {
390                         DEBUG(DEBUG_CRIT, (__location__ " Failed to open sending socket\n"));
391                         return -1;
392
393                 }
394                 /* sendto() dont like if the port is set and the socket is
395                    in raw mode.
396                 */
397                 tmpdest = discard_const(dest);
398                 tmpport = tmpdest->ip6.sin6_port;
399
400                 tmpdest->ip6.sin6_port = 0;
401                 ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0,
402                              (const struct sockaddr *)&dest->ip6,
403                              sizeof(dest->ip6));
404                 tmpdest->ip6.sin6_port = tmpport;
405                 close(s);
406
407                 if (ret != sizeof(ip6pkt)) {
408                         DEBUG(DEBUG_CRIT,(__location__ " failed sendto (%s)\n", strerror(errno)));
409                         return -1;
410                 }
411                 break;
412
413         default:
414                 DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/v6 address\n"));
415                 return -1;
416         }
417
418         return 0;
419 }
420
421 /* 
422    This function is used to open a raw socket to capture from
423  */
424 int ctdb_sys_open_capture_socket(const char *iface, void **private_data)
425 {
426         int s;
427
428         /* Open a socket to capture all traffic */
429         s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
430         if (s == -1) {
431                 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
432                 return -1;
433         }
434
435         DEBUG(DEBUG_DEBUG, (__location__ " Created RAW SOCKET FD:%d for tcp tickle\n", s));
436
437         set_nonblocking(s);
438         set_close_on_exec(s);
439
440         return s;
441 }
442
/*
   Hook for any additional cleanup required when closing a capture
   socket.
   Note that the socket itself is closed automatically in the caller.

   This implementation has nothing to clean up: private_data is ignored
   and the result is always 0.
 */
int ctdb_sys_close_capture_socket(void *private_data)
{
	(void)private_data;

	return 0;
}
452
453
454 /*
455   called when the raw socket becomes readable
456  */
457 int ctdb_sys_read_tcp_packet(int s, void *private_data, 
458                         ctdb_sock_addr *src, ctdb_sock_addr *dst,
459                         uint32_t *ack_seq, uint32_t *seq)
460 {
461         int ret;
462 #define RCVPKTSIZE 100
463         char pkt[RCVPKTSIZE];
464         struct ether_header *eth;
465         struct iphdr *ip;
466         struct ip6_hdr *ip6;
467         struct tcphdr *tcp;
468
469         ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
470         if (ret < sizeof(*eth)+sizeof(*ip)) {
471                 return -1;
472         }
473
474         /* Ethernet */
475         eth = (struct ether_header *)pkt;
476
477         /* we want either IPv4 or IPv6 */
478         if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
479                 /* IP */
480                 ip = (struct iphdr *)(eth+1);
481
482                 /* We only want IPv4 packets */
483                 if (ip->version != 4) {
484                         return -1;
485                 }
486                 /* Dont look at fragments */
487                 if ((ntohs(ip->frag_off)&0x1fff) != 0) {
488                         return -1;
489                 }
490                 /* we only want TCP */
491                 if (ip->protocol != IPPROTO_TCP) {
492                         return -1;
493                 }
494
495                 /* make sure its not a short packet */
496                 if (offsetof(struct tcphdr, ack_seq) + 4 + 
497                     (ip->ihl*4) + sizeof(*eth) > ret) {
498                         return -1;
499                 }
500                 /* TCP */
501                 tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
502
503                 /* tell the caller which one we've found */
504                 src->ip.sin_family      = AF_INET;
505                 src->ip.sin_addr.s_addr = ip->saddr;
506                 src->ip.sin_port        = tcp->source;
507                 dst->ip.sin_family      = AF_INET;
508                 dst->ip.sin_addr.s_addr = ip->daddr;
509                 dst->ip.sin_port        = tcp->dest;
510                 *ack_seq                = tcp->ack_seq;
511                 *seq                    = tcp->seq;
512
513                 return 0;
514         } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
515                 /* IP6 */
516                 ip6 = (struct ip6_hdr *)(eth+1);
517
518                 /* we only want TCP */
519                 if (ip6->ip6_nxt != IPPROTO_TCP) {
520                         return -1;
521                 }
522
523                 /* TCP */
524                 tcp = (struct tcphdr *)(ip6+1);
525
526                 /* tell the caller which one we've found */
527                 src->ip6.sin6_family = AF_INET6;
528                 src->ip6.sin6_port   = tcp->source;
529                 src->ip6.sin6_addr   = ip6->ip6_src;
530
531                 dst->ip6.sin6_family = AF_INET6;
532                 dst->ip6.sin6_port   = tcp->dest;
533                 dst->ip6.sin6_addr   = ip6->ip6_dst;
534
535                 *ack_seq             = tcp->ack_seq;
536                 *seq                 = tcp->seq;
537
538                 return 0;
539         }
540
541         return -1;
542 }
543
544
545 bool ctdb_sys_check_iface_exists(const char *iface)
546 {
547         int s;
548         struct ifreq ifr;
549
550         s = socket(PF_PACKET, SOCK_RAW, 0);
551         if (s == -1){
552                 /* We dont know if the interface exists, so assume yes */
553                 DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
554                 return true;
555         }
556
557         strncpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
558         if (ioctl(s, SIOCGIFINDEX, &ifr) < 0 && errno == ENODEV) {
559                 DEBUG(DEBUG_CRIT,(__location__ " interface '%s' not found\n", iface));
560                 close(s);
561                 return false;
562         }
563         close(s);
564         
565         return true;
566 }
567
568 int ctdb_get_peer_pid(const int fd, pid_t *peer_pid)
569 {
570         struct ucred cr;
571         socklen_t crl = sizeof(struct ucred);
572         int ret;
573         if ((ret = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl) == 0)) {
574                 *peer_pid = cr.pid;
575         }
576         return ret;
577 }
578
579 /*
580  * Find the process name from process ID
581  */
582 char *ctdb_get_process_name(pid_t pid)
583 {
584         char path[32];
585         char buf[PATH_MAX];
586         char *ptr;
587         int n;
588
589         snprintf(path, sizeof(path), "/proc/%d/exe", pid);
590         n = readlink(path, buf, sizeof(buf));
591         if (n < 0) {
592                 return NULL;
593         }
594
595         /* Remove any extra fields */
596         buf[n] = '\0';
597         ptr = strtok(buf, " ");
598         return strdup(ptr);
599 }
600
/*
 * Set process name
 *
 * At most the first 15 characters of name are used. Returns the result
 * of prctl(PR_SET_NAME).
 */
int ctdb_set_process_name(const char *name)
{
	/* zero filled, so the last byte always terminates the string */
	char short_name[16] = { 0 };

	strncpy(short_name, name, sizeof(short_name) - 1);

	return prctl(PR_SET_NAME, (unsigned long)short_name, 0, 0, 0);
}
612
613 /*
614  * Parsing a line from /proc/locks,
615  */
616 static bool parse_proc_locks_line(char *line, pid_t *pid,
617                                   struct ctdb_lock_info *curlock)
618 {
619         char *ptr, *saveptr;
620
621         /* output of /proc/locks
622          *
623          * lock assigned
624          * 1: POSIX  ADVISORY  WRITE 25945 fd:00:6424820 212 212
625          *
626          * lock waiting
627          * 1: -> POSIX  ADVISORY  WRITE 25946 fd:00:6424820 212 212
628          */
629
630         /* Id: */
631         ptr = strtok_r(line, " ", &saveptr);
632         if (ptr == NULL) return false;
633
634         /* -> */
635         ptr = strtok_r(NULL, " ", &saveptr);
636         if (ptr == NULL) return false;
637         if (strcmp(ptr, "->") == 0) {
638                 curlock->waiting = true;
639                 ptr = strtok_r(NULL, " ", &saveptr);
640         } else {
641                 curlock->waiting = false;
642         }
643
644         /* POSIX */
645         if (ptr == NULL || strcmp(ptr, "POSIX") != 0) {
646                 return false;
647         }
648
649         /* ADVISORY */
650         ptr = strtok_r(NULL, " ", &saveptr);
651         if (ptr == NULL) return false;
652
653         /* WRITE */
654         ptr = strtok_r(NULL, " ", &saveptr);
655         if (ptr == NULL) return false;
656         if (strcmp(ptr, "READ") == 0) {
657                 curlock->read_only = true;
658         } else if (strcmp(ptr, "WRITE") == 0) {
659                 curlock->read_only = false;
660         } else {
661                 return false;
662         }
663
664         /* PID */
665         ptr = strtok_r(NULL, " ", &saveptr);
666         if (ptr == NULL) return false;
667         *pid = atoi(ptr);
668
669         /* MAJOR:MINOR:INODE */
670         ptr = strtok_r(NULL, " :", &saveptr);
671         if (ptr == NULL) return false;
672         ptr = strtok_r(NULL, " :", &saveptr);
673         if (ptr == NULL) return false;
674         ptr = strtok_r(NULL, " :", &saveptr);
675         if (ptr == NULL) return false;
676         curlock->inode = atol(ptr);
677
678         /* START OFFSET */
679         ptr = strtok_r(NULL, " ", &saveptr);
680         if (ptr == NULL) return false;
681         curlock->start = atol(ptr);
682
683         /* END OFFSET */
684         ptr = strtok_r(NULL, " ", &saveptr);
685         if (ptr == NULL) return false;
686         if (strncmp(ptr, "EOF", 3) == 0) {
687                 curlock->end = (off_t)-1;
688         } else {
689                 curlock->end = atol(ptr);
690         }
691
692         return true;
693 }
694
695 /*
696  * Find information of lock being waited on for given process ID
697  */
698 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info)
699 {
700         FILE *fp;
701         struct ctdb_lock_info curlock;
702         pid_t pid;
703         char buf[1024];
704         char *ptr;
705         bool status = false;
706
707         if ((fp = fopen("/proc/locks", "r")) == NULL) {
708                 DEBUG(DEBUG_ERR, ("Failed to read locks information"));
709                 return false;
710         }
711         while ((ptr = fgets(buf, sizeof(buf), fp)) != NULL) {
712                 if (! parse_proc_locks_line(buf, &pid, &curlock)) {
713                         continue;
714                 }
715                 if (pid == req_pid && curlock.waiting) {
716                         *lock_info = curlock;
717                         status = true;
718                         break;
719                 }
720         }
721         fclose(fp);
722
723         return status;
724 }
725
726 /*
727  * Find process ID which holds an overlapping byte lock for required
728  * inode and byte range.
729  */
730 bool ctdb_get_blocker_pid(struct ctdb_lock_info *reqlock, pid_t *blocker_pid)
731 {
732         FILE *fp;
733         struct ctdb_lock_info curlock;
734         pid_t pid;
735         char buf[1024];
736         char *ptr;
737         bool status = false;
738
739         if ((fp = fopen("/proc/locks", "r")) == NULL) {
740                 DEBUG(DEBUG_ERR, ("Failed to read locks information"));
741                 return false;
742         }
743         while ((ptr = fgets(buf, sizeof(buf), fp)) != NULL) {
744                 if (! parse_proc_locks_line(buf, &pid, &curlock)) {
745                         continue;
746                 }
747
748                 if (curlock.waiting) {
749                         continue;
750                 }
751
752                 if (curlock.inode != reqlock->inode) {
753                         continue;
754                 }
755
756                 if (curlock.start > reqlock->end ||
757                     curlock.end < reqlock->start) {
758                         /* Outside the required range */
759                         continue;
760                 }
761                 *blocker_pid = pid;
762                 status = true;
763                 break;
764         }
765         fclose(fp);
766
767         return status;
768 }