s3: Fix a long-standing problem with recycled PIDs
source3/locking/brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28
29 #undef DBGC_CLASS
30 #define DBGC_CLASS DBGC_LOCKING
31
32 #define ZERO_ZERO 0
33
34 /* The open brlock.tdb database. */
35
36 static struct db_context *brlock_db;
37
38 /****************************************************************************
39  Debug info at level 10 for lock struct.
40 ****************************************************************************/
41
42 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
43 {
44         DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %s, ",
45                         i,
46                         (unsigned int)pls->context.smbpid,
47                         (unsigned int)pls->context.tid,
48                         procid_str(talloc_tos(), &pls->context.pid) ));
49         
50         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
51                 (double)pls->start,
52                 (double)pls->size,
53                 pls->fnum,
54                 lock_type_name(pls->lock_type),
55                 lock_flav_name(pls->lock_flav) ));
56 }
57
58 /****************************************************************************
59  See if two locking contexts are equal.
60 ****************************************************************************/
61
62 bool brl_same_context(const struct lock_context *ctx1, 
63                              const struct lock_context *ctx2)
64 {
65         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
66                 (ctx1->smbpid == ctx2->smbpid) &&
67                 (ctx1->tid == ctx2->tid));
68 }
69
70 /****************************************************************************
71  See if lck1 and lck2 overlap.
72 ****************************************************************************/
73
74 static bool brl_overlap(const struct lock_struct *lck1,
75                         const struct lock_struct *lck2)
76 {
77         /* XXX Remove for Win7 compatibility. */
78         /* this extra check is not redundant - it copes with locks
79            that go beyond the end of 64 bit file space */
80         if (lck1->size != 0 &&
81             lck1->start == lck2->start &&
82             lck1->size == lck2->size) {
83                 return True;
84         }
85
86         if (lck1->start >= (lck2->start+lck2->size) ||
87             lck2->start >= (lck1->start+lck1->size)) {
88                 return False;
89         }
90         return True;
91 }
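/* Hedged worked example (editor's illustration, not part of the original
   source): take two identical locks with start=0xFFFFFFFFFFFFFFF0 and
   size=0x20. start+size wraps to 0x10, so the generic range test above
   would see lck1->start >= lck2->start + lck2->size and wrongly report
   no overlap. The identical-lock check catches exactly this case. */
#if 0
static void brl_overlap_wrap_example(void)
{
	struct lock_struct a, b;
	ZERO_STRUCT(a);
	a.start = 0xFFFFFFFFFFFFFFF0ULL;
	a.size = 0x20;
	b = a;
	SMB_ASSERT(brl_overlap(&a, &b));	/* True only via the equality check */
}
#endif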
92
93 /****************************************************************************
94  See if lock2 can be added when lock1 is in place.
95 ****************************************************************************/
96
97 static bool brl_conflict(const struct lock_struct *lck1, 
98                          const struct lock_struct *lck2)
99 {
100         /* Ignore PENDING locks. */
101         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
102                 return False;
103
104         /* Read locks never conflict. */
105         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
106                 return False;
107         }
108
109         /* A READ lock can stack on top of a WRITE lock if they have the same
110          * context & fnum. */
111         if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
112             brl_same_context(&lck1->context, &lck2->context) &&
113             lck1->fnum == lck2->fnum) {
114                 return False;
115         }
116
117         return brl_overlap(lck1, lck2);
118 }
119
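/* A minimal sketch (editor's illustration) of the stacking rule above:
   a READ lock from the same context and fnum stacks on an existing
   WRITE lock, so brl_conflict() reports no conflict. */
#if 0
static void brl_conflict_stack_example(void)
{
	struct lock_struct w, r;
	ZERO_STRUCT(w);
	w.lock_type = WRITE_LOCK;
	w.start = 0;
	w.size = 100;
	w.fnum = 1;
	r = w;
	r.lock_type = READ_LOCK;	/* same context, same fnum */
	SMB_ASSERT(!brl_conflict(&w, &r));	/* stacks - no conflict */
}
#endif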
120 /****************************************************************************
121  See if lock2 can be added when lock1 is in place - when both locks are POSIX
122  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
123  know already match.
124 ****************************************************************************/
125
126 static bool brl_conflict_posix(const struct lock_struct *lck1, 
127                                 const struct lock_struct *lck2)
128 {
129 #if defined(DEVELOPER)
130         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
131         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
132 #endif
133
134         /* Ignore PENDING locks. */
135         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
136                 return False;
137
138         /* Read locks never conflict. */
139         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
140                 return False;
141         }
142
143         /* Locks on the same context don't conflict. Ignore fnum. */
144         if (brl_same_context(&lck1->context, &lck2->context)) {
145                 return False;
146         }
147
148         /* One is read, the other write, or the context is different,
149            do they overlap ? */
150         return brl_overlap(lck1, lck2);
151 }
152
153 #if ZERO_ZERO
154 static bool brl_conflict1(const struct lock_struct *lck1, 
155                          const struct lock_struct *lck2)
156 {
157         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
158                 return False;
159
160         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
161                 return False;
162         }
163
164         if (brl_same_context(&lck1->context, &lck2->context) &&
165             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
166                 return False;
167         }
168
169         if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
170                 return True;
171         }
172
173         if (lck1->start >= (lck2->start + lck2->size) ||
174             lck2->start >= (lck1->start + lck1->size)) {
175                 return False;
176         }
177
178         return True;
179 }
180 #endif
181
182 /****************************************************************************
183  Check to see if this lock conflicts, but ignore our own locks on the
184  same fnum only. This is the read/write lock check code path.
185  This is never used in the POSIX lock case.
186 ****************************************************************************/
187
188 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
189 {
190         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
191                 return False;
192
193         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
194                 return False;
195
196         /* POSIX flavour locks never conflict here - this is only called
197            in the read/write path. */
198
199         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
200                 return False;
201
202         /*
203          * Incoming WRITE locks conflict with existing READ locks even
204          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
205          */
206
207         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
208                 if (brl_same_context(&lck1->context, &lck2->context) &&
209                                         lck1->fnum == lck2->fnum)
210                         return False;
211         }
212
213         return brl_overlap(lck1, lck2);
214 }
215
216 /****************************************************************************
217  Check if an unlock overlaps a pending lock.
218 ****************************************************************************/
219
220 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
221 {
222         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
223                 return True;
224         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
225                 return True;
226         return False;
227 }
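/* Worked example (editor's illustration): with a pending lock of
   start=100, size=50, an unlock of start=150, size=10 still counts as
   an overlap - the second test is inclusive at the end boundary, so
   waiters are woken conservatively (a spurious wakeup simply retries). */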
228
229 /****************************************************************************
230  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
231  is the same as this one and changes its error code. I wonder if any
232  app depends on this ?
233 ****************************************************************************/
234
235 NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
236 {
237         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
238                 /* amazing the little things you learn with a test
239                    suite. Locks beyond this offset (as a 64 bit
240                    number!) always generate the conflict error code,
241                    unless the top bit is set */
242                 if (!blocking_lock) {
243                         fsp->last_lock_failure = *lock;
244                 }
245                 return NT_STATUS_FILE_LOCK_CONFLICT;
246         }
247
248         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
249                         lock->context.tid == fsp->last_lock_failure.context.tid &&
250                         lock->fnum == fsp->last_lock_failure.fnum &&
251                         lock->start == fsp->last_lock_failure.start) {
252                 return NT_STATUS_FILE_LOCK_CONFLICT;
253         }
254
255         if (!blocking_lock) {
256                 fsp->last_lock_failure = *lock;
257         }
258         return NT_STATUS_LOCK_NOT_GRANTED;
259 }
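/* Hedged usage sketch (editor's illustration - fsp, lock and status are
   assumed to be in scope): the first non-blocking failure on a region is
   remembered on the fsp and returns NT_STATUS_LOCK_NOT_GRANTED; an
   identical retry (same pid, tid, fnum and start) then returns
   NT_STATUS_FILE_LOCK_CONFLICT, matching the w2k3 behaviour noted above. */
#if 0
	status = brl_lock_failed(fsp, &lock, False);
	/* -> NT_STATUS_LOCK_NOT_GRANTED, failure recorded on fsp */
	status = brl_lock_failed(fsp, &lock, False);
	/* -> NT_STATUS_FILE_LOCK_CONFLICT for the identical retry */
#endif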
260
261 /****************************************************************************
262  Open up the brlock.tdb database.
263 ****************************************************************************/
264
265 void brl_init(bool read_only)
266 {
267         int tdb_flags;
268
269         if (brlock_db) {
270                 return;
271         }
272
273         tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;
274
275         if (!lp_clustering()) {
276                 /*
277                  * We can't use the SEQNUM trick to cache brlock
278                  * entries in the clustering case because ctdb seqnum
279                  * propagation has a delay.
280                  */
281                 tdb_flags |= TDB_SEQNUM;
282         }
283
284         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
285                             lp_open_files_db_hash_size(), tdb_flags,
286                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
287         if (!brlock_db) {
288                 DEBUG(0,("Failed to open byte range locking database %s\n",
289                         lock_path("brlock.tdb")));
290                 return;
291         }
292 }
293
294 /****************************************************************************
295  Close down the brlock.tdb database.
296 ****************************************************************************/
297
298 void brl_shutdown(void)
299 {
300         TALLOC_FREE(brlock_db);
301 }
302
303 #if ZERO_ZERO
304 /****************************************************************************
305  Compare two locks for sorting.
306 ****************************************************************************/
307
308 static int lock_compare(const struct lock_struct *lck1, 
309                          const struct lock_struct *lck2)
310 {
311         if (lck1->start != lck2->start) {
312                 return (lck1->start - lck2->start);
313         }
314         if (lck2->size != lck1->size) {
315                 return ((int)lck1->size - (int)lck2->size);
316         }
317         return 0;
318 }
319 #endif
320
321 /****************************************************************************
322  Lock a range of bytes - Windows lock semantics.
323 ****************************************************************************/
324
325 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
326     struct lock_struct *plock, bool blocking_lock)
327 {
328         unsigned int i;
329         files_struct *fsp = br_lck->fsp;
330         struct lock_struct *locks = br_lck->lock_data;
331         NTSTATUS status;
332
333         SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
334
335         for (i=0; i < br_lck->num_locks; i++) {
336                 if (locks[i].start + locks[i].size < locks[i].start) {
337                         /* 64-bit wrap. Error. */
338                         return NT_STATUS_INVALID_LOCK_RANGE;
339                 }
340
341                 /* Do any Windows or POSIX locks conflict ? */
342                 if (brl_conflict(&locks[i], plock)) {
343                         /* Remember who blocked us. */
344                         plock->context.smbpid = locks[i].context.smbpid;
345                         return brl_lock_failed(fsp,plock,blocking_lock);
346                 }
347 #if ZERO_ZERO
348                 if (plock->start == 0 && plock->size == 0 && 
349                                 locks[i].size == 0) {
350                         break;
351                 }
352 #endif
353         }
354
355         if (!IS_PENDING_LOCK(plock->lock_type)) {
356                 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
357         }
358
359         /* We can get the Windows lock, now see if it needs to
360            be mapped into a lower level POSIX one, and if so can
361            we get it ? */
362
363         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
364                 int errno_ret;
365                 if (!set_posix_lock_windows_flavour(fsp,
366                                 plock->start,
367                                 plock->size,
368                                 plock->lock_type,
369                                 &plock->context,
370                                 locks,
371                                 br_lck->num_locks,
372                                 &errno_ret)) {
373
374                         /* We don't know who blocked us. */
375                         plock->context.smbpid = 0xFFFFFFFF;
376
377                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
378                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
379                                 goto fail;
380                         } else {
381                                 status = map_nt_error_from_unix(errno);
382                                 goto fail;
383                         }
384                 }
385         }
386
387         /* no conflicts - add it to the list of locks */
388         locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
389         if (!locks) {
390                 status = NT_STATUS_NO_MEMORY;
391                 goto fail;
392         }
393
394         memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
395         br_lck->num_locks += 1;
396         br_lck->lock_data = locks;
397         br_lck->modified = True;
398
399         return NT_STATUS_OK;
400  fail:
401         if (!IS_PENDING_LOCK(plock->lock_type)) {
402                 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
403         }
404         return status;
405 }
406
407 /****************************************************************************
408  Cope with POSIX range splits and merges.
409 ****************************************************************************/
410
411 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
412                                                 struct lock_struct *ex,         /* existing lock. */
413                                                 struct lock_struct *plock)      /* proposed lock. */
414 {
415         bool lock_types_differ = (ex->lock_type != plock->lock_type);
416
417         /* We can't merge non-conflicting locks on different contexts - ignore fnum. */
418
419         if (!brl_same_context(&ex->context, &plock->context)) {
420                 /* Just copy. */
421                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
422                 return 1;
423         }
424
425         /* We now know we have the same context. */
426
427         /* Did we overlap ? */
428
429 /*********************************************
430                                         +---------+
431                                         | ex      |
432                                         +---------+
433                          +-------+
434                          | plock |
435                          +-------+
436 OR....
437         +---------+
438         |  ex     |
439         +---------+
440 **********************************************/
441
442         if ( (ex->start > (plock->start + plock->size)) ||
443                 (plock->start > (ex->start + ex->size))) {
444
445                 /* No overlap with this lock - copy existing. */
446
447                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
448                 return 1;
449         }
450
451 /*********************************************
452         +---------------------------+
453         |          ex               |
454         +---------------------------+
455         +---------------------------+
456         |       plock               | -> replace with plock.
457         +---------------------------+
458 OR
459              +---------------+
460              |       ex      |
461              +---------------+
462         +---------------------------+
463         |       plock               | -> replace with plock.
464         +---------------------------+
465
466 **********************************************/
467
468         if ( (ex->start >= plock->start) &&
469                 (ex->start + ex->size <= plock->start + plock->size) ) {
470
471                 /* Replace - discard existing lock. */
472
473                 return 0;
474         }
475
476 /*********************************************
477 Adjacent after.
478                         +-------+
479                         |  ex   |
480                         +-------+
481         +---------------+
482         |   plock       |
483         +---------------+
484
485 BECOMES....
486         +---------------+-------+
487         |   plock       | ex    | - different lock types.
488         +---------------+-------+
489 OR.... (merge)
490         +-----------------------+
491         |   plock               | - same lock type.
492         +-----------------------+
493 **********************************************/
494
495         if (plock->start + plock->size == ex->start) {
496
497                 /* If the lock types are the same, we merge, if different, we
498                    add the remainder of the old lock. */
499
500                 if (lock_types_differ) {
501                         /* Add existing. */
502                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
503                         return 1;
504                 } else {
505                         /* Merge - adjust incoming lock as we may have more
506                          * merging to come. */
507                         plock->size += ex->size;
508                         return 0;
509                 }
510         }
511
512 /*********************************************
513 Adjacent before.
514         +-------+
515         |  ex   |
516         +-------+
517                 +---------------+
518                 |   plock       |
519                 +---------------+
520 BECOMES....
521         +-------+---------------+
522         | ex    |   plock       | - different lock types
523         +-------+---------------+
524
525 OR.... (merge)
526         +-----------------------+
527         |      plock            | - same lock type.
528         +-----------------------+
529
530 **********************************************/
531
532         if (ex->start + ex->size == plock->start) {
533
534                 /* If the lock types are the same, we merge, if different, we
535                    add the existing lock. */
536
537                 if (lock_types_differ) {
538                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
539                         return 1;
540                 } else {
541                         /* Merge - adjust incoming lock as we may have more
542                          * merging to come. */
543                         plock->start = ex->start;
544                         plock->size += ex->size;
545                         return 0;
546                 }
547         }
548
549 /*********************************************
550 Overlap after.
551         +-----------------------+
552         |          ex           |
553         +-----------------------+
554         +---------------+
555         |   plock       |
556         +---------------+
557 OR
558                +----------------+
559                |       ex       |
560                +----------------+
561         +---------------+
562         |   plock       |
563         +---------------+
564
565 BECOMES....
566         +---------------+-------+
567         |   plock       | ex    | - different lock types.
568         +---------------+-------+
569 OR.... (merge)
570         +-----------------------+
571         |   plock               | - same lock type.
572         +-----------------------+
573 **********************************************/
574
575         if ( (ex->start >= plock->start) &&
576                 (ex->start <= plock->start + plock->size) &&
577                 (ex->start + ex->size > plock->start + plock->size) ) {
578
579                 /* If the lock types are the same, we merge, if different, we
580                    add the remainder of the old lock. */
581
582                 if (lock_types_differ) {
583                         /* Add remaining existing. */
584                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
585                         /* Adjust existing start and size. */
586                         lck_arr[0].start = plock->start + plock->size;
587                         lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
588                         return 1;
589                 } else {
590                         /* Merge - adjust incoming lock as we may have more
591                          * merging to come. */
592                         plock->size += (ex->start + ex->size) - (plock->start + plock->size);
593                         return 0;
594                 }
595         }
596
597 /*********************************************
598 Overlap before.
599         +-----------------------+
600         |  ex                   |
601         +-----------------------+
602                 +---------------+
603                 |   plock       |
604                 +---------------+
605 OR
606         +-------------+
607         |  ex         |
608         +-------------+
609                 +---------------+
610                 |   plock       |
611                 +---------------+
612
613 BECOMES....
614         +-------+---------------+
615         | ex    |   plock       | - different lock types
616         +-------+---------------+
617
618 OR.... (merge)
619         +-----------------------+
620         |      plock            | - same lock type.
621         +-----------------------+
622
623 **********************************************/
624
625         if ( (ex->start < plock->start) &&
626                         (ex->start + ex->size >= plock->start) &&
627                         (ex->start + ex->size <= plock->start + plock->size) ) {
628
629                 /* If the lock types are the same, we merge, if different, we
630                    add the truncated old lock. */
631
632                 if (lock_types_differ) {
633                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
634                         /* Adjust existing size. */
635                         lck_arr[0].size = plock->start - ex->start;
636                         return 1;
637                 } else {
638                         /* Merge - adjust incoming lock as we may have more
639                          * merging to come. MUST ADJUST plock SIZE FIRST ! */
640                         plock->size += (plock->start - ex->start);
641                         plock->start = ex->start;
642                         return 0;
643                 }
644         }
645
646 /*********************************************
647 Complete overlap.
648         +---------------------------+
649         |        ex                 |
650         +---------------------------+
651                 +---------+
652                 |  plock  |
653                 +---------+
654 BECOMES.....
655         +-------+---------+---------+
656         | ex    |  plock  | ex      | - different lock types.
657         +-------+---------+---------+
658 OR
659         +---------------------------+
660         |        plock              | - same lock type.
661         +---------------------------+
662 **********************************************/
663
664         if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
665
666                 if (lock_types_differ) {
667
668                         /* We have to split ex into two locks here. */
669
670                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
671                         memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
672
673                         /* Adjust first existing size. */
674                         lck_arr[0].size = plock->start - ex->start;
675
676                         /* Adjust second existing start and size. */
677                         lck_arr[1].start = plock->start + plock->size;
678                         lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
679                         return 2;
680                 } else {
681                         /* Just eat the existing locks, merge them into plock. */
682                         plock->start = ex->start;
683                         plock->size = ex->size;
684                         return 0;
685                 }
686         }
687
688         /* Never get here. */
689         smb_panic("brlock_posix_split_merge");
690         /* Notreached. */
691
692         /* Keep some compilers happy. */
693         return 0;
694 }
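/* Hedged worked example (editor's illustration) of the split case: an
   existing WRITE lock ex covering [0, 100) and an incoming READ plock
   covering [40, 60) on the same context hit the "complete overlap"
   branch, splitting ex into [0, 40) and [60, 100) and returning 2. This
   is the only case that returns 2, which is why brl_lock_posix below
   allocates num_locks + 2 entries: at most one existing same-context
   lock can split (adding one entry), plus plock itself is added. */
#if 0
static void brlock_split_example(void)
{
	struct lock_struct out[2], ex, pl;
	ZERO_STRUCT(ex);
	ZERO_STRUCT(pl);
	ex.start = 0;
	ex.size = 100;
	ex.lock_type = WRITE_LOCK;
	pl.start = 40;
	pl.size = 20;
	pl.lock_type = READ_LOCK;
	pl.context = ex.context;	/* merging requires the same context */
	SMB_ASSERT(brlock_posix_split_merge(out, &ex, &pl) == 2);
}
#endif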
695
696 /****************************************************************************
697  Lock a range of bytes - POSIX lock semantics.
698  We must cope with range splits and merges.
699 ****************************************************************************/
700
701 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
702                                struct byte_range_lock *br_lck,
703                                struct lock_struct *plock)
704 {
705         unsigned int i, count, posix_count;
706         struct lock_struct *locks = br_lck->lock_data;
707         struct lock_struct *tp;
708         bool signal_pending_read = False;
709         bool break_oplocks = false;
710         NTSTATUS status;
711
712         /* No zero-zero locks for POSIX. */
713         if (plock->start == 0 && plock->size == 0) {
714                 return NT_STATUS_INVALID_PARAMETER;
715         }
716
717         /* Don't allow 64-bit lock wrap. */
718         if (plock->start + plock->size < plock->start ||
719                         plock->start + plock->size < plock->size) {
720                 return NT_STATUS_INVALID_PARAMETER;
721         }
722
723         /* The worst case scenario here is we have to split an
724            existing POSIX lock range into two, and add our lock,
725            so we need at most 2 more entries. */
726
727         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
728         if (!tp) {
729                 return NT_STATUS_NO_MEMORY;
730         }
731
732         count = posix_count = 0;
733
734         for (i=0; i < br_lck->num_locks; i++) {
735                 struct lock_struct *curr_lock = &locks[i];
736
737                 /* If we have a pending read lock, a lock downgrade should
738                    trigger a lock re-evaluation. */
739                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
740                                 brl_pending_overlap(plock, curr_lock)) {
741                         signal_pending_read = True;
742                 }
743
744                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
745                         /* Do any Windows flavour locks conflict ? */
746                         if (brl_conflict(curr_lock, plock)) {
747                                 /* No games with error messages. */
748                                 SAFE_FREE(tp);
749                                 /* Remember who blocked us. */
750                                 plock->context.smbpid = curr_lock->context.smbpid;
751                                 return NT_STATUS_FILE_LOCK_CONFLICT;
752                         }
753                         /* Just copy the Windows lock into the new array. */
754                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
755                         count++;
756                 } else {
757                         unsigned int tmp_count = 0;
758
759                         /* POSIX conflict semantics are different. */
760                         if (brl_conflict_posix(curr_lock, plock)) {
761                                 /* Can't block ourselves with POSIX locks. */
762                                 /* No games with error messages. */
763                                 SAFE_FREE(tp);
764                                 /* Remember who blocked us. */
765                                 plock->context.smbpid = curr_lock->context.smbpid;
766                                 return NT_STATUS_FILE_LOCK_CONFLICT;
767                         }
768
769                         /* Work out overlaps. */
770                         tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
771                         posix_count += tmp_count;
772                         count += tmp_count;
773                 }
774         }
775
776         /*
777          * Break oplocks while we hold a brl. Since lock() and unlock() calls
778          * are not symmetric with POSIX semantics, we cannot guarantee our
779          * contend_level2_oplocks_begin/end calls will be acquired and
780          * released one-for-one as with Windows semantics. Therefore we only
781          * call contend_level2_oplocks_begin if this is the first POSIX brl on
782          * the file.
783          */
784         break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
785                          posix_count == 0);
786         if (break_oplocks) {
787                 contend_level2_oplocks_begin(br_lck->fsp,
788                                              LEVEL2_CONTEND_POSIX_BRL);
789         }
790
791         /* Try and add the lock in order, sorted by lock start. */
792         for (i=0; i < count; i++) {
793                 struct lock_struct *curr_lock = &tp[i];
794
795                 if (curr_lock->start <= plock->start) {
796                         continue;
797                 }
798                 break;
799         }
800         if (i < count) {
801                 memmove(&tp[i+1], &tp[i],
802                         (count - i)*sizeof(struct lock_struct));
803         }
804         memcpy(&tp[i], plock, sizeof(struct lock_struct));
805         count++;
806
807         /* We can get the POSIX lock, now see if it needs to
808            be mapped into a lower level POSIX one, and if so can
809            we get it ? */
810
811         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
812                 int errno_ret;
813
814                 /* The lower layer just needs to attempt to
815                    get the system POSIX lock. We've weeded out
816                    any conflicts above. */
817
818                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
819                                 plock->start,
820                                 plock->size,
821                                 plock->lock_type,
822                                 &errno_ret)) {
823
824                         /* We don't know who blocked us. */
825                         plock->context.smbpid = 0xFFFFFFFF;
826
827                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
828                                 SAFE_FREE(tp);
829                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
830                                 goto fail;
831                         } else {
832                                 SAFE_FREE(tp);
833                                 status = map_nt_error_from_unix(errno);
834                                 goto fail;
835                         }
836                 }
837         }
838
839         /* If we didn't use all the allocated size,
840          * realloc so we don't leak entries per lock call. */
841         if (count < br_lck->num_locks + 2) {
842                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
843                 if (!tp) {
844                         status = NT_STATUS_NO_MEMORY;
845                         goto fail;
846                 }
847         }
848
849         br_lck->num_locks = count;
850         SAFE_FREE(br_lck->lock_data);
851         br_lck->lock_data = tp;
852         locks = tp;
853         br_lck->modified = True;
854
855         /* A successful downgrade from write to read lock can trigger a lock
856            re-evaluation where waiting readers can now proceed. */
857
858         if (signal_pending_read) {
859                 /* Send unlock messages to any pending read waiters that overlap. */
860                 for (i=0; i < br_lck->num_locks; i++) {
861                         struct lock_struct *pend_lock = &locks[i];
862
863                         /* Ignore non-pending locks. */
864                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
865                                 continue;
866                         }
867
868                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
869                                         brl_pending_overlap(plock, pend_lock)) {
870                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
871                                         procid_str_static(&pend_lock->context.pid )));
872
873                                 messaging_send(msg_ctx, pend_lock->context.pid,
874                                                MSG_SMB_UNLOCK, &data_blob_null);
875                         }
876                 }
877         }
878
879         return NT_STATUS_OK;
880  fail:
881         if (break_oplocks) {
882                 contend_level2_oplocks_end(br_lck->fsp,
883                                            LEVEL2_CONTEND_POSIX_BRL);
884         }
885         return status;
886 }
887
888 NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
889                                        struct byte_range_lock *br_lck,
890                                        struct lock_struct *plock,
891                                        bool blocking_lock,
892                                        struct blocking_lock_record *blr)
893 {
894         VFS_FIND(brl_lock_windows);
895         return handle->fns->brl_lock_windows(handle, br_lck, plock,
896                                              blocking_lock, blr);
897 }
898
899 /****************************************************************************
900  Lock a range of bytes.
901 ****************************************************************************/
902
903 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
904                 struct byte_range_lock *br_lck,
905                 uint32 smbpid,
906                 struct server_id pid,
907                 br_off start,
908                 br_off size, 
909                 enum brl_type lock_type,
910                 enum brl_flavour lock_flav,
911                 bool blocking_lock,
912                 uint32 *psmbpid,
913                 struct blocking_lock_record *blr)
914 {
915         NTSTATUS ret;
916         struct lock_struct lock;
917
918 #if !ZERO_ZERO
919         if (start == 0 && size == 0) {
920                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
921         }
922 #endif
923
924 #ifdef DEVELOPER
925         /* Quieten valgrind on test. */
926         memset(&lock, '\0', sizeof(lock));
927 #endif
928
929         lock.context.smbpid = smbpid;
930         lock.context.pid = pid;
931         lock.context.tid = br_lck->fsp->conn->cnum;
932         lock.start = start;
933         lock.size = size;
934         lock.fnum = br_lck->fsp->fnum;
935         lock.lock_type = lock_type;
936         lock.lock_flav = lock_flav;
937
938         if (lock_flav == WINDOWS_LOCK) {
939                 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
940                     &lock, blocking_lock, blr);
941         } else {
942                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
943         }
944
945 #if ZERO_ZERO
946         /* sort the lock list */
947         TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
948 #endif
949
950         /* If we're returning an error, return who blocked us. */
951         if (!NT_STATUS_IS_OK(ret) && psmbpid) {
952                 *psmbpid = lock.context.smbpid;
953         }
954         return ret;
955 }
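/* Hypothetical call sketch (editor's illustration - msg_ctx, br_lck and
   smbpid are assumed to be in scope): try a non-blocking Windows-flavour
   write lock on the first 100 bytes; on failure, blocker_smbpid reports
   who blocked us. */
#if 0
	uint32 blocker_smbpid;
	NTSTATUS status = brl_lock(msg_ctx, br_lck, smbpid, procid_self(),
				   0, 100, WRITE_LOCK, WINDOWS_LOCK,
				   False, &blocker_smbpid, NULL);
#endif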
956
957 /****************************************************************************
958  Unlock a range of bytes - Windows semantics.
959 ****************************************************************************/
960
961 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
962                                struct byte_range_lock *br_lck,
963                                const struct lock_struct *plock)
964 {
965         unsigned int i, j;
966         struct lock_struct *locks = br_lck->lock_data;
967         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
968
969         SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
970
971 #if ZERO_ZERO
972         /* Delete write locks by preference... The lock list
973            is sorted in the zero zero case. */
974
975         for (i = 0; i < br_lck->num_locks; i++) {
976                 struct lock_struct *lock = &locks[i];
977
978                 if (lock->lock_type == WRITE_LOCK &&
979                     brl_same_context(&lock->context, &plock->context) &&
980                     lock->fnum == plock->fnum &&
981                     lock->lock_flav == WINDOWS_LOCK &&
982                     lock->start == plock->start &&
983                     lock->size == plock->size) {
984
985                         /* found it - delete it */
986                         deleted_lock_type = lock->lock_type;
987                         break;
988                 }
989         }
990
991         if (i != br_lck->num_locks) {
992                 /* We found it - don't search again. */
993                 goto unlock_continue;
994         }
995 #endif
996
997         for (i = 0; i < br_lck->num_locks; i++) {
998                 struct lock_struct *lock = &locks[i];
999
1000                 /* Only remove our own locks that match in start, size, and flavour. */
1001                 if (brl_same_context(&lock->context, &plock->context) &&
1002                                         lock->fnum == plock->fnum &&
1003                                         lock->lock_flav == WINDOWS_LOCK &&
1004                                         lock->start == plock->start &&
1005                                         lock->size == plock->size ) {
1006                         deleted_lock_type = lock->lock_type;
1007                         break;
1008                 }
1009         }
1010
1011         if (i == br_lck->num_locks) {
1012                 /* we didn't find it */
1013                 return False;
1014         }
1015
1016 #if ZERO_ZERO
1017   unlock_continue:
1018 #endif
1019
1020         /* Actually delete the lock. */
1021         if (i < br_lck->num_locks - 1) {
1022                 memmove(&locks[i], &locks[i+1], 
1023                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1024         }
1025
1026         br_lck->num_locks -= 1;
1027         br_lck->modified = True;
1028
1029         /* Unlock the underlying POSIX regions. */
1030         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1031                 release_posix_lock_windows_flavour(br_lck->fsp,
1032                                 plock->start,
1033                                 plock->size,
1034                                 deleted_lock_type,
1035                                 &plock->context,
1036                                 locks,
1037                                 br_lck->num_locks);
1038         }
1039
1040         /* Send unlock messages to any pending waiters that overlap. */
1041         for (j=0; j < br_lck->num_locks; j++) {
1042                 struct lock_struct *pend_lock = &locks[j];
1043
1044                 /* Ignore non-pending locks. */
1045                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1046                         continue;
1047                 }
1048
1049                 /* We could send specific lock info here... */
1050                 if (brl_pending_overlap(plock, pend_lock)) {
1051                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1052                                 procid_str_static(&pend_lock->context.pid )));
1053
1054                         messaging_send(msg_ctx, pend_lock->context.pid,
1055                                        MSG_SMB_UNLOCK, &data_blob_null);
1056                 }
1057         }
1058
1059         contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1060         return True;
1061 }
1062
1063 /****************************************************************************
1064  Unlock a range of bytes - POSIX semantics.
1065 ****************************************************************************/
1066
1067 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1068                              struct byte_range_lock *br_lck,
1069                              struct lock_struct *plock)
1070 {
1071         unsigned int i, j, count;
1072         struct lock_struct *tp;
1073         struct lock_struct *locks = br_lck->lock_data;
1074         bool overlap_found = False;
1075
1076         /* No zero-zero locks for POSIX. */
1077         if (plock->start == 0 && plock->size == 0) {
1078                 return False;
1079         }
1080
1081         /* Don't allow 64-bit lock wrap. */
1082         if (plock->start + plock->size < plock->start ||
1083                         plock->start + plock->size < plock->size) {
1084                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1085                 return False;
1086         }
1087
1088         /* The worst case scenario here is we have to split an
1089            existing POSIX lock range into two, so we need at most
1090            1 more entry. */
1091
1092         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
1093         if (!tp) {
1094                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1095                 return False;
1096         }
1097
1098         count = 0;
1099         for (i = 0; i < br_lck->num_locks; i++) {
1100                 struct lock_struct *lock = &locks[i];
1101                 unsigned int tmp_count;
1102
1103                 /* Only remove our own locks - ignore fnum. */
1104                 if (IS_PENDING_LOCK(lock->lock_type) ||
1105                                 !brl_same_context(&lock->context, &plock->context)) {
1106                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1107                         count++;
1108                         continue;
1109                 }
1110
1111                 if (lock->lock_flav == WINDOWS_LOCK) {
1112                         /* Do any Windows flavour locks conflict ? */
1113                         if (brl_conflict(lock, plock)) {
1114                                 SAFE_FREE(tp);
1115                                 return false;
1116                         }
1117                         /* Just copy the Windows lock into the new array. */
1118                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1119                         count++;
1120                         continue;
1121                 }
1122
1123                 /* Work out overlaps. */
1124                 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1125
1126                 if (tmp_count == 0) {
1127                         /* plock overlapped the existing lock completely,
1128                            or replaced it. Don't copy the existing lock. */
1129                         overlap_found = true;
1130                 } else if (tmp_count == 1) {
1131                         /* Either no overlap (simple copy of the existing lock),
1132                          * or an overlap that truncated the existing lock. */
1133                         /* If the lock changed size, we had an overlap. */
1134                         if (tp[count].size != lock->size) {
1135                                 overlap_found = true;
1136                         }
1137                         count += tmp_count;
1138                 } else if (tmp_count == 2) {
1139                         /* We split a lock range in two. */
1140                         overlap_found = true;
1141                         count += tmp_count;
1142
1143                         /* Optimisation... */
1144                         /* We know we're finished here as we can't overlap any
1145                            more POSIX locks. Copy the rest of the lock array. */
1146
1147                         if (i < br_lck->num_locks - 1) {
1148                                 memcpy(&tp[count], &locks[i+1],
1149                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1150                                 count += ((br_lck->num_locks-1) - i);
1151                         }
1152                         break;
1153                 }
1154
1155         }
1156
1157         if (!overlap_found) {
1158                 /* Just ignore - no change. */
1159                 SAFE_FREE(tp);
1160                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1161                 return True;
1162         }
1163
1164         /* Unlock any POSIX regions. */
1165         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1166                 release_posix_lock_posix_flavour(br_lck->fsp,
1167                                                 plock->start,
1168                                                 plock->size,
1169                                                 &plock->context,
1170                                                 tp,
1171                                                 count);
1172         }
1173
1174         /* Realloc so we don't leak entries per unlock call. */
1175         if (count) {
1176                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1177                 if (!tp) {
1178                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1179                         return False;
1180                 }
1181         } else {
1182                 /* We deleted the last lock. */
1183                 SAFE_FREE(tp);
1184                 tp = NULL;
1185         }
1186
1187         contend_level2_oplocks_end(br_lck->fsp,
1188                                    LEVEL2_CONTEND_POSIX_BRL);
1189
1190         br_lck->num_locks = count;
1191         SAFE_FREE(br_lck->lock_data);
1192         locks = tp;
1193         br_lck->lock_data = tp;
1194         br_lck->modified = True;
1195
1196         /* Send unlock messages to any pending waiters that overlap. */
1197
1198         for (j=0; j < br_lck->num_locks; j++) {
1199                 struct lock_struct *pend_lock = &locks[j];
1200
1201                 /* Ignore non-pending locks. */
1202                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1203                         continue;
1204                 }
1205
1206                 /* We could send specific lock info here... */
1207                 if (brl_pending_overlap(plock, pend_lock)) {
1208                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1209                                 procid_str_static(&pend_lock->context.pid )));
1210
1211                         messaging_send(msg_ctx, pend_lock->context.pid,
1212                                        MSG_SMB_UNLOCK, &data_blob_null);
1213                 }
1214         }
1215
1216         return True;
1217 }
1218
1219 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1220                                      struct messaging_context *msg_ctx,
1221                                      struct byte_range_lock *br_lck,
1222                                      const struct lock_struct *plock)
1223 {
1224         VFS_FIND(brl_unlock_windows);
1225         return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
1226 }
1227
1228 /****************************************************************************
1229  Unlock a range of bytes.
1230 ****************************************************************************/
1231
1232 bool brl_unlock(struct messaging_context *msg_ctx,
1233                 struct byte_range_lock *br_lck,
1234                 uint32 smbpid,
1235                 struct server_id pid,
1236                 br_off start,
1237                 br_off size,
1238                 enum brl_flavour lock_flav)
1239 {
1240         struct lock_struct lock;
1241
1242         lock.context.smbpid = smbpid;
1243         lock.context.pid = pid;
1244         lock.context.tid = br_lck->fsp->conn->cnum;
1245         lock.start = start;
1246         lock.size = size;
1247         lock.fnum = br_lck->fsp->fnum;
1248         lock.lock_type = UNLOCK_LOCK;
1249         lock.lock_flav = lock_flav;
1250
1251         if (lock_flav == WINDOWS_LOCK) {
1252                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1253                     br_lck, &lock);
1254         } else {
1255                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1256         }
1257 }
1258
1259 /****************************************************************************
1260  Test if we could add a lock if we wanted to.
1261  Returns True if the region required is currently unlocked, False if locked.
1262 ****************************************************************************/
1263
1264 bool brl_locktest(struct byte_range_lock *br_lck,
1265                 uint32 smbpid,
1266                 struct server_id pid,
1267                 br_off start,
1268                 br_off size, 
1269                 enum brl_type lock_type,
1270                 enum brl_flavour lock_flav)
1271 {
1272         bool ret = True;
1273         unsigned int i;
1274         struct lock_struct lock;
1275         const struct lock_struct *locks = br_lck->lock_data;
1276         files_struct *fsp = br_lck->fsp;
1277
1278         lock.context.smbpid = smbpid;
1279         lock.context.pid = pid;
1280         lock.context.tid = br_lck->fsp->conn->cnum;
1281         lock.start = start;
1282         lock.size = size;
1283         lock.fnum = fsp->fnum;
1284         lock.lock_type = lock_type;
1285         lock.lock_flav = lock_flav;
1286
1287         /* Make sure existing locks don't conflict */
1288         for (i=0; i < br_lck->num_locks; i++) {
1289                 /*
1290                  * Our own locks don't conflict.
1291                  */
1292                 if (brl_conflict_other(&locks[i], &lock)) {
1293                         return False;
1294                 }
1295         }
1296
1297         /*
1298          * There is no lock held by an SMB daemon, check to
1299          * see if there is a POSIX lock from a UNIX or NFS process.
1300          * This only conflicts with Windows locks, not POSIX locks.
1301          */
1302
1303         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1304                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1305
1306                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1307                         (double)start, (double)size, ret ? "locked" : "unlocked",
1308                         fsp->fnum, fsp_str_dbg(fsp)));
1309
1310                 /* We need to return the inverse of is_posix_locked. */
1311                 ret = !ret;
1312         }
1313
1314         /* no conflicts - we could have added it */
1315         return ret;
1316 }
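/* Hypothetical probe (editor's illustration - br_lck and smbpid assumed
   in scope): ask whether a Windows read lock on [0, 100) could currently
   be granted, without actually taking it. */
#if 0
	if (!brl_locktest(br_lck, smbpid, procid_self(), 0, 100,
			  READ_LOCK, WINDOWS_LOCK)) {
		/* Region is locked by someone else. */
	}
#endif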
1317
1318 /****************************************************************************
1319  Query for existing locks.
1320 ****************************************************************************/
1321
1322 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1323                 uint32 *psmbpid,
1324                 struct server_id pid,
1325                 br_off *pstart,
1326                 br_off *psize, 
1327                 enum brl_type *plock_type,
1328                 enum brl_flavour lock_flav)
1329 {
1330         unsigned int i;
1331         struct lock_struct lock;
1332         const struct lock_struct *locks = br_lck->lock_data;
1333         files_struct *fsp = br_lck->fsp;
1334
1335         lock.context.smbpid = *psmbpid;
1336         lock.context.pid = pid;
1337         lock.context.tid = br_lck->fsp->conn->cnum;
1338         lock.start = *pstart;
1339         lock.size = *psize;
1340         lock.fnum = fsp->fnum;
1341         lock.lock_type = *plock_type;
1342         lock.lock_flav = lock_flav;
1343
1344         /* Make sure existing locks don't conflict */
1345         for (i=0; i < br_lck->num_locks; i++) {
1346                 const struct lock_struct *exlock = &locks[i];
1347                 bool conflict = False;
1348
1349                 if (exlock->lock_flav == WINDOWS_LOCK) {
1350                         conflict = brl_conflict(exlock, &lock);
1351                 } else {        
1352                         conflict = brl_conflict_posix(exlock, &lock);
1353                 }
1354
1355                 if (conflict) {
1356                         *psmbpid = exlock->context.smbpid;
1357                         *pstart = exlock->start;
1358                         *psize = exlock->size;
1359                         *plock_type = exlock->lock_type;
1360                         return NT_STATUS_LOCK_NOT_GRANTED;
1361                 }
1362         }
1363
1364         /*
1365          * There is no lock held by an SMB daemon, check to
1366          * see if there is a POSIX lock from a UNIX or NFS process.
1367          */
1368
1369         if(lp_posix_locking(fsp->conn->params)) {
1370                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1371
1372                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1373                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1374                         fsp->fnum, fsp_str_dbg(fsp)));
1375
1376                 if (ret) {
1377                         /* A POSIX lock carries no SMB PID - report 0xFFFF (-1 as a 16-bit SMB PID). */
1378                         *psmbpid = 0xFFFF;
1379                         return NT_STATUS_LOCK_NOT_GRANTED;
1380                 }
1381         }
1382
1383         return NT_STATUS_OK;
1384 }
1385
1386
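/****************************************************************************
 VFS entry point for cancelling a pending Windows lock. VFS_FIND walks the
 handle chain to the first module that implements brl_cancel_windows and
 dispatches the call to it.
****************************************************************************/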
1387 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1388                                      struct byte_range_lock *br_lck,
1389                                      struct lock_struct *plock,
1390                                      struct blocking_lock_record *blr)
1391 {
1392         VFS_FIND(brl_cancel_windows);
1393         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1394 }
1395
1396 /****************************************************************************
1397  Remove a particular pending lock.
1398 ****************************************************************************/
1399 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1400                 uint32 smbpid,
1401                 struct server_id pid,
1402                 br_off start,
1403                 br_off size,
1404                 enum brl_flavour lock_flav,
1405                 struct blocking_lock_record *blr)
1406 {
1407         bool ret;
1408         struct lock_struct lock;
1409
1410         lock.context.smbpid = smbpid;
1411         lock.context.pid = pid;
1412         lock.context.tid = br_lck->fsp->conn->cnum;
1413         lock.start = start;
1414         lock.size = size;
1415         lock.fnum = br_lck->fsp->fnum;
1416         lock.lock_flav = lock_flav;
1417         /* lock.lock_type doesn't matter */
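        /* (The cancel path matches on context, fnum, range and flavour
           only, and considers just pending locks - see
           brl_lock_cancel_default() below - so lock_type can stay
           uninitialised.) */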
1418
1419         if (lock_flav == WINDOWS_LOCK) {
1420                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1421                     &lock, blr);
1422         } else {
1423                 ret = brl_lock_cancel_default(br_lck, &lock);
1424         }
1425
1426         return ret;
1427 }
1428
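/****************************************************************************
 Default cancel implementation: find the matching pending lock in the
 in-memory array and delete it by shifting the tail of the array down.
****************************************************************************/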
1429 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1430                 struct lock_struct *plock)
1431 {
1432         unsigned int i;
1433         struct lock_struct *locks = br_lck->lock_data;
1434
1435         SMB_ASSERT(plock);
1436
1437         for (i = 0; i < br_lck->num_locks; i++) {
1438                 struct lock_struct *lock = &locks[i];
1439
1440                 /* For pending locks we *always* care about the fnum. */
1441                 if (brl_same_context(&lock->context, &plock->context) &&
1442                                 lock->fnum == plock->fnum &&
1443                                 IS_PENDING_LOCK(lock->lock_type) &&
1444                                 lock->lock_flav == plock->lock_flav &&
1445                                 lock->start == plock->start &&
1446                                 lock->size == plock->size) {
1447                         break;
1448                 }
1449         }
1450
1451         if (i == br_lck->num_locks) {
1452                 /* Didn't find it. */
1453                 return False;
1454         }
1455
1456         if (i < br_lck->num_locks - 1) {
1457                 /* Found this particular pending lock - delete it */
1458                 memmove(&locks[i], &locks[i+1], 
1459                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1460         }
1461
1462         br_lck->num_locks -= 1;
1463         br_lck->modified = True;
1464         return True;
1465 }
1466
1467 /****************************************************************************
1468  Remove any locks associated with an open file. If this process holds
1469  Windows locks on the same dev/ino pair through other fnums, each lock is
1470  released individually so the underlying system POSIX locks stay correct.
1471 ****************************************************************************/
1472
1473 void brl_close_fnum(struct messaging_context *msg_ctx,
1474                     struct byte_range_lock *br_lck)
1475 {
1476         files_struct *fsp = br_lck->fsp;
1477         uint16 tid = fsp->conn->cnum;
1478         int fnum = fsp->fnum;
1479         unsigned int i, j, dcount=0;
1480         int num_deleted_windows_locks = 0;
1481         struct lock_struct *locks = br_lck->lock_data;
1482         struct server_id pid = procid_self();
1483         bool unlock_individually = False;
1484         bool posix_level2_contention_ended = false;
1485
1486         if(lp_posix_locking(fsp->conn->params)) {
1487
1488                 /* Check if there are any Windows locks associated with this dev/ino
1489                    pair that are not this fnum. If so we need to call unlock on each
1490                    one in order to release the system POSIX locks correctly. */
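                /* (Rationale: POSIX fcntl() locks are owned per process,
                   not per fd - closing any fd on a file drops every lock
                   this process holds on it - so those locks must go
                   through the unlock path that maintains the POSIX lock
                   reference counts.) */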
1491
1492                 for (i=0; i < br_lck->num_locks; i++) {
1493                         struct lock_struct *lock = &locks[i];
1494
1495                         if (!procid_equal(&lock->context.pid, &pid)) {
1496                                 continue;
1497                         }
1498
1499                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1500                                 continue; /* Ignore pending. */
1501                         }
1502
1503                         if (lock->context.tid != tid || lock->fnum != fnum) {
1504                                 unlock_individually = True;
1505                                 break;
1506                         }
1507                 }
1508
1509                 if (unlock_individually) {
1510                         struct lock_struct *locks_copy;
1511                         unsigned int num_locks_copy;
1512
1513                         /* Copy the current lock array. */
1514                         if (br_lck->num_locks) {
1515                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1516                                 if (!locks_copy) {
1517                                         smb_panic("brl_close_fnum: talloc failed");
1518                                 }
1519                         } else {        
1520                                 locks_copy = NULL;
1521                         }
1522
1523                         num_locks_copy = br_lck->num_locks;
1524
1525                         for (i=0; i < num_locks_copy; i++) {
1526                                 struct lock_struct *lock = &locks_copy[i];
1527
1528                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1529                                                 (lock->fnum == fnum)) {
1530                                         brl_unlock(msg_ctx,
1531                                                 br_lck,
1532                                                 lock->context.smbpid,
1533                                                 pid,
1534                                                 lock->start,
1535                                                 lock->size,
1536                                                 lock->lock_flav);
1537                                 }
1538                         }
1539                         return;
1540                 }
1541         }
1542
1543         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1544
1545         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1546
1547         for (i=0; i < br_lck->num_locks; i++) {
1548                 struct lock_struct *lock = &locks[i];
1549                 bool del_this_lock = False;
1550
1551                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1552                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1553                                 del_this_lock = True;
1554                                 num_deleted_windows_locks++;
1555                                 contend_level2_oplocks_end(br_lck->fsp,
1556                                     LEVEL2_CONTEND_WINDOWS_BRL);
1557                         } else if (lock->lock_flav == POSIX_LOCK) {
1558                                 del_this_lock = True;
1559
1560                                 /* Only end level2 contention once for posix */
1561                                 if (!posix_level2_contention_ended) {
1562                                         posix_level2_contention_ended = true;
1563                                         contend_level2_oplocks_end(br_lck->fsp,
1564                                             LEVEL2_CONTEND_POSIX_BRL);
1565                                 }
1566                         }
1567                 }
1568
1569                 if (del_this_lock) {
1570                         /* Send unlock messages to any pending waiters that overlap. */
1571                         for (j=0; j < br_lck->num_locks; j++) {
1572                                 struct lock_struct *pend_lock = &locks[j];
1573
1574                                 /* Ignore non-pending locks. */
1575                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1576                                         continue;
1577                                 }
1578
1579                                 /* Optimisation - don't send to this fnum as we're
1580                                    closing it. */
1581                                 if (pend_lock->context.tid == tid &&
1582                                     procid_equal(&pend_lock->context.pid, &pid) &&
1583                                     pend_lock->fnum == fnum) {
1584                                         continue;
1585                                 }
1586
1587                                 /* We could send specific lock info here... */
1588                                 if (brl_pending_overlap(lock, pend_lock)) {
1589                                         messaging_send(msg_ctx, pend_lock->context.pid,
1590                                                        MSG_SMB_UNLOCK, &data_blob_null);
1591                                 }
1592                         }
1593
1594                         /* found it - delete it */
1595                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1596                                 memmove(&locks[i], &locks[i+1], 
1597                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1598                         }
1599                         br_lck->num_locks--;
1600                         br_lck->modified = True;
1601                         i--;
1602                         dcount++;
1603                 }
1604         }
1605
1606         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1607                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1608                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1609         }
1610 }
1611
1612 /****************************************************************************
1613  Ensure this set of lock entries is valid.
1614 ****************************************************************************/
1615 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1616 {
1617         unsigned int i;
1618         unsigned int num_valid_entries = 0;
1619         struct lock_struct *locks = *pplocks;
1620
1621         for (i = 0; i < *pnum_entries; i++) {
1622                 struct lock_struct *lock_data = &locks[i];
1623                 if (!serverid_exists(&lock_data->context.pid)) {
1624                         /* This process no longer exists - mark this
1625                            entry as invalid by zeroing it. */
1626                         ZERO_STRUCTP(lock_data);
1627                 } else {
1628                         num_valid_entries++;
1629                 }
1630         }
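        /* Entries zeroed above are recognised in the copy loop below by
           their now-zero smbpid and tid fields. */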
1631
1632         if (num_valid_entries != *pnum_entries) {
1633                 struct lock_struct *new_lock_data = NULL;
1634
1635                 if (num_valid_entries) {
1636                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1637                         if (!new_lock_data) {
1638                                 DEBUG(3, ("malloc fail\n"));
1639                                 return False;
1640                         }
1641
1642                         num_valid_entries = 0;
1643                         for (i = 0; i < *pnum_entries; i++) {
1644                                 struct lock_struct *lock_data = &locks[i];
1645                                 if (lock_data->context.smbpid &&
1646                                                 lock_data->context.tid) {
1647                                         /* Valid (nonzero) entry - copy it. */
1648                                         memcpy(&new_lock_data[num_valid_entries],
1649                                                 lock_data, sizeof(struct lock_struct));
1650                                         num_valid_entries++;
1651                                 }
1652                         }
1653                 }
1654
1655                 SAFE_FREE(*pplocks);
1656                 *pplocks = new_lock_data;
1657                 *pnum_entries = num_valid_entries;
1658         }
1659
1660         return True;
1661 }
1662
1663 struct brl_forall_cb {
1664         void (*fn)(struct file_id id, struct server_id pid,
1665                    enum brl_type lock_type,
1666                    enum brl_flavour lock_flav,
1667                    br_off start, br_off size,
1668                    void *private_data);
1669         void *private_data;
1670 };
1671
1672 /****************************************************************************
1673  Traverse the whole database with this function, calling traverse_callback
1674  on each lock.
1675 ****************************************************************************/
1676
1677 static int traverse_fn(struct db_record *rec, void *state)
1678 {
1679         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1680         struct lock_struct *locks;
1681         struct file_id *key;
1682         unsigned int i;
1683         unsigned int num_locks = 0;
1684         unsigned int orig_num_locks = 0;
1685
1686         /* In a traverse function we must work on a copy of the
1687            record value before modifying it. */
1688
1689         locks = (struct lock_struct *)memdup(rec->value.dptr,
1690                                              rec->value.dsize);
1691         if (!locks) {
1692                 return -1; /* Terminate traversal. */
1693         }
1694
1695         key = (struct file_id *)rec->key.dptr;
1696         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1697
1698         /* Ensure the lock db is clean of entries from invalid processes. */
1699
1700         if (!validate_lock_entries(&num_locks, &locks)) {
1701                 SAFE_FREE(locks);
1702                 return -1; /* Terminate traversal */
1703         }
1704
1705         if (orig_num_locks != num_locks) {
1706                 if (num_locks) {
1707                         TDB_DATA data;
1708                         data.dptr = (uint8_t *)locks;
1709                         data.dsize = num_locks*sizeof(struct lock_struct);
1710                         rec->store(rec, data, TDB_REPLACE);
1711                 } else {
1712                         rec->delete_rec(rec);
1713                 }
1714         }
1715
1716         if (cb->fn) {
1717                 for ( i=0; i<num_locks; i++) {
1718                         cb->fn(*key,
1719                                 locks[i].context.pid,
1720                                 locks[i].lock_type,
1721                                 locks[i].lock_flav,
1722                                 locks[i].start,
1723                                 locks[i].size,
1724                                 cb->private_data);
1725                 }
1726         }
1727
1728         SAFE_FREE(locks);
1729         return 0;
1730 }
1731
1732 /*******************************************************************
1733  Call the specified function on each lock in the database.
1734 ********************************************************************/
1735
1736 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1737                           enum brl_type lock_type,
1738                           enum brl_flavour lock_flav,
1739                           br_off start, br_off size,
1740                           void *private_data),
1741                void *private_data)
1742 {
1743         struct brl_forall_cb cb;
1744
1745         if (!brlock_db) {
1746                 return 0;
1747         }
1748         cb.fn = fn;
1749         cb.private_data = private_data;
1750         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1751 }
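
/* A minimal usage sketch (hypothetical caller, not part of this module):
   it counts every byte range lock in brlock.tdb via the traverse above.

        static void count_locks_cb(struct file_id id, struct server_id pid,
                                   enum brl_type lock_type,
                                   enum brl_flavour lock_flav,
                                   br_off start, br_off size,
                                   void *private_data)
        {
                unsigned int *n = (unsigned int *)private_data;
                (*n)++;
        }

        unsigned int n = 0;
        brl_forall(count_locks_cb, &n);
*/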
1752
1753 /*******************************************************************
1754  Store a potentially modified set of byte range lock data back into
1755  the database.
1756  Unlock the record.
1757 ********************************************************************/
1758
1759 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1760 {
1761         if (br_lck->read_only) {
1762                 SMB_ASSERT(!br_lck->modified);
1763         }
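        /* A read-only handle holds no record lock, so it must never
           have been marked modified - there is nothing to store back. */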
1764
1765         if (!br_lck->modified) {
1766                 goto done;
1767         }
1768
1769         if (br_lck->num_locks == 0) {
1770                 /* No locks - delete this entry. */
1771                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1772                 if (!NT_STATUS_IS_OK(status)) {
1773                         DEBUG(0, ("delete_rec returned %s\n",
1774                                   nt_errstr(status)));
1775                         smb_panic("Could not delete byte range lock entry");
1776                 }
1777         } else {
1778                 TDB_DATA data;
1779                 NTSTATUS status;
1780
1781                 data.dptr = (uint8 *)br_lck->lock_data;
1782                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1783
1784                 status = br_lck->record->store(br_lck->record, data,
1785                                                TDB_REPLACE);
1786                 if (!NT_STATUS_IS_OK(status)) {
1787                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1788                         smb_panic("Could not store byte range mode entry");
1789                 }
1790         }
1791
1792  done:
1793
1794         SAFE_FREE(br_lck->lock_data);
1795         TALLOC_FREE(br_lck->record);
1796         return 0;
1797 }
1798
1799 /*******************************************************************
1800  Fetch a set of byte range lock data from the database.
1801  Leave the record locked.
1802  TALLOC_FREE(brl) will release the lock in the destructor.
1803 ********************************************************************/
1804
1805 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1806                                         files_struct *fsp, bool read_only)
1807 {
1808         TDB_DATA key, data;
1809         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1810
1811         if (br_lck == NULL) {
1812                 return NULL;
1813         }
1814
1815         br_lck->fsp = fsp;
1816         br_lck->num_locks = 0;
1817         br_lck->modified = False;
1818         br_lck->key = fsp->file_id;
1819
1820         key.dptr = (uint8 *)&br_lck->key;
1821         key.dsize = sizeof(struct file_id);
1822
1823         if (!fsp->lockdb_clean) {
1824                 /* We must be read/write to clean
1825                    the dead entries. */
1826                 read_only = False;
1827         }
1828
1829         if (read_only) {
1830                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1831                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1832                         TALLOC_FREE(br_lck);
1833                         return NULL;
1834                 }
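                /* A plain fetch takes no lock on the record; leave
                   record NULL so the destructor knows there is nothing
                   to store back or unlock. */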
1835                 br_lck->record = NULL;
1836         } else {
1838                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1839
1840                 if (br_lck->record == NULL) {
1841                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1842                         TALLOC_FREE(br_lck);
1843                         return NULL;
1844                 }
1845
1846                 data = br_lck->record->value;
1847         }
1848
1849         br_lck->read_only = read_only;
1850         br_lck->lock_data = NULL;
1851
1852         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1853
1854         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1855
1856         if (br_lck->num_locks != 0) {
1857                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1858                                                      br_lck->num_locks);
1859                 if (br_lck->lock_data == NULL) {
1860                         DEBUG(0, ("malloc failed\n"));
1861                         TALLOC_FREE(br_lck);
1862                         return NULL;
1863                 }
1864
1865                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1866         }
1867         
1868         if (!fsp->lockdb_clean) {
1869                 int orig_num_locks = br_lck->num_locks;
1870
1871                 /* First access to this record for this open file:
1872                    remove any entries whose owning process no longer
1873                    exists. This makes the lockdb self-cleaning at low cost. */
1874
1875                 if (!validate_lock_entries(&br_lck->num_locks,
1876                                            &br_lck->lock_data)) {
1877                         SAFE_FREE(br_lck->lock_data);
1878                         TALLOC_FREE(br_lck);
1879                         return NULL;
1880                 }
1881
1882                 /* Ensure invalid locks are cleaned up in the destructor. */
1883                 if (orig_num_locks != br_lck->num_locks) {
1884                         br_lck->modified = True;
1885                 }
1886
1887                 /* Mark the lockdb as "clean" as seen from this open file. */
1888                 fsp->lockdb_clean = True;
1889         }
1890
1891         if (DEBUGLEVEL >= 10) {
1892                 unsigned int i;
1893                 struct lock_struct *locks = br_lck->lock_data;
1894                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1895                         br_lck->num_locks,
1896                         file_id_string_tos(&fsp->file_id)));
1897                 for( i = 0; i < br_lck->num_locks; i++) {
1898                         print_lock_struct(i, &locks[i]);
1899                 }
1900         }
1901         return br_lck;
1902 }
1903
1904 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1905                                         files_struct *fsp)
1906 {
1907         return brl_get_locks_internal(mem_ctx, fsp, False);
1908 }
1909
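/****************************************************************************
 Fetch a read-only view of the locks on an fsp. Outside clustering the
 result is cached on the fsp and revalidated against the brlock.tdb
 sequence number, so repeated queries avoid re-reading the database.
****************************************************************************/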
1910 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1911 {
1912         struct byte_range_lock *br_lock;
1913
1914         if (lp_clustering()) {
1915                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1916         }
1917
1918         if ((fsp->brlock_rec != NULL)
1919             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1920                 return fsp->brlock_rec;
1921         }
1922
1923         TALLOC_FREE(fsp->brlock_rec);
1924
1925         br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
1926         if (br_lock == NULL) {
1927                 return NULL;
1928         }
1929         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1930
1931         fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
1932         if (fsp->brlock_rec == NULL) {
1933                 goto fail;
1934         }
1935         fsp->brlock_rec->fsp = fsp;
1936         fsp->brlock_rec->num_locks = br_lock->num_locks;
1937         fsp->brlock_rec->read_only = true;
1938         fsp->brlock_rec->key = br_lock->key;
1939
1940         fsp->brlock_rec->lock_data = (struct lock_struct *)
1941                 talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
1942                               sizeof(struct lock_struct) * br_lock->num_locks);
1943         if (fsp->brlock_rec->lock_data == NULL) {
1944                 goto fail;
1945         }
1946
1947         TALLOC_FREE(br_lock);
1948         return fsp->brlock_rec;
1949 fail:
1950         TALLOC_FREE(br_lock);
1951         TALLOC_FREE(fsp->brlock_rec);
1952         return NULL;
1953 }
1954
1955 struct brl_revalidate_state {
1956         ssize_t array_size;
1957         uint32 num_pids;
1958         struct server_id *pids;
1959 };
1960
1961 /*
1962  * Collect PIDs of all processes with pending entries
1963  */
1964
1965 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1966                                    enum brl_type lock_type,
1967                                    enum brl_flavour lock_flav,
1968                                    br_off start, br_off size,
1969                                    void *private_data)
1970 {
1971         struct brl_revalidate_state *state =
1972                 (struct brl_revalidate_state *)private_data;
1973
1974         if (!IS_PENDING_LOCK(lock_type)) {
1975                 return;
1976         }
1977
1978         add_to_large_array(state, sizeof(pid), (void *)&pid,
1979                            &state->pids, &state->num_pids,
1980                            &state->array_size);
1981 }
1982
1983 /*
1984  * qsort callback to sort the processes
1985  */
1986
1987 static int compare_procids(const void *p1, const void *p2)
1988 {
1989         const struct server_id *i1 = (const struct server_id *)p1;
1990         const struct server_id *i2 = (const struct server_id *)p2;
1991
1992         if (i1->pid < i2->pid) return -1;
1993         if (i1->pid > i2->pid) return 1;
1994         return 0;
1995 }
1996
1997 /*
1998  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1999  * locks so that they retry. Mainly used in the cluster code after a node has
2000  * died.
2001  *
2002  * Done in two steps to avoid double-sends: First we collect all entries in an
2003  * array, then qsort that array and only send to non-dupes.
2004  */
2005
2006 static void brl_revalidate(struct messaging_context *msg_ctx,
2007                            void *private_data,
2008                            uint32_t msg_type,
2009                            struct server_id server_id,
2010                            DATA_BLOB *data)
2011 {
2012         struct brl_revalidate_state *state;
2013         uint32 i;
2014         struct server_id last_pid;
2015
2016         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2017                 DEBUG(0, ("talloc failed\n"));
2018                 return;
2019         }
2020
2021         brl_forall(brl_revalidate_collect, state);
2022
2023         if (state->array_size == -1) {
2024                 DEBUG(0, ("talloc failed\n"));
2025                 goto done;
2026         }
2027
2028         if (state->num_pids == 0) {
2029                 goto done;
2030         }
2031
2032         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2033
2034         ZERO_STRUCT(last_pid);
2035
2036         for (i=0; i<state->num_pids; i++) {
2037                 if (procid_equal(&last_pid, &state->pids[i])) {
2038                         /*
2039                          * We've seen that one already
2040                          */
2041                         continue;
2042                 }
2043
2044                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2045                                &data_blob_null);
2046                 last_pid = state->pids[i];
2047         }
2048
2049  done:
2050         TALLOC_FREE(state);
2051         return;
2052 }
2053
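/****************************************************************************
 Register the revalidate handler: MSG_SMB_BRL_VALIDATE is sent (mainly by
 the cluster code after a node dies) to make processes with pending byte
 range locks retry them.
****************************************************************************/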
2054 void brl_register_msgs(struct messaging_context *msg_ctx)
2055 {
2056         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2057                            brl_revalidate);
2058 }