s3: use TDB_INCOMPATIBLE_HASH (the jenkins hash) on all TDB_CLEAR_IF_FIRST tdb's.
[obnox/samba-ctdb.git] / source3 / locking / brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28
29 #undef DBGC_CLASS
30 #define DBGC_CLASS DBGC_LOCKING
31
32 #define ZERO_ZERO 0
33
34 /* The open brlock.tdb database. */
35
36 static struct db_context *brlock_db;
37
38 /****************************************************************************
39  Debug info at level 10 for lock struct.
40 ****************************************************************************/
41
42 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
43 {
44         DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %s, ",
45                         i,
46                         (unsigned int)pls->context.smbpid,
47                         (unsigned int)pls->context.tid,
48                         procid_str(debug_ctx(), &pls->context.pid) ));
49         
50         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
51                 (double)pls->start,
52                 (double)pls->size,
53                 pls->fnum,
54                 lock_type_name(pls->lock_type),
55                 lock_flav_name(pls->lock_flav) ));
56 }
57
58 /****************************************************************************
59  See if two locking contexts are equal.
60 ****************************************************************************/
61
62 bool brl_same_context(const struct lock_context *ctx1, 
63                              const struct lock_context *ctx2)
64 {
65         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
66                 (ctx1->smbpid == ctx2->smbpid) &&
67                 (ctx1->tid == ctx2->tid));
68 }
69
70 /****************************************************************************
71  See if lck1 and lck2 overlap.
72 ****************************************************************************/
73
74 static bool brl_overlap(const struct lock_struct *lck1,
75                         const struct lock_struct *lck2)
76 {
77         /* XXX Remove for Win7 compatibility. */
78         /* this extra check is not redundent - it copes with locks
79            that go beyond the end of 64 bit file space */
80         if (lck1->size != 0 &&
81             lck1->start == lck2->start &&
82             lck1->size == lck2->size) {
83                 return True;
84         }
85
86         if (lck1->start >= (lck2->start+lck2->size) ||
87             lck2->start >= (lck1->start+lck1->size)) {
88                 return False;
89         }
90         return True;
91 }
92
93 /****************************************************************************
94  See if lock2 can be added when lock1 is in place.
95 ****************************************************************************/
96
97 static bool brl_conflict(const struct lock_struct *lck1, 
98                          const struct lock_struct *lck2)
99 {
100         /* Ignore PENDING locks. */
101         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
102                 return False;
103
104         /* Read locks never conflict. */
105         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
106                 return False;
107         }
108
109         /* A READ lock can stack on top of a WRITE lock if they have the same
110          * context & fnum. */
111         if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
112             brl_same_context(&lck1->context, &lck2->context) &&
113             lck1->fnum == lck2->fnum) {
114                 return False;
115         }
116
117         return brl_overlap(lck1, lck2);
118
119
120 /****************************************************************************
121  See if lock2 can be added when lock1 is in place - when both locks are POSIX
122  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
123  know already match.
124 ****************************************************************************/
125
126 static bool brl_conflict_posix(const struct lock_struct *lck1, 
127                                 const struct lock_struct *lck2)
128 {
129 #if defined(DEVELOPER)
130         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
131         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
132 #endif
133
134         /* Ignore PENDING locks. */
135         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
136                 return False;
137
138         /* Read locks never conflict. */
139         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
140                 return False;
141         }
142
143         /* Locks on the same context con't conflict. Ignore fnum. */
144         if (brl_same_context(&lck1->context, &lck2->context)) {
145                 return False;
146         }
147
148         /* One is read, the other write, or the context is different,
149            do they overlap ? */
150         return brl_overlap(lck1, lck2);
151
152
#if ZERO_ZERO
/****************************************************************************
 Conflict check used only when ZERO_ZERO is enabled.

 Differs from brl_conflict() in that a requested lock with start == 0 and
 size == 0 conflicts with any held lock of non-zero size, and in that the
 same-context/same-fnum exemption applies to any requested READ lock
 regardless of the type of the held lock.

 Returns True when the two locks conflict.
****************************************************************************/

static bool brl_conflict1(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) {
		return False;
	}

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A requested READ lock on our own context and fnum is allowed. */
	if (brl_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
		return False;
	}

	/* A zero/zero request conflicts with any non-empty held lock. */
	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
		return True;
	}

	/* Disjoint ranges do not conflict. */
	if (lck1->start >= (lck2->start + lck2->size) ||
	    lck2->start >= (lck1->start + lck1->size)) {
		return False;
	}

	return True;
}
#endif
181
182 /****************************************************************************
183  Check to see if this lock conflicts, but ignore our own locks on the
184  same fnum only. This is the read/write lock check code path.
185  This is never used in the POSIX lock case.
186 ****************************************************************************/
187
188 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
189 {
190         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
191                 return False;
192
193         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
194                 return False;
195
196         /* POSIX flavour locks never conflict here - this is only called
197            in the read/write path. */
198
199         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
200                 return False;
201
202         /*
203          * Incoming WRITE locks conflict with existing READ locks even
204          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
205          */
206
207         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
208                 if (brl_same_context(&lck1->context, &lck2->context) &&
209                                         lck1->fnum == lck2->fnum)
210                         return False;
211         }
212
213         return brl_overlap(lck1, lck2);
214
215
216 /****************************************************************************
217  Check if an unlock overlaps a pending lock.
218 ****************************************************************************/
219
220 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
221 {
222         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
223                 return True;
224         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
225                 return True;
226         return False;
227 }
228
229 /****************************************************************************
230  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
231  is the same as this one and changes its error code. I wonder if any
232  app depends on this ?
233 ****************************************************************************/
234
235 NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
236 {
237         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
238                 /* amazing the little things you learn with a test
239                    suite. Locks beyond this offset (as a 64 bit
240                    number!) always generate the conflict error code,
241                    unless the top bit is set */
242                 if (!blocking_lock) {
243                         fsp->last_lock_failure = *lock;
244                 }
245                 return NT_STATUS_FILE_LOCK_CONFLICT;
246         }
247
248         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
249                         lock->context.tid == fsp->last_lock_failure.context.tid &&
250                         lock->fnum == fsp->last_lock_failure.fnum &&
251                         lock->start == fsp->last_lock_failure.start) {
252                 return NT_STATUS_FILE_LOCK_CONFLICT;
253         }
254
255         if (!blocking_lock) {
256                 fsp->last_lock_failure = *lock;
257         }
258         return NT_STATUS_LOCK_NOT_GRANTED;
259 }
260
261 /****************************************************************************
262  Open up the brlock.tdb database.
263 ****************************************************************************/
264
265 void brl_init(bool read_only)
266 {
267         if (brlock_db) {
268                 return;
269         }
270         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
271                             lp_open_files_db_hash_size(),
272                             TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH,
273                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
274         if (!brlock_db) {
275                 DEBUG(0,("Failed to open byte range locking database %s\n",
276                         lock_path("brlock.tdb")));
277                 return;
278         }
279 }
280
281 /****************************************************************************
282  Close down the brlock.tdb database.
283 ****************************************************************************/
284
285 void brl_shutdown(void)
286 {
287         TALLOC_FREE(brlock_db);
288 }
289
#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.

 Orders by start offset first, then by size.

 Returns a negative value, zero or a positive value when lck1 sorts before,
 equal to or after lck2. The result is computed by comparison rather than
 by subtraction: a difference of two offsets narrowed to int can yield the
 wrong sign (or zero) for locks that are far apart.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	if (lck1->start != lck2->start) {
		return (lck1->start < lck2->start) ? -1 : 1;
	}
	if (lck1->size != lck2->size) {
		return (lck1->size < lck2->size) ? -1 : 1;
	}
	return 0;
}
#endif
307
308 /****************************************************************************
309  Lock a range of bytes - Windows lock semantics.
310 ****************************************************************************/
311
312 NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
313     struct lock_struct *plock, bool blocking_lock)
314 {
315         unsigned int i;
316         files_struct *fsp = br_lck->fsp;
317         struct lock_struct *locks = br_lck->lock_data;
318         NTSTATUS status;
319
320         SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
321
322         for (i=0; i < br_lck->num_locks; i++) {
323                 /* Do any Windows or POSIX locks conflict ? */
324                 if (brl_conflict(&locks[i], plock)) {
325                         /* Remember who blocked us. */
326                         plock->context.smbpid = locks[i].context.smbpid;
327                         return brl_lock_failed(fsp,plock,blocking_lock);
328                 }
329 #if ZERO_ZERO
330                 if (plock->start == 0 && plock->size == 0 && 
331                                 locks[i].size == 0) {
332                         break;
333                 }
334 #endif
335         }
336
337         if (!IS_PENDING_LOCK(plock->lock_type)) {
338                 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
339         }
340
341         /* We can get the Windows lock, now see if it needs to
342            be mapped into a lower level POSIX one, and if so can
343            we get it ? */
344
345         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
346                 int errno_ret;
347                 if (!set_posix_lock_windows_flavour(fsp,
348                                 plock->start,
349                                 plock->size,
350                                 plock->lock_type,
351                                 &plock->context,
352                                 locks,
353                                 br_lck->num_locks,
354                                 &errno_ret)) {
355
356                         /* We don't know who blocked us. */
357                         plock->context.smbpid = 0xFFFFFFFF;
358
359                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
360                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
361                                 goto fail;
362                         } else {
363                                 status = map_nt_error_from_unix(errno);
364                                 goto fail;
365                         }
366                 }
367         }
368
369         /* no conflicts - add it to the list of locks */
370         locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
371         if (!locks) {
372                 status = NT_STATUS_NO_MEMORY;
373                 goto fail;
374         }
375
376         memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
377         br_lck->num_locks += 1;
378         br_lck->lock_data = locks;
379         br_lck->modified = True;
380
381         return NT_STATUS_OK;
382  fail:
383         if (!IS_PENDING_LOCK(plock->lock_type)) {
384                 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
385         }
386         return status;
387 }
388
389 /****************************************************************************
390  Cope with POSIX range splits and merges.
391 ****************************************************************************/
392
393 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
394                                                 struct lock_struct *ex,         /* existing lock. */
395                                                 struct lock_struct *plock)      /* proposed lock. */
396 {
397         bool lock_types_differ = (ex->lock_type != plock->lock_type);
398
399         /* We can't merge non-conflicting locks on different context - ignore fnum. */
400
401         if (!brl_same_context(&ex->context, &plock->context)) {
402                 /* Just copy. */
403                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
404                 return 1;
405         }
406
407         /* We now know we have the same context. */
408
409         /* Did we overlap ? */
410
411 /*********************************************
412                                         +---------+
413                                         | ex      |
414                                         +---------+
415                          +-------+
416                          | plock |
417                          +-------+
418 OR....
419         +---------+
420         |  ex     |
421         +---------+
422 **********************************************/
423
424         if ( (ex->start > (plock->start + plock->size)) ||
425                 (plock->start > (ex->start + ex->size))) {
426
427                 /* No overlap with this lock - copy existing. */
428
429                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
430                 return 1;
431         }
432
433 /*********************************************
434         +---------------------------+
435         |          ex               |
436         +---------------------------+
437         +---------------------------+
438         |       plock               | -> replace with plock.
439         +---------------------------+
440 OR
441              +---------------+
442              |       ex      |
443              +---------------+
444         +---------------------------+
445         |       plock               | -> replace with plock.
446         +---------------------------+
447
448 **********************************************/
449
450         if ( (ex->start >= plock->start) &&
451                 (ex->start + ex->size <= plock->start + plock->size) ) {
452
453                 /* Replace - discard existing lock. */
454
455                 return 0;
456         }
457
458 /*********************************************
459 Adjacent after.
460                         +-------+
461                         |  ex   |
462                         +-------+
463         +---------------+
464         |   plock       |
465         +---------------+
466
467 BECOMES....
468         +---------------+-------+
469         |   plock       | ex    | - different lock types.
470         +---------------+-------+
471 OR.... (merge)
472         +-----------------------+
473         |   plock               | - same lock type.
474         +-----------------------+
475 **********************************************/
476
477         if (plock->start + plock->size == ex->start) {
478
479                 /* If the lock types are the same, we merge, if different, we
480                    add the remainder of the old lock. */
481
482                 if (lock_types_differ) {
483                         /* Add existing. */
484                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
485                         return 1;
486                 } else {
487                         /* Merge - adjust incoming lock as we may have more
488                          * merging to come. */
489                         plock->size += ex->size;
490                         return 0;
491                 }
492         }
493
494 /*********************************************
495 Adjacent before.
496         +-------+
497         |  ex   |
498         +-------+
499                 +---------------+
500                 |   plock       |
501                 +---------------+
502 BECOMES....
503         +-------+---------------+
504         | ex    |   plock       | - different lock types
505         +-------+---------------+
506
507 OR.... (merge)
508         +-----------------------+
509         |      plock            | - same lock type.
510         +-----------------------+
511
512 **********************************************/
513
514         if (ex->start + ex->size == plock->start) {
515
516                 /* If the lock types are the same, we merge, if different, we
517                    add the existing lock. */
518
519                 if (lock_types_differ) {
520                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
521                         return 1;
522                 } else {
523                         /* Merge - adjust incoming lock as we may have more
524                          * merging to come. */
525                         plock->start = ex->start;
526                         plock->size += ex->size;
527                         return 0;
528                 }
529         }
530
531 /*********************************************
532 Overlap after.
533         +-----------------------+
534         |          ex           |
535         +-----------------------+
536         +---------------+
537         |   plock       |
538         +---------------+
539 OR
540                +----------------+
541                |       ex       |
542                +----------------+
543         +---------------+
544         |   plock       |
545         +---------------+
546
547 BECOMES....
548         +---------------+-------+
549         |   plock       | ex    | - different lock types.
550         +---------------+-------+
551 OR.... (merge)
552         +-----------------------+
553         |   plock               | - same lock type.
554         +-----------------------+
555 **********************************************/
556
557         if ( (ex->start >= plock->start) &&
558                 (ex->start <= plock->start + plock->size) &&
559                 (ex->start + ex->size > plock->start + plock->size) ) {
560
561                 /* If the lock types are the same, we merge, if different, we
562                    add the remainder of the old lock. */
563
564                 if (lock_types_differ) {
565                         /* Add remaining existing. */
566                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
567                         /* Adjust existing start and size. */
568                         lck_arr[0].start = plock->start + plock->size;
569                         lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
570                         return 1;
571                 } else {
572                         /* Merge - adjust incoming lock as we may have more
573                          * merging to come. */
574                         plock->size += (ex->start + ex->size) - (plock->start + plock->size);
575                         return 0;
576                 }
577         }
578
579 /*********************************************
580 Overlap before.
581         +-----------------------+
582         |  ex                   |
583         +-----------------------+
584                 +---------------+
585                 |   plock       |
586                 +---------------+
587 OR
588         +-------------+
589         |  ex         |
590         +-------------+
591                 +---------------+
592                 |   plock       |
593                 +---------------+
594
595 BECOMES....
596         +-------+---------------+
597         | ex    |   plock       | - different lock types
598         +-------+---------------+
599
600 OR.... (merge)
601         +-----------------------+
602         |      plock            | - same lock type.
603         +-----------------------+
604
605 **********************************************/
606
607         if ( (ex->start < plock->start) &&
608                         (ex->start + ex->size >= plock->start) &&
609                         (ex->start + ex->size <= plock->start + plock->size) ) {
610
611                 /* If the lock types are the same, we merge, if different, we
612                    add the truncated old lock. */
613
614                 if (lock_types_differ) {
615                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
616                         /* Adjust existing size. */
617                         lck_arr[0].size = plock->start - ex->start;
618                         return 1;
619                 } else {
620                         /* Merge - adjust incoming lock as we may have more
621                          * merging to come. MUST ADJUST plock SIZE FIRST ! */
622                         plock->size += (plock->start - ex->start);
623                         plock->start = ex->start;
624                         return 0;
625                 }
626         }
627
628 /*********************************************
629 Complete overlap.
630         +---------------------------+
631         |        ex                 |
632         +---------------------------+
633                 +---------+
634                 |  plock  |
635                 +---------+
636 BECOMES.....
637         +-------+---------+---------+
638         | ex    |  plock  | ex      | - different lock types.
639         +-------+---------+---------+
640 OR
641         +---------------------------+
642         |        plock              | - same lock type.
643         +---------------------------+
644 **********************************************/
645
646         if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
647
648                 if (lock_types_differ) {
649
650                         /* We have to split ex into two locks here. */
651
652                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
653                         memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
654
655                         /* Adjust first existing size. */
656                         lck_arr[0].size = plock->start - ex->start;
657
658                         /* Adjust second existing start and size. */
659                         lck_arr[1].start = plock->start + plock->size;
660                         lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
661                         return 2;
662                 } else {
663                         /* Just eat the existing locks, merge them into plock. */
664                         plock->start = ex->start;
665                         plock->size = ex->size;
666                         return 0;
667                 }
668         }
669
670         /* Never get here. */
671         smb_panic("brlock_posix_split_merge");
672         /* Notreached. */
673
674         /* Keep some compilers happy. */
675         return 0;
676 }
677
678 /****************************************************************************
679  Lock a range of bytes - POSIX lock semantics.
680  We must cope with range splits and merges.
681 ****************************************************************************/
682
683 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
684                                struct byte_range_lock *br_lck,
685                                struct lock_struct *plock)
686 {
687         unsigned int i, count, posix_count;
688         struct lock_struct *locks = br_lck->lock_data;
689         struct lock_struct *tp;
690         bool signal_pending_read = False;
691         bool break_oplocks = false;
692         NTSTATUS status;
693
694         /* No zero-zero locks for POSIX. */
695         if (plock->start == 0 && plock->size == 0) {
696                 return NT_STATUS_INVALID_PARAMETER;
697         }
698
699         /* Don't allow 64-bit lock wrap. */
700         if (plock->start + plock->size < plock->start ||
701                         plock->start + plock->size < plock->size) {
702                 return NT_STATUS_INVALID_PARAMETER;
703         }
704
705         /* The worst case scenario here is we have to split an
706            existing POSIX lock range into two, and add our lock,
707            so we need at most 2 more entries. */
708
709         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
710         if (!tp) {
711                 return NT_STATUS_NO_MEMORY;
712         }
713
714         count = posix_count = 0;
715
716         for (i=0; i < br_lck->num_locks; i++) {
717                 struct lock_struct *curr_lock = &locks[i];
718
719                 /* If we have a pending read lock, a lock downgrade should
720                    trigger a lock re-evaluation. */
721                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
722                                 brl_pending_overlap(plock, curr_lock)) {
723                         signal_pending_read = True;
724                 }
725
726                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
727                         /* Do any Windows flavour locks conflict ? */
728                         if (brl_conflict(curr_lock, plock)) {
729                                 /* No games with error messages. */
730                                 SAFE_FREE(tp);
731                                 /* Remember who blocked us. */
732                                 plock->context.smbpid = curr_lock->context.smbpid;
733                                 return NT_STATUS_FILE_LOCK_CONFLICT;
734                         }
735                         /* Just copy the Windows lock into the new array. */
736                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
737                         count++;
738                 } else {
739                         unsigned int tmp_count = 0;
740
741                         /* POSIX conflict semantics are different. */
742                         if (brl_conflict_posix(curr_lock, plock)) {
743                                 /* Can't block ourselves with POSIX locks. */
744                                 /* No games with error messages. */
745                                 SAFE_FREE(tp);
746                                 /* Remember who blocked us. */
747                                 plock->context.smbpid = curr_lock->context.smbpid;
748                                 return NT_STATUS_FILE_LOCK_CONFLICT;
749                         }
750
751                         /* Work out overlaps. */
752                         tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
753                         posix_count += tmp_count;
754                         count += tmp_count;
755                 }
756         }
757
758         /*
759          * Break oplocks while we hold a brl. Since lock() and unlock() calls
760          * are not symetric with POSIX semantics, we cannot guarantee our
761          * contend_level2_oplocks_begin/end calls will be acquired and
762          * released one-for-one as with Windows semantics. Therefore we only
763          * call contend_level2_oplocks_begin if this is the first POSIX brl on
764          * the file.
765          */
766         break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
767                          posix_count == 0);
768         if (break_oplocks) {
769                 contend_level2_oplocks_begin(br_lck->fsp,
770                                              LEVEL2_CONTEND_POSIX_BRL);
771         }
772
773         /* Try and add the lock in order, sorted by lock start. */
774         for (i=0; i < count; i++) {
775                 struct lock_struct *curr_lock = &tp[i];
776
777                 if (curr_lock->start <= plock->start) {
778                         continue;
779                 }
780         }
781
782         if (i < count) {
783                 memmove(&tp[i+1], &tp[i],
784                         (count - i)*sizeof(struct lock_struct));
785         }
786         memcpy(&tp[i], plock, sizeof(struct lock_struct));
787         count++;
788
789         /* We can get the POSIX lock, now see if it needs to
790            be mapped into a lower level POSIX one, and if so can
791            we get it ? */
792
793         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
794                 int errno_ret;
795
796                 /* The lower layer just needs to attempt to
797                    get the system POSIX lock. We've weeded out
798                    any conflicts above. */
799
800                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
801                                 plock->start,
802                                 plock->size,
803                                 plock->lock_type,
804                                 &errno_ret)) {
805
806                         /* We don't know who blocked us. */
807                         plock->context.smbpid = 0xFFFFFFFF;
808
809                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
810                                 SAFE_FREE(tp);
811                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
812                                 goto fail;
813                         } else {
814                                 SAFE_FREE(tp);
815                                 status = map_nt_error_from_unix(errno);
816                                 goto fail;
817                         }
818                 }
819         }
820
821         /* If we didn't use all the allocated size,
822          * Realloc so we don't leak entries per lock call. */
823         if (count < br_lck->num_locks + 2) {
824                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
825                 if (!tp) {
826                         status = NT_STATUS_NO_MEMORY;
827                         goto fail;
828                 }
829         }
830
831         br_lck->num_locks = count;
832         SAFE_FREE(br_lck->lock_data);
833         br_lck->lock_data = tp;
834         locks = tp;
835         br_lck->modified = True;
836
837         /* A successful downgrade from write to read lock can trigger a lock
838            re-evalutation where waiting readers can now proceed. */
839
840         if (signal_pending_read) {
841                 /* Send unlock messages to any pending read waiters that overlap. */
842                 for (i=0; i < br_lck->num_locks; i++) {
843                         struct lock_struct *pend_lock = &locks[i];
844
845                         /* Ignore non-pending locks. */
846                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
847                                 continue;
848                         }
849
850                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
851                                         brl_pending_overlap(plock, pend_lock)) {
852                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
853                                         procid_str_static(&pend_lock->context.pid )));
854
855                                 messaging_send(msg_ctx, pend_lock->context.pid,
856                                                MSG_SMB_UNLOCK, &data_blob_null);
857                         }
858                 }
859         }
860
861         return NT_STATUS_OK;
862  fail:
863         if (break_oplocks) {
864                 contend_level2_oplocks_end(br_lck->fsp,
865                                            LEVEL2_CONTEND_POSIX_BRL);
866         }
867         return status;
868 }
869
870 /****************************************************************************
871  Lock a range of bytes.
872 ****************************************************************************/
873
874 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
875                 struct byte_range_lock *br_lck,
876                 uint32 smbpid,
877                 struct server_id pid,
878                 br_off start,
879                 br_off size, 
880                 enum brl_type lock_type,
881                 enum brl_flavour lock_flav,
882                 bool blocking_lock,
883                 uint32 *psmbpid,
884                 struct blocking_lock_record *blr)
885 {
886         NTSTATUS ret;
887         struct lock_struct lock;
888
889 #if !ZERO_ZERO
890         if (start == 0 && size == 0) {
891                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
892         }
893 #endif
894
895 #ifdef DEVELOPER
896         /* Quieten valgrind on test. */
897         memset(&lock, '\0', sizeof(lock));
898 #endif
899
900         lock.context.smbpid = smbpid;
901         lock.context.pid = pid;
902         lock.context.tid = br_lck->fsp->conn->cnum;
903         lock.start = start;
904         lock.size = size;
905         lock.fnum = br_lck->fsp->fnum;
906         lock.lock_type = lock_type;
907         lock.lock_flav = lock_flav;
908
909         if (lock_flav == WINDOWS_LOCK) {
910                 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
911                     &lock, blocking_lock, blr);
912         } else {
913                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
914         }
915
916 #if ZERO_ZERO
917         /* sort the lock list */
918         qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
919 #endif
920
921         /* If we're returning an error, return who blocked us. */
922         if (!NT_STATUS_IS_OK(ret) && psmbpid) {
923                 *psmbpid = lock.context.smbpid;
924         }
925         return ret;
926 }
927
928 /****************************************************************************
929  Unlock a range of bytes - Windows semantics.
930 ****************************************************************************/
931
932 bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
933                                struct byte_range_lock *br_lck,
934                                const struct lock_struct *plock)
935 {
936         unsigned int i, j;
937         struct lock_struct *locks = br_lck->lock_data;
938         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
939
940         SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
941
942 #if ZERO_ZERO
943         /* Delete write locks by preference... The lock list
944            is sorted in the zero zero case. */
945
946         for (i = 0; i < br_lck->num_locks; i++) {
947                 struct lock_struct *lock = &locks[i];
948
949                 if (lock->lock_type == WRITE_LOCK &&
950                     brl_same_context(&lock->context, &plock->context) &&
951                     lock->fnum == plock->fnum &&
952                     lock->lock_flav == WINDOWS_LOCK &&
953                     lock->start == plock->start &&
954                     lock->size == plock->size) {
955
956                         /* found it - delete it */
957                         deleted_lock_type = lock->lock_type;
958                         break;
959                 }
960         }
961
962         if (i != br_lck->num_locks) {
963                 /* We found it - don't search again. */
964                 goto unlock_continue;
965         }
966 #endif
967
968         for (i = 0; i < br_lck->num_locks; i++) {
969                 struct lock_struct *lock = &locks[i];
970
971                 /* Only remove our own locks that match in start, size, and flavour. */
972                 if (brl_same_context(&lock->context, &plock->context) &&
973                                         lock->fnum == plock->fnum &&
974                                         lock->lock_flav == WINDOWS_LOCK &&
975                                         lock->start == plock->start &&
976                                         lock->size == plock->size ) {
977                         deleted_lock_type = lock->lock_type;
978                         break;
979                 }
980         }
981
982         if (i == br_lck->num_locks) {
983                 /* we didn't find it */
984                 return False;
985         }
986
987 #if ZERO_ZERO
988   unlock_continue:
989 #endif
990
991         /* Actually delete the lock. */
992         if (i < br_lck->num_locks - 1) {
993                 memmove(&locks[i], &locks[i+1], 
994                         sizeof(*locks)*((br_lck->num_locks-1) - i));
995         }
996
997         br_lck->num_locks -= 1;
998         br_lck->modified = True;
999
1000         /* Unlock the underlying POSIX regions. */
1001         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1002                 release_posix_lock_windows_flavour(br_lck->fsp,
1003                                 plock->start,
1004                                 plock->size,
1005                                 deleted_lock_type,
1006                                 &plock->context,
1007                                 locks,
1008                                 br_lck->num_locks);
1009         }
1010
1011         /* Send unlock messages to any pending waiters that overlap. */
1012         for (j=0; j < br_lck->num_locks; j++) {
1013                 struct lock_struct *pend_lock = &locks[j];
1014
1015                 /* Ignore non-pending locks. */
1016                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1017                         continue;
1018                 }
1019
1020                 /* We could send specific lock info here... */
1021                 if (brl_pending_overlap(plock, pend_lock)) {
1022                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1023                                 procid_str_static(&pend_lock->context.pid )));
1024
1025                         messaging_send(msg_ctx, pend_lock->context.pid,
1026                                        MSG_SMB_UNLOCK, &data_blob_null);
1027                 }
1028         }
1029
1030         contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
1031         return True;
1032 }
1033
1034 /****************************************************************************
1035  Unlock a range of bytes - POSIX semantics.
1036 ****************************************************************************/
1037
1038 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
1039                              struct byte_range_lock *br_lck,
1040                              struct lock_struct *plock)
1041 {
1042         unsigned int i, j, count;
1043         struct lock_struct *tp;
1044         struct lock_struct *locks = br_lck->lock_data;
1045         bool overlap_found = False;
1046
1047         /* No zero-zero locks for POSIX. */
1048         if (plock->start == 0 && plock->size == 0) {
1049                 return False;
1050         }
1051
1052         /* Don't allow 64-bit lock wrap. */
1053         if (plock->start + plock->size < plock->start ||
1054                         plock->start + plock->size < plock->size) {
1055                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
1056                 return False;
1057         }
1058
1059         /* The worst case scenario here is we have to split an
1060            existing POSIX lock range into two, so we need at most
1061            1 more entry. */
1062
1063         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
1064         if (!tp) {
1065                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
1066                 return False;
1067         }
1068
1069         count = 0;
1070         for (i = 0; i < br_lck->num_locks; i++) {
1071                 struct lock_struct *lock = &locks[i];
1072                 unsigned int tmp_count;
1073
1074                 /* Only remove our own locks - ignore fnum. */
1075                 if (IS_PENDING_LOCK(lock->lock_type) ||
1076                                 !brl_same_context(&lock->context, &plock->context)) {
1077                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1078                         count++;
1079                         continue;
1080                 }
1081
1082                 if (lock->lock_flav == WINDOWS_LOCK) {
1083                         /* Do any Windows flavour locks conflict ? */
1084                         if (brl_conflict(lock, plock)) {
1085                                 SAFE_FREE(tp);
1086                                 return false;
1087                         }
1088                         /* Just copy the Windows lock into the new array. */
1089                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
1090                         count++;
1091                         continue;
1092                 }
1093
1094                 /* Work out overlaps. */
1095                 tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
1096
1097                 if (tmp_count == 0) {
1098                         /* plock overlapped the existing lock completely,
1099                            or replaced it. Don't copy the existing lock. */
1100                         overlap_found = true;
1101                 } else if (tmp_count == 1) {
1102                         /* Either no overlap, (simple copy of existing lock) or
1103                          * an overlap of an existing lock. */
1104                         /* If the lock changed size, we had an overlap. */
1105                         if (tp[count].size != lock->size) {
1106                                 overlap_found = true;
1107                         }
1108                         count += tmp_count;
1109                 } else if (tmp_count == 2) {
1110                         /* We split a lock range in two. */
1111                         overlap_found = true;
1112                         count += tmp_count;
1113
1114                         /* Optimisation... */
1115                         /* We know we're finished here as we can't overlap any
1116                            more POSIX locks. Copy the rest of the lock array. */
1117
1118                         if (i < br_lck->num_locks - 1) {
1119                                 memcpy(&tp[count], &locks[i+1],
1120                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1121                                 count += ((br_lck->num_locks-1) - i);
1122                         }
1123                         break;
1124                 }
1125
1126         }
1127
1128         if (!overlap_found) {
1129                 /* Just ignore - no change. */
1130                 SAFE_FREE(tp);
1131                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1132                 return True;
1133         }
1134
1135         /* Unlock any POSIX regions. */
1136         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1137                 release_posix_lock_posix_flavour(br_lck->fsp,
1138                                                 plock->start,
1139                                                 plock->size,
1140                                                 &plock->context,
1141                                                 tp,
1142                                                 count);
1143         }
1144
1145         /* Realloc so we don't leak entries per unlock call. */
1146         if (count) {
1147                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1148                 if (!tp) {
1149                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1150                         return False;
1151                 }
1152         } else {
1153                 /* We deleted the last lock. */
1154                 SAFE_FREE(tp);
1155                 tp = NULL;
1156         }
1157
1158         contend_level2_oplocks_end(br_lck->fsp,
1159                                    LEVEL2_CONTEND_POSIX_BRL);
1160
1161         br_lck->num_locks = count;
1162         SAFE_FREE(br_lck->lock_data);
1163         locks = tp;
1164         br_lck->lock_data = tp;
1165         br_lck->modified = True;
1166
1167         /* Send unlock messages to any pending waiters that overlap. */
1168
1169         for (j=0; j < br_lck->num_locks; j++) {
1170                 struct lock_struct *pend_lock = &locks[j];
1171
1172                 /* Ignore non-pending locks. */
1173                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1174                         continue;
1175                 }
1176
1177                 /* We could send specific lock info here... */
1178                 if (brl_pending_overlap(plock, pend_lock)) {
1179                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1180                                 procid_str_static(&pend_lock->context.pid )));
1181
1182                         messaging_send(msg_ctx, pend_lock->context.pid,
1183                                        MSG_SMB_UNLOCK, &data_blob_null);
1184                 }
1185         }
1186
1187         return True;
1188 }
1189
1190 /****************************************************************************
1191  Unlock a range of bytes.
1192 ****************************************************************************/
1193
1194 bool brl_unlock(struct messaging_context *msg_ctx,
1195                 struct byte_range_lock *br_lck,
1196                 uint32 smbpid,
1197                 struct server_id pid,
1198                 br_off start,
1199                 br_off size,
1200                 enum brl_flavour lock_flav)
1201 {
1202         struct lock_struct lock;
1203
1204         lock.context.smbpid = smbpid;
1205         lock.context.pid = pid;
1206         lock.context.tid = br_lck->fsp->conn->cnum;
1207         lock.start = start;
1208         lock.size = size;
1209         lock.fnum = br_lck->fsp->fnum;
1210         lock.lock_type = UNLOCK_LOCK;
1211         lock.lock_flav = lock_flav;
1212
1213         if (lock_flav == WINDOWS_LOCK) {
1214                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1215                     br_lck, &lock);
1216         } else {
1217                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1218         }
1219 }
1220
1221 /****************************************************************************
1222  Test if we could add a lock if we wanted to.
1223  Returns True if the region required is currently unlocked, False if locked.
1224 ****************************************************************************/
1225
1226 bool brl_locktest(struct byte_range_lock *br_lck,
1227                 uint32 smbpid,
1228                 struct server_id pid,
1229                 br_off start,
1230                 br_off size, 
1231                 enum brl_type lock_type,
1232                 enum brl_flavour lock_flav)
1233 {
1234         bool ret = True;
1235         unsigned int i;
1236         struct lock_struct lock;
1237         const struct lock_struct *locks = br_lck->lock_data;
1238         files_struct *fsp = br_lck->fsp;
1239
1240         lock.context.smbpid = smbpid;
1241         lock.context.pid = pid;
1242         lock.context.tid = br_lck->fsp->conn->cnum;
1243         lock.start = start;
1244         lock.size = size;
1245         lock.fnum = fsp->fnum;
1246         lock.lock_type = lock_type;
1247         lock.lock_flav = lock_flav;
1248
1249         /* Make sure existing locks don't conflict */
1250         for (i=0; i < br_lck->num_locks; i++) {
1251                 /*
1252                  * Our own locks don't conflict.
1253                  */
1254                 if (brl_conflict_other(&locks[i], &lock)) {
1255                         return False;
1256                 }
1257         }
1258
1259         /*
1260          * There is no lock held by an SMB daemon, check to
1261          * see if there is a POSIX lock from a UNIX or NFS process.
1262          * This only conflicts with Windows locks, not POSIX locks.
1263          */
1264
1265         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1266                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1267
1268                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1269                         (double)start, (double)size, ret ? "locked" : "unlocked",
1270                         fsp->fnum, fsp->fsp_name ));
1271
1272                 /* We need to return the inverse of is_posix_locked. */
1273                 ret = !ret;
1274         }
1275
1276         /* no conflicts - we could have added it */
1277         return ret;
1278 }
1279
1280 /****************************************************************************
1281  Query for existing locks.
1282 ****************************************************************************/
1283
1284 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1285                 uint32 *psmbpid,
1286                 struct server_id pid,
1287                 br_off *pstart,
1288                 br_off *psize, 
1289                 enum brl_type *plock_type,
1290                 enum brl_flavour lock_flav)
1291 {
1292         unsigned int i;
1293         struct lock_struct lock;
1294         const struct lock_struct *locks = br_lck->lock_data;
1295         files_struct *fsp = br_lck->fsp;
1296
1297         lock.context.smbpid = *psmbpid;
1298         lock.context.pid = pid;
1299         lock.context.tid = br_lck->fsp->conn->cnum;
1300         lock.start = *pstart;
1301         lock.size = *psize;
1302         lock.fnum = fsp->fnum;
1303         lock.lock_type = *plock_type;
1304         lock.lock_flav = lock_flav;
1305
1306         /* Make sure existing locks don't conflict */
1307         for (i=0; i < br_lck->num_locks; i++) {
1308                 const struct lock_struct *exlock = &locks[i];
1309                 bool conflict = False;
1310
1311                 if (exlock->lock_flav == WINDOWS_LOCK) {
1312                         conflict = brl_conflict(exlock, &lock);
1313                 } else {        
1314                         conflict = brl_conflict_posix(exlock, &lock);
1315                 }
1316
1317                 if (conflict) {
1318                         *psmbpid = exlock->context.smbpid;
1319                         *pstart = exlock->start;
1320                         *psize = exlock->size;
1321                         *plock_type = exlock->lock_type;
1322                         return NT_STATUS_LOCK_NOT_GRANTED;
1323                 }
1324         }
1325
1326         /*
1327          * There is no lock held by an SMB daemon, check to
1328          * see if there is a POSIX lock from a UNIX or NFS process.
1329          */
1330
1331         if(lp_posix_locking(fsp->conn->params)) {
1332                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1333
1334                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1335                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1336                         fsp->fnum, fsp->fsp_name ));
1337
1338                 if (ret) {
1339                         /* Hmmm. No clue what to set smbpid to - use -1. */
1340                         *psmbpid = 0xFFFF;
1341                         return NT_STATUS_LOCK_NOT_GRANTED;
1342                 }
1343         }
1344
1345         return NT_STATUS_OK;
1346 }
1347
1348 /****************************************************************************
1349  Remove a particular pending lock.
1350 ****************************************************************************/
1351 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1352                 uint32 smbpid,
1353                 struct server_id pid,
1354                 br_off start,
1355                 br_off size,
1356                 enum brl_flavour lock_flav,
1357                 struct blocking_lock_record *blr)
1358 {
1359         bool ret;
1360         struct lock_struct lock;
1361
1362         lock.context.smbpid = smbpid;
1363         lock.context.pid = pid;
1364         lock.context.tid = br_lck->fsp->conn->cnum;
1365         lock.start = start;
1366         lock.size = size;
1367         lock.fnum = br_lck->fsp->fnum;
1368         lock.lock_flav = lock_flav;
1369         /* lock.lock_type doesn't matter */
1370
1371         if (lock_flav == WINDOWS_LOCK) {
1372                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1373                     &lock, blr);
1374         } else {
1375                 ret = brl_lock_cancel_default(br_lck, &lock);
1376         }
1377
1378         return ret;
1379 }
1380
1381 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1382                 struct lock_struct *plock)
1383 {
1384         unsigned int i;
1385         struct lock_struct *locks = br_lck->lock_data;
1386
1387         SMB_ASSERT(plock);
1388
1389         for (i = 0; i < br_lck->num_locks; i++) {
1390                 struct lock_struct *lock = &locks[i];
1391
1392                 /* For pending locks we *always* care about the fnum. */
1393                 if (brl_same_context(&lock->context, &plock->context) &&
1394                                 lock->fnum == plock->fnum &&
1395                                 IS_PENDING_LOCK(lock->lock_type) &&
1396                                 lock->lock_flav == plock->lock_flav &&
1397                                 lock->start == plock->start &&
1398                                 lock->size == plock->size) {
1399                         break;
1400                 }
1401         }
1402
1403         if (i == br_lck->num_locks) {
1404                 /* Didn't find it. */
1405                 return False;
1406         }
1407
1408         if (i < br_lck->num_locks - 1) {
1409                 /* Found this particular pending lock - delete it */
1410                 memmove(&locks[i], &locks[i+1], 
1411                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1412         }
1413
1414         br_lck->num_locks -= 1;
1415         br_lck->modified = True;
1416         return True;
1417 }
1418
/****************************************************************************
 Remove any locks associated with an open file.
 Returns nothing. If POSIX locking is enabled and this process also holds
 read/write locks under a different tid or fnum, each lock for this fnum is
 released individually via brl_unlock(); otherwise matching entries are
 deleted in bulk.
****************************************************************************/
1424
1425 void brl_close_fnum(struct messaging_context *msg_ctx,
1426                     struct byte_range_lock *br_lck)
1427 {
1428         files_struct *fsp = br_lck->fsp;
1429         uint16 tid = fsp->conn->cnum;
1430         int fnum = fsp->fnum;
1431         unsigned int i, j, dcount=0;
1432         int num_deleted_windows_locks = 0;
1433         struct lock_struct *locks = br_lck->lock_data;
1434         struct server_id pid = procid_self();
1435         bool unlock_individually = False;
1436         bool posix_level2_contention_ended = false;
1437
1438         if(lp_posix_locking(fsp->conn->params)) {
1439
1440                 /* Check if there are any Windows locks associated with this dev/ino
1441                    pair that are not this fnum. If so we need to call unlock on each
1442                    one in order to release the system POSIX locks correctly. */
1443
1444                 for (i=0; i < br_lck->num_locks; i++) {
1445                         struct lock_struct *lock = &locks[i];
1446
1447                         if (!procid_equal(&lock->context.pid, &pid)) {
1448                                 continue;
1449                         }
1450
1451                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1452                                 continue; /* Ignore pending. */
1453                         }
1454
1455                         if (lock->context.tid != tid || lock->fnum != fnum) {
1456                                 unlock_individually = True;
1457                                 break;
1458                         }
1459                 }
1460
1461                 if (unlock_individually) {
1462                         struct lock_struct *locks_copy;
1463                         unsigned int num_locks_copy;
1464
1465                         /* Copy the current lock array. */
1466                         if (br_lck->num_locks) {
1467                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1468                                 if (!locks_copy) {
1469                                         smb_panic("brl_close_fnum: talloc failed");
1470                                 }
1471                         } else {        
1472                                 locks_copy = NULL;
1473                         }
1474
1475                         num_locks_copy = br_lck->num_locks;
1476
1477                         for (i=0; i < num_locks_copy; i++) {
1478                                 struct lock_struct *lock = &locks_copy[i];
1479
1480                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1481                                                 (lock->fnum == fnum)) {
1482                                         brl_unlock(msg_ctx,
1483                                                 br_lck,
1484                                                 lock->context.smbpid,
1485                                                 pid,
1486                                                 lock->start,
1487                                                 lock->size,
1488                                                 lock->lock_flav);
1489                                 }
1490                         }
1491                         return;
1492                 }
1493         }
1494
1495         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1496
1497         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1498
1499         for (i=0; i < br_lck->num_locks; i++) {
1500                 struct lock_struct *lock = &locks[i];
1501                 bool del_this_lock = False;
1502
1503                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1504                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1505                                 del_this_lock = True;
1506                                 num_deleted_windows_locks++;
1507                                 contend_level2_oplocks_end(br_lck->fsp,
1508                                     LEVEL2_CONTEND_WINDOWS_BRL);
1509                         } else if (lock->lock_flav == POSIX_LOCK) {
1510                                 del_this_lock = True;
1511
1512                                 /* Only end level2 contention once for posix */
1513                                 if (!posix_level2_contention_ended) {
1514                                         posix_level2_contention_ended = true;
1515                                         contend_level2_oplocks_end(br_lck->fsp,
1516                                             LEVEL2_CONTEND_POSIX_BRL);
1517                                 }
1518                         }
1519                 }
1520
1521                 if (del_this_lock) {
1522                         /* Send unlock messages to any pending waiters that overlap. */
1523                         for (j=0; j < br_lck->num_locks; j++) {
1524                                 struct lock_struct *pend_lock = &locks[j];
1525
1526                                 /* Ignore our own or non-pending locks. */
1527                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1528                                         continue;
1529                                 }
1530
1531                                 /* Optimisation - don't send to this fnum as we're
1532                                    closing it. */
1533                                 if (pend_lock->context.tid == tid &&
1534                                     procid_equal(&pend_lock->context.pid, &pid) &&
1535                                     pend_lock->fnum == fnum) {
1536                                         continue;
1537                                 }
1538
1539                                 /* We could send specific lock info here... */
1540                                 if (brl_pending_overlap(lock, pend_lock)) {
1541                                         messaging_send(msg_ctx, pend_lock->context.pid,
1542                                                        MSG_SMB_UNLOCK, &data_blob_null);
1543                                 }
1544                         }
1545
1546                         /* found it - delete it */
1547                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1548                                 memmove(&locks[i], &locks[i+1], 
1549                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1550                         }
1551                         br_lck->num_locks--;
1552                         br_lck->modified = True;
1553                         i--;
1554                         dcount++;
1555                 }
1556         }
1557
1558         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1559                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1560                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1561         }
1562 }
1563
1564 /****************************************************************************
1565  Ensure this set of lock entries is valid.
1566 ****************************************************************************/
1567 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1568 {
1569         unsigned int i;
1570         unsigned int num_valid_entries = 0;
1571         struct lock_struct *locks = *pplocks;
1572
1573         for (i = 0; i < *pnum_entries; i++) {
1574                 struct lock_struct *lock_data = &locks[i];
1575                 if (!serverid_exists(&lock_data->context.pid)) {
1576                         /* This process no longer exists - mark this
1577                            entry as invalid by zeroing it. */
1578                         ZERO_STRUCTP(lock_data);
1579                 } else {
1580                         num_valid_entries++;
1581                 }
1582         }
1583
1584         if (num_valid_entries != *pnum_entries) {
1585                 struct lock_struct *new_lock_data = NULL;
1586
1587                 if (num_valid_entries) {
1588                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1589                         if (!new_lock_data) {
1590                                 DEBUG(3, ("malloc fail\n"));
1591                                 return False;
1592                         }
1593
1594                         num_valid_entries = 0;
1595                         for (i = 0; i < *pnum_entries; i++) {
1596                                 struct lock_struct *lock_data = &locks[i];
1597                                 if (lock_data->context.smbpid &&
1598                                                 lock_data->context.tid) {
1599                                         /* Valid (nonzero) entry - copy it. */
1600                                         memcpy(&new_lock_data[num_valid_entries],
1601                                                 lock_data, sizeof(struct lock_struct));
1602                                         num_valid_entries++;
1603                                 }
1604                         }
1605                 }
1606
1607                 SAFE_FREE(*pplocks);
1608                 *pplocks = new_lock_data;
1609                 *pnum_entries = num_valid_entries;
1610         }
1611
1612         return True;
1613 }
1614
/*
 * State handed to traverse_fn() by brl_forall(): the per-lock callback
 * and the opaque pointer that is passed back to it.
 */
struct brl_forall_cb {
        /* Invoked once per lock entry; traverse_fn() skips the calls when NULL. */
        void (*fn)(struct file_id id, struct server_id pid,
                   enum brl_type lock_type,
                   enum brl_flavour lock_flav,
                   br_off start, br_off size,
                   void *private_data);
        /* Passed unchanged as the last argument of fn. */
        void *private_data;
};
1623
1624 /****************************************************************************
1625  Traverse the whole database with this function, calling traverse_callback
1626  on each lock.
1627 ****************************************************************************/
1628
1629 static int traverse_fn(struct db_record *rec, void *state)
1630 {
1631         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1632         struct lock_struct *locks;
1633         struct file_id *key;
1634         unsigned int i;
1635         unsigned int num_locks = 0;
1636         unsigned int orig_num_locks = 0;
1637
1638         /* In a traverse function we must make a copy of
1639            dbuf before modifying it. */
1640
1641         locks = (struct lock_struct *)memdup(rec->value.dptr,
1642                                              rec->value.dsize);
1643         if (!locks) {
1644                 return -1; /* Terminate traversal. */
1645         }
1646
1647         key = (struct file_id *)rec->key.dptr;
1648         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1649
1650         /* Ensure the lock db is clean of entries from invalid processes. */
1651
1652         if (!validate_lock_entries(&num_locks, &locks)) {
1653                 SAFE_FREE(locks);
1654                 return -1; /* Terminate traversal */
1655         }
1656
1657         if (orig_num_locks != num_locks) {
1658                 if (num_locks) {
1659                         TDB_DATA data;
1660                         data.dptr = (uint8_t *)locks;
1661                         data.dsize = num_locks*sizeof(struct lock_struct);
1662                         rec->store(rec, data, TDB_REPLACE);
1663                 } else {
1664                         rec->delete_rec(rec);
1665                 }
1666         }
1667
1668         if (cb->fn) {
1669                 for ( i=0; i<num_locks; i++) {
1670                         cb->fn(*key,
1671                                 locks[i].context.pid,
1672                                 locks[i].lock_type,
1673                                 locks[i].lock_flav,
1674                                 locks[i].start,
1675                                 locks[i].size,
1676                                 cb->private_data);
1677                 }
1678         }
1679
1680         SAFE_FREE(locks);
1681         return 0;
1682 }
1683
1684 /*******************************************************************
1685  Call the specified function on each lock in the database.
1686 ********************************************************************/
1687
1688 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1689                           enum brl_type lock_type,
1690                           enum brl_flavour lock_flav,
1691                           br_off start, br_off size,
1692                           void *private_data),
1693                void *private_data)
1694 {
1695         struct brl_forall_cb cb;
1696
1697         if (!brlock_db) {
1698                 return 0;
1699         }
1700         cb.fn = fn;
1701         cb.private_data = private_data;
1702         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1703 }
1704
1705 /*******************************************************************
1706  Store a potentially modified set of byte range lock data back into
1707  the database.
1708  Unlock the record.
1709 ********************************************************************/
1710
1711 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1712 {
1713         if (br_lck->read_only) {
1714                 SMB_ASSERT(!br_lck->modified);
1715         }
1716
1717         if (!br_lck->modified) {
1718                 goto done;
1719         }
1720
1721         if (br_lck->num_locks == 0) {
1722                 /* No locks - delete this entry. */
1723                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1724                 if (!NT_STATUS_IS_OK(status)) {
1725                         DEBUG(0, ("delete_rec returned %s\n",
1726                                   nt_errstr(status)));
1727                         smb_panic("Could not delete byte range lock entry");
1728                 }
1729         } else {
1730                 TDB_DATA data;
1731                 NTSTATUS status;
1732
1733                 data.dptr = (uint8 *)br_lck->lock_data;
1734                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1735
1736                 status = br_lck->record->store(br_lck->record, data,
1737                                                TDB_REPLACE);
1738                 if (!NT_STATUS_IS_OK(status)) {
1739                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1740                         smb_panic("Could not store byte range mode entry");
1741                 }
1742         }
1743
1744  done:
1745
1746         SAFE_FREE(br_lck->lock_data);
1747         TALLOC_FREE(br_lck->record);
1748         return 0;
1749 }
1750
1751 /*******************************************************************
1752  Fetch a set of byte range lock data from the database.
1753  Leave the record locked.
1754  TALLOC_FREE(brl) will release the lock in the destructor.
1755 ********************************************************************/
1756
1757 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1758                                         files_struct *fsp, bool read_only)
1759 {
1760         TDB_DATA key, data;
1761         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1762
1763         if (br_lck == NULL) {
1764                 return NULL;
1765         }
1766
1767         br_lck->fsp = fsp;
1768         br_lck->num_locks = 0;
1769         br_lck->modified = False;
1770         memset(&br_lck->key, '\0', sizeof(struct file_id));
1771         br_lck->key = fsp->file_id;
1772
1773         key.dptr = (uint8 *)&br_lck->key;
1774         key.dsize = sizeof(struct file_id);
1775
1776         if (!fsp->lockdb_clean) {
1777                 /* We must be read/write to clean
1778                    the dead entries. */
1779                 read_only = False;
1780         }
1781
1782         if (read_only) {
1783                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1784                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1785                         TALLOC_FREE(br_lck);
1786                         return NULL;
1787                 }
1788                 br_lck->record = NULL;
1789         }
1790         else {
1791                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1792
1793                 if (br_lck->record == NULL) {
1794                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1795                         TALLOC_FREE(br_lck);
1796                         return NULL;
1797                 }
1798
1799                 data = br_lck->record->value;
1800         }
1801
1802         br_lck->read_only = read_only;
1803         br_lck->lock_data = NULL;
1804
1805         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1806
1807         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1808
1809         if (br_lck->num_locks != 0) {
1810                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1811                                                      br_lck->num_locks);
1812                 if (br_lck->lock_data == NULL) {
1813                         DEBUG(0, ("malloc failed\n"));
1814                         TALLOC_FREE(br_lck);
1815                         return NULL;
1816                 }
1817
1818                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1819         }
1820         
1821         if (!fsp->lockdb_clean) {
1822                 int orig_num_locks = br_lck->num_locks;
1823
1824                 /* This is the first time we've accessed this. */
1825                 /* Go through and ensure all entries exist - remove any that don't. */
1826                 /* Makes the lockdb self cleaning at low cost. */
1827
1828                 if (!validate_lock_entries(&br_lck->num_locks,
1829                                            &br_lck->lock_data)) {
1830                         SAFE_FREE(br_lck->lock_data);
1831                         TALLOC_FREE(br_lck);
1832                         return NULL;
1833                 }
1834
1835                 /* Ensure invalid locks are cleaned up in the destructor. */
1836                 if (orig_num_locks != br_lck->num_locks) {
1837                         br_lck->modified = True;
1838                 }
1839
1840                 /* Mark the lockdb as "clean" as seen from this open file. */
1841                 fsp->lockdb_clean = True;
1842         }
1843
1844         if (DEBUGLEVEL >= 10) {
1845                 unsigned int i;
1846                 struct lock_struct *locks = br_lck->lock_data;
1847                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1848                         br_lck->num_locks,
1849                           file_id_string_tos(&fsp->file_id)));
1850                 for( i = 0; i < br_lck->num_locks; i++) {
1851                         print_lock_struct(i, &locks[i]);
1852                 }
1853         }
1854         return br_lck;
1855 }
1856
1857 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1858                                         files_struct *fsp)
1859 {
1860         return brl_get_locks_internal(mem_ctx, fsp, False);
1861 }
1862
1863 struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
1864                                         files_struct *fsp)
1865 {
1866         return brl_get_locks_internal(mem_ctx, fsp, True);
1867 }
1868
/*
 * Accumulator used by brl_revalidate(): the list of process ids that
 * own pending lock entries, filled in by brl_revalidate_collect().
 */
struct brl_revalidate_state {
        /* Bookkeeping for add_to_large_array(); brl_revalidate() treats
           a value of -1 as an allocation failure. */
        ssize_t array_size;
        /* Number of entries currently stored in pids. */
        uint32 num_pids;
        /* Collected process ids; may contain duplicates. */
        struct server_id *pids;
};
1874
1875 /*
1876  * Collect PIDs of all processes with pending entries
1877  */
1878
1879 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1880                                    enum brl_type lock_type,
1881                                    enum brl_flavour lock_flav,
1882                                    br_off start, br_off size,
1883                                    void *private_data)
1884 {
1885         struct brl_revalidate_state *state =
1886                 (struct brl_revalidate_state *)private_data;
1887
1888         if (!IS_PENDING_LOCK(lock_type)) {
1889                 return;
1890         }
1891
1892         add_to_large_array(state, sizeof(pid), (void *)&pid,
1893                            &state->pids, &state->num_pids,
1894                            &state->array_size);
1895 }
1896
1897 /*
1898  * qsort callback to sort the processes
1899  */
1900
1901 static int compare_procids(const void *p1, const void *p2)
1902 {
1903         const struct server_id *i1 = (struct server_id *)p1;
1904         const struct server_id *i2 = (struct server_id *)p2;
1905
1906         if (i1->pid < i2->pid) return -1;
1907         if (i2->pid > i2->pid) return 1;
1908         return 0;
1909 }
1910
1911 /*
1912  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1913  * locks so that they retry. Mainly used in the cluster code after a node has
1914  * died.
1915  *
1916  * Done in two steps to avoid double-sends: First we collect all entries in an
1917  * array, then qsort that array and only send to non-dupes.
1918  */
1919
1920 static void brl_revalidate(struct messaging_context *msg_ctx,
1921                            void *private_data,
1922                            uint32_t msg_type,
1923                            struct server_id server_id,
1924                            DATA_BLOB *data)
1925 {
1926         struct brl_revalidate_state *state;
1927         uint32 i;
1928         struct server_id last_pid;
1929
1930         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
1931                 DEBUG(0, ("talloc failed\n"));
1932                 return;
1933         }
1934
1935         brl_forall(brl_revalidate_collect, state);
1936
1937         if (state->array_size == -1) {
1938                 DEBUG(0, ("talloc failed\n"));
1939                 goto done;
1940         }
1941
1942         if (state->num_pids == 0) {
1943                 goto done;
1944         }
1945
1946         qsort(state->pids, state->num_pids, sizeof(state->pids[0]),
1947               compare_procids);
1948
1949         ZERO_STRUCT(last_pid);
1950
1951         for (i=0; i<state->num_pids; i++) {
1952                 if (procid_equal(&last_pid, &state->pids[i])) {
1953                         /*
1954                          * We've seen that one already
1955                          */
1956                         continue;
1957                 }
1958
1959                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
1960                                &data_blob_null);
1961                 last_pid = state->pids[i];
1962         }
1963
1964  done:
1965         TALLOC_FREE(state);
1966         return;
1967 }
1968
1969 void brl_register_msgs(struct messaging_context *msg_ctx)
1970 {
1971         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
1972                            brl_revalidate);
1973 }