s3-locking: move locking prototypes out of proto.h.
[ddiss/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap.h"
28
29 #undef DBGC_CLASS
30 #define DBGC_CLASS DBGC_LOCKING
31
32 /*
33  * The pending close database handle.
34  */
35
36 static struct db_context *posix_pending_close_db; /* Created by posix_locking_init() via db_open_rbt(NULL) and freed by posix_locking_end(); holds both the per-file lock ref counts and the pending-close fd arrays. */
37
38 /****************************************************************************
39  First - the functions that deal with the underlying system locks - these
40  functions are used no matter if we're mapping CIFS Windows locks or CIFS
41  POSIX locks onto POSIX.
42 ****************************************************************************/
43
44 /****************************************************************************
45  Utility function to map a lock type correctly depending on the open
46  mode of a file.
47 ****************************************************************************/
48
49 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
50 {
51         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
52                 /*
53                  * Many UNIX's cannot get a write lock on a file opened read-only.
54                  * Win32 locking semantics allow this.
55                  * Do the best we can and attempt a read-only lock.
56                  */
57                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
58                 return F_RDLCK;
59         }
60
61         /*
62          * This return should be the most normal, as we attempt
63          * to always open files read/write.
64          */
65
66         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
67 }
68
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type. F_RDLCK gives
 "READ"; every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }
        return "WRITE";
}
77
78 /****************************************************************************
79  Check to see if the given unsigned lock range is within the possible POSIX
80  range. Modifies the given args to be in range if possible, just returns
81  False if not.
82 ****************************************************************************/
83
84 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
85                                 uint64_t u_offset, uint64_t u_count)
86 {
87         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
88         SMB_OFF_T count = (SMB_OFF_T)u_count;
89
90         /*
91          * For the type of system we are, attempt to
92          * find the maximum positive lock offset as an SMB_OFF_T.
93          */
94
95 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
96
97         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
98
99 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
100
101         /*
102          * In this case SMB_OFF_T is 64 bits,
103          * and the underlying system can handle 64 bit signed locks.
104          */
105
106         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
107         SMB_OFF_T mask = (mask2<<1);
108         SMB_OFF_T max_positive_lock_offset = ~mask;
109
110 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
111
112         /*
113          * In this case either SMB_OFF_T is 32 bits,
114          * or the underlying system cannot handle 64 bit signed locks.
115          * All offsets & counts must be 2^31 or less.
116          */
117
118         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
119
120 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
121
122         /*
123          * POSIX locks of length zero mean lock to end-of-file.
124          * Win32 locks of length zero are point probes. Ignore
125          * any Win32 locks of length zero. JRA.
126          */
127
128         if (count == (SMB_OFF_T)0) {
129                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
130                 return False;
131         }
132
133         /*
134          * If the given offset was > max_positive_lock_offset then we cannot map this at all
135          * ignore this lock.
136          */
137
138         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
139                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
140                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
141                 return False;
142         }
143
144         /*
145          * We must truncate the count to less than max_positive_lock_offset.
146          */
147
148         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
149                 count = max_positive_lock_offset;
150         }
151
152         /*
153          * Truncate count to end at max lock offset.
154          */
155
156         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
157                 count = max_positive_lock_offset - offset;
158         }
159
160         /*
161          * If we ate all the count, ignore this lock.
162          */
163
164         if (count == 0) {
165                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
166                                 (double)u_offset, (double)u_count ));
167                 return False;
168         }
169
170         /*
171          * The mapping was successful.
172          */
173
174         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
175                         (double)offset, (double)count ));
176
177         *offset_out = offset;
178         *count_out = count;
179         
180         return True;
181 }
182
183 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
184                        struct files_struct *fsp, int op, SMB_OFF_T offset,
185                        SMB_OFF_T count, int type)
186 {
187         VFS_FIND(lock);
188         return handle->fns->lock(handle, fsp, op, offset, count, type);
189 }
190
191 /****************************************************************************
192  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
193  broken NFS implementations.
194 ****************************************************************************/
195
196 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
197 {
198         bool ret;
199
200         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
201
202         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
203
204         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
205
206                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
207                                         (double)offset,(double)count));
208                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
209                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
210
211                 /*
212                  * If the offset is > 0x7FFFFFFF then this will cause problems on
213                  * 32 bit NFS mounted filesystems. Just ignore it.
214                  */
215
216                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
217                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
218                         return True;
219                 }
220
221                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
222                         /* 32 bit NFS file system, retry with smaller offset */
223                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
224                         errno = 0;
225                         count &= 0x7fffffff;
226                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
227                 }
228         }
229
230         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
231         return ret;
232 }
233
234 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
235                           struct files_struct *fsp, SMB_OFF_T *poffset,
236                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
237 {
238         VFS_FIND(getlock);
239         return handle->fns->getlock(handle, fsp, poffset, pcount, ptype, ppid);
240 }
241
242 /****************************************************************************
243  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
244  broken NFS implementations.
245 ****************************************************************************/
246
247 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
248 {
249         pid_t pid;
250         bool ret;
251
252         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
253                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
254
255         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
256
257         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
258
259                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
260                                         (double)*poffset,(double)*pcount));
261                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
262                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
263
264                 /*
265                  * If the offset is > 0x7FFFFFFF then this will cause problems on
266                  * 32 bit NFS mounted filesystems. Just ignore it.
267                  */
268
269                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
270                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
271                         return True;
272                 }
273
274                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
275                         /* 32 bit NFS file system, retry with smaller offset */
276                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
277                         errno = 0;
278                         *pcount &= 0x7fffffff;
279                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
280                 }
281         }
282
283         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
284         return ret;
285 }
286
287 /****************************************************************************
288  POSIX function to see if a file region is locked. Returns True if the
289  region is locked, False otherwise.
290 ****************************************************************************/
291
292 bool is_posix_locked(files_struct *fsp,
293                         uint64_t *pu_offset,
294                         uint64_t *pu_count,
295                         enum brl_type *plock_type,
296                         enum brl_flavour lock_flav)
297 {
298         SMB_OFF_T offset;
299         SMB_OFF_T count;
300         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
301
302         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
303                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
304                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
305
306         /*
307          * If the requested lock won't fit in the POSIX range, we will
308          * never set it, so presume it is not locked.
309          */
310
311         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
312                 return False;
313         }
314
315         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
316                 return False;
317         }
318
319         if (posix_lock_type == F_UNLCK) {
320                 return False;
321         }
322
323         if (lock_flav == POSIX_LOCK) {
324                 /* Only POSIX lock queries need to know the details. */
325                 *pu_offset = (uint64_t)offset;
326                 *pu_count = (uint64_t)count;
327                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
328         }
329         return True;
330 }
331
332 /****************************************************************************
333  Next - the functions that deal with in memory database storing representations
334  of either Windows CIFS locks or POSIX CIFS locks.
335 ****************************************************************************/
336
337 /* The key used in the in-memory POSIX databases. */
338
339 struct lock_ref_count_key {
340         struct file_id id; /* copied from fsp->file_id by locking_ref_count_key_fsp() */
341         char r; /* always set to 'r'; the extra byte makes this key differ from the plain file_id key used for the fd array in the same database */
342 }; 
343
344 /*******************************************************************
345  Form a static locking key for a dev/inode pair for the lock ref count
346 ******************************************************************/
347
348 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
349                                           struct lock_ref_count_key *tmp)
350 {
351         ZERO_STRUCTP(tmp);
352         tmp->id = fsp->file_id;
353         tmp->r = 'r';
354         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
355 }
356
357 /*******************************************************************
358  Convenience function to get an fd_array key from an fsp.
359 ******************************************************************/
360
361 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
362 {
363         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
364 }
365
366 /*******************************************************************
367  Create the in-memory POSIX lock databases.
368 ********************************************************************/
369
370 bool posix_locking_init(bool read_only)
371 {
372         if (posix_pending_close_db != NULL) {
373                 return true;
374         }
375
376         posix_pending_close_db = db_open_rbt(NULL);
377
378         if (posix_pending_close_db == NULL) {
379                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
380                 return false;
381         }
382
383         return true;
384 }
385
386 /*******************************************************************
387  Delete the in-memory POSIX lock databases.
388 ********************************************************************/
389
390 bool posix_locking_end(void)
391 {
392         /*
393          * Shouldn't we close all fd's here?
394          */
395         TALLOC_FREE(posix_pending_close_db);
396         return true;
397 }
398
399 /****************************************************************************
400  Next - the functions that deal with storing fd's that have outstanding
401  POSIX locks when closed.
402 ****************************************************************************/
403
404 /****************************************************************************
405  The records in posix_pending_close_db are keyed by dev/ino pair (file_id).
406  The lock reference count (the number of outstanding locks on all open fd's
407  on this dev/ino pair) is a single int stored under the file_id followed by
408  the character 'r'. The record stored under the plain file_id is an array of
409  ints: the fd's that were open on this dev/ino pair that should have been
410  closed, but can't as the lock ref count is non zero.
411 ****************************************************************************/
412
413 /****************************************************************************
414  Keep a reference count of the number of Windows locks open on this dev/ino
415  pair. Creates entry if it doesn't exist.
416 ****************************************************************************/
417
418 static void increment_windows_lock_ref_count(files_struct *fsp)
419 {
420         struct lock_ref_count_key tmp;
421         struct db_record *rec;
422         int lock_ref_count = 0;
423         NTSTATUS status;
424
425         rec = posix_pending_close_db->fetch_locked(
426                 posix_pending_close_db, talloc_tos(),
427                 locking_ref_count_key_fsp(fsp, &tmp));
428
429         SMB_ASSERT(rec != NULL);
430
431         if (rec->value.dptr != NULL) {
432                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
433                 memcpy(&lock_ref_count, rec->value.dptr,
434                        sizeof(lock_ref_count));
435         }
436
437         lock_ref_count++;
438
439         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
440                                                sizeof(lock_ref_count)), 0);
441
442         SMB_ASSERT(NT_STATUS_IS_OK(status));
443
444         TALLOC_FREE(rec);
445
446         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
447                   fsp_str_dbg(fsp), lock_ref_count));
448 }
449
450 /****************************************************************************
451  Bulk delete - subtract as many locks as we've just deleted.
452 ****************************************************************************/
453
454 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
455 {
456         struct lock_ref_count_key tmp;
457         struct db_record *rec;
458         int lock_ref_count = 0;
459         NTSTATUS status;
460
461         rec = posix_pending_close_db->fetch_locked(
462                 posix_pending_close_db, talloc_tos(),
463                 locking_ref_count_key_fsp(fsp, &tmp));
464
465         SMB_ASSERT((rec != NULL)
466                    && (rec->value.dptr != NULL)
467                    && (rec->value.dsize == sizeof(lock_ref_count)));
468
469         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
470
471         SMB_ASSERT(lock_ref_count > 0);
472
473         lock_ref_count -= dcount;
474
475         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
476                                                sizeof(lock_ref_count)), 0);
477
478         SMB_ASSERT(NT_STATUS_IS_OK(status));
479
480         TALLOC_FREE(rec);
481
482         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
483                   fsp_str_dbg(fsp), lock_ref_count));
484 }
485
486 static void decrement_windows_lock_ref_count(files_struct *fsp)
487 {
488         reduce_windows_lock_ref_count(fsp, 1);
489 }
490
491 /****************************************************************************
492  Fetch the lock ref count.
493 ****************************************************************************/
494
495 static int get_windows_lock_ref_count(files_struct *fsp)
496 {
497         struct lock_ref_count_key tmp;
498         TDB_DATA dbuf;
499         int res;
500         int lock_ref_count = 0;
501
502         res = posix_pending_close_db->fetch(
503                 posix_pending_close_db, talloc_tos(),
504                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
505
506         SMB_ASSERT(res == 0);
507
508         if (dbuf.dsize != 0) {
509                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
510                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
511                 TALLOC_FREE(dbuf.dptr);
512         }
513
514         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
515                   fsp_str_dbg(fsp), lock_ref_count));
516
517         return lock_ref_count;
518 }
519
520 /****************************************************************************
521  Delete a lock_ref_count entry.
522 ****************************************************************************/
523
524 static void delete_windows_lock_ref_count(files_struct *fsp)
525 {
526         struct lock_ref_count_key tmp;
527         struct db_record *rec;
528
529         rec = posix_pending_close_db->fetch_locked(
530                 posix_pending_close_db, talloc_tos(),
531                 locking_ref_count_key_fsp(fsp, &tmp));
532
533         SMB_ASSERT(rec != NULL);
534
535         /* Not a bug if it doesn't exist - no locks were ever granted. */
536
537         rec->delete_rec(rec);
538         TALLOC_FREE(rec);
539
540         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
541                   fsp_str_dbg(fsp)));
542 }
543
544 /****************************************************************************
545  Add an fd to the pending close tdb.
546 ****************************************************************************/
547
548 static void add_fd_to_close_entry(files_struct *fsp)
549 {
550         struct db_record *rec;
551         uint8_t *new_data;
552         NTSTATUS status;
553
554         rec = posix_pending_close_db->fetch_locked(
555                 posix_pending_close_db, talloc_tos(),
556                 fd_array_key_fsp(fsp));
557
558         SMB_ASSERT(rec != NULL);
559
560         new_data = TALLOC_ARRAY(
561                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
562
563         SMB_ASSERT(new_data != NULL);
564
565         memcpy(new_data, rec->value.dptr, rec->value.dsize);
566         memcpy(new_data + rec->value.dsize,
567                &fsp->fh->fd, sizeof(fsp->fh->fd));
568
569         status = rec->store(
570                 rec, make_tdb_data(new_data,
571                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
572
573         SMB_ASSERT(NT_STATUS_IS_OK(status));
574
575         TALLOC_FREE(rec);
576
577         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
578                   fsp->fh->fd, fsp_str_dbg(fsp)));
579 }
580
581 /****************************************************************************
582  Remove all fd entries for a specific dev/inode pair from the tdb.
583 ****************************************************************************/
584
585 static void delete_close_entries(files_struct *fsp)
586 {
587         struct db_record *rec;
588
589         rec = posix_pending_close_db->fetch_locked(
590                 posix_pending_close_db, talloc_tos(),
591                 fd_array_key_fsp(fsp));
592
593         SMB_ASSERT(rec != NULL);
594         rec->delete_rec(rec);
595         TALLOC_FREE(rec);
596 }
597
598 /****************************************************************************
599  Get the array of POSIX pending close records for an open fsp. Returns number
600  of entries.
601 ****************************************************************************/
602
603 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
604                                               files_struct *fsp, int **entries)
605 {
606         TDB_DATA dbuf;
607         int res;
608
609         res = posix_pending_close_db->fetch(
610                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
611                 &dbuf);
612
613         SMB_ASSERT(res == 0);
614
615         if (dbuf.dsize == 0) {
616                 *entries = NULL;
617                 return 0;
618         }
619
620         *entries = (int *)dbuf.dptr;
621         return (size_t)(dbuf.dsize / sizeof(int));
622 }
623
624 /****************************************************************************
625  Deal with pending closes needed by POSIX locking support.
626  Note that posix_locking_close_file() is expected to have been called
627  to delete all locks on this fsp before this function is called.
628 ****************************************************************************/
629
630 int fd_close_posix(struct files_struct *fsp)
631 {
632         int saved_errno = 0;
633         int ret;
634         int *fd_array = NULL;
635         size_t count, i;
636
637         if (!lp_locking(fsp->conn->params) ||
638             !lp_posix_locking(fsp->conn->params))
639         {
640                 /*
641                  * No locking or POSIX to worry about or we want POSIX semantics
642                  * which will lose all locks on all fd's open on this dev/inode,
643                  * just close.
644                  */
645                 return close(fsp->fh->fd);
646         }
647
648         if (get_windows_lock_ref_count(fsp)) {
649
650                 /*
651                  * There are outstanding locks on this dev/inode pair on
652                  * other fds. Add our fd to the pending close tdb and set
653                  * fsp->fh->fd to -1.
654                  */
655
656                 add_fd_to_close_entry(fsp);
657                 return 0;
658         }
659
660         /*
661          * No outstanding locks. Get the pending close fd's
662          * from the tdb and close them all.
663          */
664
665         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
666
667         if (count) {
668                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
669                           (unsigned int)count));
670
671                 for(i = 0; i < count; i++) {
672                         if (close(fd_array[i]) == -1) {
673                                 saved_errno = errno;
674                         }
675                 }
676
677                 /*
678                  * Delete all fd's stored in the tdb
679                  * for this dev/inode pair.
680                  */
681
682                 delete_close_entries(fsp);
683         }
684
685         TALLOC_FREE(fd_array);
686
687         /* Don't need a lock ref count on this dev/ino anymore. */
688         delete_windows_lock_ref_count(fsp);
689
690         /*
691          * Finally close the fd associated with this fsp.
692          */
693
694         ret = close(fsp->fh->fd);
695
696         if (ret == 0 && saved_errno != 0) {
697                 errno = saved_errno;
698                 ret = -1;
699         }
700
701         return ret;
702 }
703
704 /****************************************************************************
705  Next - the functions that deal with the mapping CIFS Windows locks onto
706  the underlying system POSIX locks.
707 ****************************************************************************/
708
709 /*
710  * Structure used when splitting a lock range
711  * into a POSIX lock range. Doubly linked list.
712  */
713
714 struct lock_list {
715         struct lock_list *next; /* following entry in the list, followed when walking the ranges */
716         struct lock_list *prev; /* preceding entry in the list */
717         SMB_OFF_T start; /* first offset covered by this range */
718         SMB_OFF_T size; /* length of the range; overlap checks treat start + size as its end */
719 };
720
721 /****************************************************************************
722  Create a list of lock ranges that don't overlap a given range. Used in calculating
723  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
724  understand it :-).
725 ****************************************************************************/
726
727 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
728                                                 struct lock_list *lhead,
729                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
730                                                 files_struct *fsp,
731                                                 const struct lock_struct *plocks,
732                                                 int num_locks)
733 {
734         int i;
735
736         /*
737          * Check the current lock list on this dev/inode pair.
738          * Quit if the list is deleted.
739          */
740
741         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
742                 (double)lhead->start, (double)lhead->size ));
743
744         for (i=0; i<num_locks && lhead; i++) {
745                 const struct lock_struct *lock = &plocks[i];
746                 struct lock_list *l_curr;
747
748                 /* Ignore all but read/write locks. */
749                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
750                         continue;
751                 }
752
753                 /* Ignore locks not owned by this process. */
754                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
755                         continue;
756                 }
757
758                 /*
759                  * Walk the lock list, checking for overlaps. Note that
760                  * the lock list can expand within this loop if the current
761                  * range being examined needs to be split.
762                  */
763
764                 for (l_curr = lhead; l_curr;) {
765
766                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
767                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
768
769                         if ( (l_curr->start >= (lock->start + lock->size)) ||
770                                  (lock->start >= (l_curr->start + l_curr->size))) {
771
772                                 /* No overlap with existing lock - leave this range alone. */
773 /*********************************************
774                                              +---------+
775                                              | l_curr  |
776                                              +---------+
777                                 +-------+
778                                 | lock  |
779                                 +-------+
780 OR....
781              +---------+
782              |  l_curr |
783              +---------+
784 **********************************************/
785
786                                 DEBUG(10,(" no overlap case.\n" ));
787
788                                 l_curr = l_curr->next;
789
790                         } else if ( (l_curr->start >= lock->start) &&
791                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
792
793                                 /*
794                                  * This range is completely overlapped by this existing lock range
795                                  * and thus should have no effect. Delete it from the list.
796                                  */
797 /*********************************************
798                 +---------+
799                 |  l_curr |
800                 +---------+
801         +---------------------------+
802         |       lock                |
803         +---------------------------+
804 **********************************************/
805                                 /* Save the next pointer */
806                                 struct lock_list *ul_next = l_curr->next;
807
808                                 DEBUG(10,(" delete case.\n" ));
809
810                                 DLIST_REMOVE(lhead, l_curr);
811                                 if(lhead == NULL) {
812                                         break; /* No more list... */
813                                 }
814
815                                 l_curr = ul_next;
816                                 
817                         } else if ( (l_curr->start >= lock->start) &&
818                                                 (l_curr->start < lock->start + lock->size) &&
819                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
820
821                                 /*
822                                  * This range overlaps the existing lock range at the high end.
823                                  * Truncate by moving start to existing range end and reducing size.
824                                  */
825 /*********************************************
826                 +---------------+
827                 |  l_curr       |
828                 +---------------+
829         +---------------+
830         |    lock       |
831         +---------------+
832 BECOMES....
833                         +-------+
834                         | l_curr|
835                         +-------+
836 **********************************************/
837
838                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
839                                 l_curr->start = lock->start + lock->size;
840
841                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
842                                                                 (double)l_curr->start, (double)l_curr->size ));
843
844                                 l_curr = l_curr->next;
845
846                         } else if ( (l_curr->start < lock->start) &&
847                                                 (l_curr->start + l_curr->size > lock->start) &&
848                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
849
850                                 /*
851                                  * This range overlaps the existing lock range at the low end.
852                                  * Truncate by reducing size.
853                                  */
854 /*********************************************
855    +---------------+
856    |  l_curr       |
857    +---------------+
858            +---------------+
859            |    lock       |
860            +---------------+
861 BECOMES....
862    +-------+
863    | l_curr|
864    +-------+
865 **********************************************/
866
867                                 l_curr->size = lock->start - l_curr->start;
868
869                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
870                                                                 (double)l_curr->start, (double)l_curr->size ));
871
872                                 l_curr = l_curr->next;
873                 
874                         } else if ( (l_curr->start < lock->start) &&
875                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
876                                 /*
877                                  * Worst case scenario. Range completely overlaps an existing
878                                  * lock range. Split the request into two, push the new (upper) request
879                                  * into the dlink list, and continue with the entry after l_new (as we
880                                  * know that l_new will not overlap with this lock).
881                                  */
882 /*********************************************
883         +---------------------------+
884         |        l_curr             |
885         +---------------------------+
886                 +---------+
887                 | lock    |
888                 +---------+
889 BECOMES.....
890         +-------+         +---------+
891         | l_curr|         | l_new   |
892         +-------+         +---------+
893 **********************************************/
894                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
895
896                                 if(l_new == NULL) {
897                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
898                                         return NULL; /* The talloc_destroy takes care of cleanup. */
899                                 }
900
901                                 ZERO_STRUCTP(l_new);
902                                 l_new->start = lock->start + lock->size;
903                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
904
905                                 /* Truncate the l_curr. */
906                                 l_curr->size = lock->start - l_curr->start;
907
908                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
909 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
910                                                                 (double)l_new->start, (double)l_new->size ));
911
912                                 /*
913                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
914                                  */
915                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
916
917                                 /* And move after the link we added. */
918                                 l_curr = l_new->next;
919
920                         } else {
921
922                                 /*
923                                  * This logic case should never happen. Ensure this is the
924                                  * case by forcing an abort.... Remove in production.
925                                  */
926                                 char *msg = NULL;
927
928                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
929 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
930                                         smb_panic(msg);
931                                 } else {
932                                         smb_panic("posix_lock_list");
933                                 }
934                         }
935                 } /* end for ( l_curr = lhead; l_curr;) */
936         } /* end for (i=0; i<num_locks && ul_head; i++) */
937
938         return lhead;
939 }
940
941 /****************************************************************************
942  POSIX function to acquire a lock. Returns True if the
943  lock could be granted, False if not.
944 ****************************************************************************/
945
946 bool set_posix_lock_windows_flavour(files_struct *fsp,
947                         uint64_t u_offset,
948                         uint64_t u_count,
949                         enum brl_type lock_type,
950                         const struct lock_context *lock_ctx,
951                         const struct lock_struct *plocks,
952                         int num_locks,
953                         int *errno_ret)
954 {
955         SMB_OFF_T offset;
956         SMB_OFF_T count;
957         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
958         bool ret = True;
959         size_t lock_count;
960         TALLOC_CTX *l_ctx = NULL;
961         struct lock_list *llist = NULL;
962         struct lock_list *ll = NULL;
963
964         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
965                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
966                  (double)u_offset, (double)u_count,
967                  posix_lock_type_name(lock_type)));
968
969         /*
970          * If the requested lock won't fit in the POSIX range, we will
971          * pretend it was successful.
972          */
973
974         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
975                 increment_windows_lock_ref_count(fsp);
976                 return True;
977         }
978
979         /*
980          * Windows is very strange. It allows read locks to be overlayed
981          * (even over a write lock), but leaves the write lock in force until the first
982          * unlock. It also reference counts the locks. This means the following sequence :
983          *
984          * process1                                      process2
985          * ------------------------------------------------------------------------
986          * WRITE LOCK : start = 2, len = 10
987          *                                            READ LOCK: start =0, len = 10 - FAIL
988          * READ LOCK : start = 0, len = 14 
989          *                                            READ LOCK: start =0, len = 10 - FAIL
990          * UNLOCK : start = 2, len = 10
991          *                                            READ LOCK: start =0, len = 10 - OK
992          *
993          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
994          * would leave a single read lock over the 0-14 region.
995          */
996         
997         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
998                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
999                 return False;
1000         }
1001
1002         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1003                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1004                 talloc_destroy(l_ctx);
1005                 return False;
1006         }
1007
1008         /*
1009          * Create the initial list entry containing the
1010          * lock we want to add.
1011          */
1012
1013         ZERO_STRUCTP(ll);
1014         ll->start = offset;
1015         ll->size = count;
1016
1017         DLIST_ADD(llist, ll);
1018
1019         /*
1020          * The following call calculates if there are any
1021          * overlapping locks held by this process on
1022          * fd's open on the same file and splits this list
1023          * into a list of lock ranges that do not overlap with existing
1024          * POSIX locks.
1025          */
1026
1027         llist = posix_lock_list(l_ctx,
1028                                 llist,
1029                                 lock_ctx, /* Lock context llist belongs to. */
1030                                 fsp,
1031                                 plocks,
1032                                 num_locks);
1033
1034         /*
1035          * Add the POSIX locks on the list of ranges returned.
1036          * As the lock is supposed to be added atomically, we need to
1037          * back out all the locks if any one of these calls fail.
1038          */
1039
1040         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1041                 offset = ll->start;
1042                 count = ll->size;
1043
1044                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1045                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1046
1047                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1048                         *errno_ret = errno;
1049                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1050                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1051                         ret = False;
1052                         break;
1053                 }
1054         }
1055
1056         if (!ret) {
1057
1058                 /*
1059                  * Back out all the POSIX locks we have on fail.
1060                  */
1061
1062                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1063                         offset = ll->start;
1064                         count = ll->size;
1065
1066                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1067                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1068
1069                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1070                 }
1071         } else {
1072                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1073                 increment_windows_lock_ref_count(fsp);
1074         }
1075
1076         talloc_destroy(l_ctx);
1077         return ret;
1078 }
1079
1080 /****************************************************************************
1081  POSIX function to release a lock. Returns True if the
1082  lock could be released, False if not.
1083 ****************************************************************************/
1084
1085 bool release_posix_lock_windows_flavour(files_struct *fsp,
1086                                 uint64_t u_offset,
1087                                 uint64_t u_count,
1088                                 enum brl_type deleted_lock_type,
1089                                 const struct lock_context *lock_ctx,
1090                                 const struct lock_struct *plocks,
1091                                 int num_locks)
1092 {
1093         SMB_OFF_T offset;
1094         SMB_OFF_T count;
1095         bool ret = True;
1096         TALLOC_CTX *ul_ctx = NULL;
1097         struct lock_list *ulist = NULL;
1098         struct lock_list *ul = NULL;
1099
1100         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1101                  "count = %.0f\n", fsp_str_dbg(fsp),
1102                  (double)u_offset, (double)u_count));
1103
1104         /* Remember the number of Windows locks we have on this dev/ino pair. */
1105         decrement_windows_lock_ref_count(fsp);
1106
1107         /*
1108          * If the requested lock won't fit in the POSIX range, we will
1109          * pretend it was successful.
1110          */
1111
1112         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1113                 return True;
1114         }
1115
1116         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1117                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1118                 return False;
1119         }
1120
1121         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1122                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1123                 talloc_destroy(ul_ctx);
1124                 return False;
1125         }
1126
1127         /*
1128          * Create the initial list entry containing the
1129          * lock we want to remove.
1130          */
1131
1132         ZERO_STRUCTP(ul);
1133         ul->start = offset;
1134         ul->size = count;
1135
1136         DLIST_ADD(ulist, ul);
1137
1138         /*
1139          * The following call calculates if there are any
1140          * overlapping locks held by this process on
1141          * fd's open on the same file and creates a
1142          * list of unlock ranges that will allow
1143          * POSIX lock ranges to remain on the file whilst the
1144          * unlocks are performed.
1145          */
1146
1147         ulist = posix_lock_list(ul_ctx,
1148                                 ulist,
1149                                 lock_ctx, /* Lock context ulist belongs to. */
1150                                 fsp,
1151                                 plocks,
1152                                 num_locks);
1153
1154         /*
1155          * If there were any overlapped entries (list is > 1 or size or start have changed),
1156          * and the lock_type we just deleted from
1157          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1158          * the POSIX lock to a read lock. This allows any overlapping read locks
1159          * to be atomically maintained.
1160          */
1161
1162         if (deleted_lock_type == WRITE_LOCK &&
1163                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1164
1165                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1166                         (double)offset, (double)count ));
1167
1168                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1169                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1170                         talloc_destroy(ul_ctx);
1171                         return False;
1172                 }
1173         }
1174
1175         /*
1176          * Release the POSIX locks on the list of ranges returned.
1177          */
1178
1179         for(; ulist; ulist = ulist->next) {
1180                 offset = ulist->start;
1181                 count = ulist->size;
1182
1183                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1184                         (double)offset, (double)count ));
1185
1186                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1187                         ret = False;
1188                 }
1189         }
1190
1191         talloc_destroy(ul_ctx);
1192         return ret;
1193 }
1194
1195 /****************************************************************************
1196  Next - the functions that deal with mapping CIFS POSIX locks onto
1197  the underlying system POSIX locks.
1198 ****************************************************************************/
1199
1200 /****************************************************************************
1201  POSIX function to acquire a lock. Returns True if the
1202  lock could be granted, False if not.
1203  As POSIX locks don't stack or conflict (they just overwrite)
1204  we can map the requested lock directly onto a system one. We
1205  know it doesn't conflict with locks on other contexts as the
1206  upper layer would have refused it.
1207 ****************************************************************************/
1208
1209 bool set_posix_lock_posix_flavour(files_struct *fsp,
1210                         uint64_t u_offset,
1211                         uint64_t u_count,
1212                         enum brl_type lock_type,
1213                         int *errno_ret)
1214 {
1215         SMB_OFF_T offset;
1216         SMB_OFF_T count;
1217         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1218
1219         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1220                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1221                  (double)u_offset, (double)u_count,
1222                  posix_lock_type_name(lock_type)));
1223
1224         /*
1225          * If the requested lock won't fit in the POSIX range, we will
1226          * pretend it was successful.
1227          */
1228
1229         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1230                 return True;
1231         }
1232
1233         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1234                 *errno_ret = errno;
1235                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1236                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1237                 return False;
1238         }
1239         return True;
1240 }
1241
1242 /****************************************************************************
1243  POSIX function to release a lock. Returns True if the
1244  lock could be released, False if not.
1245  We are given a complete lock state from the upper layer which is what the lock
1246  state should be after the unlock has already been done, so what
1247  we do is punch out holes in the unlock range where locks owned by this process
1248  have a different lock context.
1249 ****************************************************************************/
1250
1251 bool release_posix_lock_posix_flavour(files_struct *fsp,
1252                                 uint64_t u_offset,
1253                                 uint64_t u_count,
1254                                 const struct lock_context *lock_ctx,
1255                                 const struct lock_struct *plocks,
1256                                 int num_locks)
1257 {
1258         bool ret = True;
1259         SMB_OFF_T offset;
1260         SMB_OFF_T count;
1261         TALLOC_CTX *ul_ctx = NULL;
1262         struct lock_list *ulist = NULL;
1263         struct lock_list *ul = NULL;
1264
1265         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1266                  "count = %.0f\n", fsp_str_dbg(fsp),
1267                  (double)u_offset, (double)u_count));
1268
1269         /*
1270          * If the requested lock won't fit in the POSIX range, we will
1271          * pretend it was successful.
1272          */
1273
1274         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1275                 return True;
1276         }
1277
1278         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1279                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1280                 return False;
1281         }
1282
1283         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1284                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1285                 talloc_destroy(ul_ctx);
1286                 return False;
1287         }
1288
1289         /*
1290          * Create the initial list entry containing the
1291          * lock we want to remove.
1292          */
1293
1294         ZERO_STRUCTP(ul);
1295         ul->start = offset;
1296         ul->size = count;
1297
1298         DLIST_ADD(ulist, ul);
1299
1300         /*
1301          * Walk the given array creating a linked list
1302          * of unlock requests.
1303          */
1304
1305         ulist = posix_lock_list(ul_ctx,
1306                                 ulist,
1307                                 lock_ctx, /* Lock context ulist belongs to. */
1308                                 fsp,
1309                                 plocks,
1310                                 num_locks);
1311
1312         /*
1313          * Release the POSIX locks on the list of ranges returned.
1314          */
1315
1316         for(; ulist; ulist = ulist->next) {
1317                 offset = ulist->start;
1318                 count = ulist->size;
1319
1320                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1321                         (double)offset, (double)count ));
1322
1323                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1324                         ret = False;
1325                 }
1326         }
1327
1328         talloc_destroy(ul_ctx);
1329         return ret;
1330 }