r22009: change TDB_DATA from char * to unsigned char *
[ddiss/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Revision History:
21
22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23 */
24
25 #include "includes.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The pending close database handle.
32  */
33
34 static TDB_CONTEXT *posix_pending_close_tdb;
35
36 /****************************************************************************
37  First - the functions that deal with the underlying system locks - these
38  functions are used no matter if we're mapping CIFS Windows locks or CIFS
39  POSIX locks onto POSIX.
40 ****************************************************************************/
41
42 /****************************************************************************
43  Utility function to map a lock type correctly depending on the open
44  mode of a file.
45 ****************************************************************************/
46
47 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
48 {
49         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
50                 /*
51                  * Many UNIX's cannot get a write lock on a file opened read-only.
52                  * Win32 locking semantics allow this.
53                  * Do the best we can and attempt a read-only lock.
54                  */
55                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
56                 return F_RDLCK;
57         }
58
59         /*
60          * This return should be the most normal, as we attempt
61          * to always open files read/write.
62          */
63
64         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
65 }
66
67 /****************************************************************************
68  Debugging aid :-).
69 ****************************************************************************/
70
/*
 * Printable name for an fcntl() lock type, for debug output only.
 * Anything other than F_RDLCK is reported as "WRITE".
 */
static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
75
76 /****************************************************************************
77  Check to see if the given unsigned lock range is within the possible POSIX
78  range. Modifies the given args to be in range if possible, just returns
79  False if not.
80 ****************************************************************************/
81
82 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
83                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
84 {
85         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
86         SMB_OFF_T count = (SMB_OFF_T)u_count;
87
88         /*
89          * For the type of system we are, attempt to
90          * find the maximum positive lock offset as an SMB_OFF_T.
91          */
92
93 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
94
95         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
96
97 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
98
99         /*
100          * In this case SMB_OFF_T is 64 bits,
101          * and the underlying system can handle 64 bit signed locks.
102          */
103
104         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
105         SMB_OFF_T mask = (mask2<<1);
106         SMB_OFF_T max_positive_lock_offset = ~mask;
107
108 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
109
110         /*
111          * In this case either SMB_OFF_T is 32 bits,
112          * or the underlying system cannot handle 64 bit signed locks.
113          * All offsets & counts must be 2^31 or less.
114          */
115
116         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
117
118 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
119
120         /*
121          * POSIX locks of length zero mean lock to end-of-file.
122          * Win32 locks of length zero are point probes. Ignore
123          * any Win32 locks of length zero. JRA.
124          */
125
126         if (count == (SMB_OFF_T)0) {
127                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
128                 return False;
129         }
130
131         /*
132          * If the given offset was > max_positive_lock_offset then we cannot map this at all
133          * ignore this lock.
134          */
135
136         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
137                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
138                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
139                 return False;
140         }
141
142         /*
143          * We must truncate the count to less than max_positive_lock_offset.
144          */
145
146         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
147                 count = max_positive_lock_offset;
148         }
149
150         /*
151          * Truncate count to end at max lock offset.
152          */
153
154         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
155                 count = max_positive_lock_offset - offset;
156         }
157
158         /*
159          * If we ate all the count, ignore this lock.
160          */
161
162         if (count == 0) {
163                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
164                                 (double)u_offset, (double)u_count ));
165                 return False;
166         }
167
168         /*
169          * The mapping was successful.
170          */
171
172         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
173                         (double)offset, (double)count ));
174
175         *offset_out = offset;
176         *count_out = count;
177         
178         return True;
179 }
180
181 /****************************************************************************
182  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
183  broken NFS implementations.
184 ****************************************************************************/
185
186 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
187 {
188         BOOL ret;
189
190         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
191
192         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
193
194         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
195
196                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
197                                         (double)offset,(double)count));
198                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
199                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
200
201                 /*
202                  * If the offset is > 0x7FFFFFFF then this will cause problems on
203                  * 32 bit NFS mounted filesystems. Just ignore it.
204                  */
205
206                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
207                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
208                         return True;
209                 }
210
211                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
212                         /* 32 bit NFS file system, retry with smaller offset */
213                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
214                         errno = 0;
215                         count &= 0x7fffffff;
216                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
217                 }
218         }
219
220         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
221         return ret;
222 }
223
224 /****************************************************************************
225  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
226  broken NFS implementations.
227 ****************************************************************************/
228
229 static BOOL posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
230 {
231         pid_t pid;
232         BOOL ret;
233
234         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
235                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
236
237         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
238
239         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
240
241                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
242                                         (double)*poffset,(double)*pcount));
243                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
244                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
245
246                 /*
247                  * If the offset is > 0x7FFFFFFF then this will cause problems on
248                  * 32 bit NFS mounted filesystems. Just ignore it.
249                  */
250
251                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
252                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
253                         return True;
254                 }
255
256                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
257                         /* 32 bit NFS file system, retry with smaller offset */
258                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
259                         errno = 0;
260                         *pcount &= 0x7fffffff;
261                         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
262                 }
263         }
264
265         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
266         return ret;
267 }
268
269 /****************************************************************************
270  POSIX function to see if a file region is locked. Returns True if the
271  region is locked, False otherwise.
272 ****************************************************************************/
273
274 BOOL is_posix_locked(files_struct *fsp,
275                         SMB_BIG_UINT *pu_offset,
276                         SMB_BIG_UINT *pu_count,
277                         enum brl_type *plock_type,
278                         enum brl_flavour lock_flav)
279 {
280         SMB_OFF_T offset;
281         SMB_OFF_T count;
282         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
283
284         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
285                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
286
287         /*
288          * If the requested lock won't fit in the POSIX range, we will
289          * never set it, so presume it is not locked.
290          */
291
292         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
293                 return False;
294         }
295
296         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
297                 return False;
298         }
299
300         if (posix_lock_type == F_UNLCK) {
301                 return False;
302         }
303
304         if (lock_flav == POSIX_LOCK) {
305                 /* Only POSIX lock queries need to know the details. */
306                 *pu_offset = (SMB_BIG_UINT)offset;
307                 *pu_count = (SMB_BIG_UINT)count;
308                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
309         }
310         return True;
311 }
312
313 /****************************************************************************
314  Next - the functions that deal with in memory database storing representations
315  of either Windows CIFS locks or POSIX CIFS locks.
316 ****************************************************************************/
317
318 /* The key used in the in-memory POSIX databases. */
319
320 struct lock_ref_count_key {
321         SMB_DEV_T device;
322         SMB_INO_T inode;
323         char r;
324 }; 
325
326 struct fd_key {
327         SMB_DEV_T device;
328         SMB_INO_T inode;
329 }; 
330
331 /*******************************************************************
332  Form a static locking key for a dev/inode pair for the fd array.
333 ******************************************************************/
334
335 static TDB_DATA fd_array_key(SMB_DEV_T dev, SMB_INO_T inode)
336 {
337         static struct fd_key key;
338         TDB_DATA kbuf;
339
340         memset(&key, '\0', sizeof(key));
341         key.device = dev;
342         key.inode = inode;
343         kbuf.dptr = (uint8 *)&key;
344         kbuf.dsize = sizeof(key);
345         return kbuf;
346 }
347
348 /*******************************************************************
349  Form a static locking key for a dev/inode pair for the lock ref count
350 ******************************************************************/
351
352 static TDB_DATA locking_ref_count_key(SMB_DEV_T dev, SMB_INO_T inode)
353 {
354         static struct lock_ref_count_key key;
355         TDB_DATA kbuf;
356
357         memset(&key, '\0', sizeof(key));
358         key.device = dev;
359         key.inode = inode;
360         key.r = 'r';
361         kbuf.dptr = (uint8 *)&key;
362         kbuf.dsize = sizeof(key);
363         return kbuf;
364 }
365
366 /*******************************************************************
367  Convenience function to get an fd_array key from an fsp.
368 ******************************************************************/
369
370 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
371 {
372         return fd_array_key(fsp->dev, fsp->inode);
373 }
374
375 /*******************************************************************
376  Convenience function to get a lock ref count key from an fsp.
377 ******************************************************************/
378
379 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp)
380 {
381         return locking_ref_count_key(fsp->dev, fsp->inode);
382 }
383
384 /*******************************************************************
385  Create the in-memory POSIX lock databases.
386 ********************************************************************/
387
388 BOOL posix_locking_init(int read_only)
389 {
390         if (posix_pending_close_tdb) {
391                 return True;
392         }
393         
394         if (!posix_pending_close_tdb) {
395                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
396                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
397         }
398         if (!posix_pending_close_tdb) {
399                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
400                 return False;
401         }
402
403         return True;
404 }
405
406 /*******************************************************************
407  Delete the in-memory POSIX lock databases.
408 ********************************************************************/
409
410 BOOL posix_locking_end(void)
411 {
412         if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0) {
413                 return False;
414         }
415         return True;
416 }
417
418 /****************************************************************************
419  Next - the functions that deal with storing fd's that have outstanding
420  POSIX locks when closed.
421 ****************************************************************************/
422
423 /****************************************************************************
424  posix_pending_close_tdb holds two kinds of record per dev/ino pair.
425  The lock ref count record (key: dev/ino plus a trailing 'r') is a single
426  int counting the outstanding Windows locks on all open fd's on this dev/ino
427  pair. The fd array record (key: dev/ino only) is an array of ints, the fd's
428  that were open on this dev/ino pair that should have been closed, but can't
429  as the lock ref count is non zero.
430 ****************************************************************************/
431
432 /****************************************************************************
433  Keep a reference count of the number of Windows locks open on this dev/ino
434  pair. Creates entry if it doesn't exist.
435 ****************************************************************************/
436
437 static void increment_windows_lock_ref_count(files_struct *fsp)
438 {
439         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
440         TDB_DATA dbuf;
441         int lock_ref_count;
442
443         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
444         if (dbuf.dptr == NULL) {
445                 dbuf.dptr = (uint8 *)SMB_MALLOC_P(int);
446                 if (!dbuf.dptr) {
447                         smb_panic("increment_windows_lock_ref_count: malloc fail.\n");
448                 }
449                 memset(dbuf.dptr, '\0', sizeof(int));
450                 dbuf.dsize = sizeof(int);
451         }
452
453         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
454         lock_ref_count++;
455         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
456         
457         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
458                 smb_panic("increment_windows_lock_ref_count: tdb_store_fail.\n");
459         }
460         SAFE_FREE(dbuf.dptr);
461
462         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
463                 fsp->fsp_name, lock_ref_count ));
464 }
465
466 static void decrement_windows_lock_ref_count(files_struct *fsp)
467 {
468         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
469         TDB_DATA dbuf;
470         int lock_ref_count;
471
472         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
473         if (!dbuf.dptr) {
474                 smb_panic("decrement_windows_lock_ref_count: logic error.\n");
475         }
476
477         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
478         lock_ref_count--;
479         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
480
481         if (lock_ref_count < 0) {
482                 smb_panic("decrement_windows_lock_ref_count: lock_count logic error.\n");
483         }
484
485         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
486                 smb_panic("decrement_windows_lock_ref_count: tdb_store_fail.\n");
487         }
488         SAFE_FREE(dbuf.dptr);
489
490         DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
491                 fsp->fsp_name, lock_ref_count ));
492 }
493
494 /****************************************************************************
495  Bulk delete - subtract as many locks as we've just deleted.
496 ****************************************************************************/
497
498 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
499 {
500         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
501         TDB_DATA dbuf;
502         int lock_ref_count;
503
504         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
505         if (!dbuf.dptr) {
506                 return;
507         }
508
509         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
510         lock_ref_count -= dcount;
511
512         if (lock_ref_count < 0) {
513                 smb_panic("reduce_windows_lock_ref_count: lock_count logic error.\n");
514         }
515         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
516         
517         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
518                 smb_panic("reduce_windows_lock_ref_count: tdb_store_fail.\n");
519         }
520         SAFE_FREE(dbuf.dptr);
521
522         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
523                 fsp->fsp_name, lock_ref_count ));
524 }
525
526 /****************************************************************************
527  Fetch the lock ref count.
528 ****************************************************************************/
529
530 static int get_windows_lock_ref_count(files_struct *fsp)
531 {
532         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
533         TDB_DATA dbuf;
534         int lock_ref_count;
535
536         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
537         if (!dbuf.dptr) {
538                 lock_ref_count = 0;
539         } else {
540                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
541         }
542         SAFE_FREE(dbuf.dptr);
543
544         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
545                 fsp->fsp_name, lock_ref_count ));
546         return lock_ref_count;
547 }
548
549 /****************************************************************************
550  Delete a lock_ref_count entry.
551 ****************************************************************************/
552
553 static void delete_windows_lock_ref_count(files_struct *fsp)
554 {
555         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
556
557         /* Not a bug if it doesn't exist - no locks were ever granted. */
558         tdb_delete(posix_pending_close_tdb, kbuf);
559         DEBUG(10,("delete_windows_lock_ref_count for file %s\n", fsp->fsp_name));
560 }
561
562 /****************************************************************************
563  Add an fd to the pending close tdb.
564 ****************************************************************************/
565
566 static void add_fd_to_close_entry(files_struct *fsp)
567 {
568         TDB_DATA kbuf = fd_array_key_fsp(fsp);
569         TDB_DATA dbuf;
570
571         dbuf.dptr = NULL;
572         dbuf.dsize = 0;
573
574         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
575
576         dbuf.dptr = (uint8 *)SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
577         if (!dbuf.dptr) {
578                 smb_panic("add_fd_to_close_entry: Realloc fail !\n");
579         }
580
581         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
582         dbuf.dsize += sizeof(int);
583
584         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
585                 smb_panic("add_fd_to_close_entry: tdb_store_fail.\n");
586         }
587
588         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
589                 fsp->fh->fd, fsp->fsp_name ));
590
591         SAFE_FREE(dbuf.dptr);
592 }
593
594 /****************************************************************************
595  Remove all fd entries for a specific dev/inode pair from the tdb.
596 ****************************************************************************/
597
598 static void delete_close_entries(files_struct *fsp)
599 {
600         TDB_DATA kbuf = fd_array_key_fsp(fsp);
601
602         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1) {
603                 smb_panic("delete_close_entries: tdb_delete fail !\n");
604         }
605 }
606
607 /****************************************************************************
608  Get the array of POSIX pending close records for an open fsp. Caller must
609  free. Returns number of entries.
610 ****************************************************************************/
611
612 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
613 {
614         TDB_DATA kbuf = fd_array_key_fsp(fsp);
615         TDB_DATA dbuf;
616         size_t count = 0;
617
618         *entries = NULL;
619         dbuf.dptr = NULL;
620
621         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
622
623         if (!dbuf.dptr) {
624                 return 0;
625         }
626
627         *entries = (int *)dbuf.dptr;
628         count = (size_t)(dbuf.dsize / sizeof(int));
629
630         return count;
631 }
632
633 /****************************************************************************
634  Deal with pending closes needed by POSIX locking support.
635  Note that posix_locking_close_file() is expected to have been called
636  to delete all locks on this fsp before this function is called.
637 ****************************************************************************/
638
639 NTSTATUS fd_close_posix(struct connection_struct *conn, files_struct *fsp)
640 {
641         int saved_errno = 0;
642         int ret;
643         int *fd_array = NULL;
644         size_t count, i;
645
646         if (!lp_locking(fsp->conn->params) || !lp_posix_locking(conn->params)) {
647                 /*
648                  * No locking or POSIX to worry about or we want POSIX semantics
649                  * which will lose all locks on all fd's open on this dev/inode,
650                  * just close.
651                  */
652                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
653                 fsp->fh->fd = -1;
654                 return map_nt_error_from_unix(errno);
655         }
656
657         if (get_windows_lock_ref_count(fsp)) {
658
659                 /*
660                  * There are outstanding locks on this dev/inode pair on other fds.
661                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
662                  */
663
664                 add_fd_to_close_entry(fsp);
665                 fsp->fh->fd = -1;
666                 return NT_STATUS_OK;
667         }
668
669         /*
670          * No outstanding locks. Get the pending close fd's
671          * from the tdb and close them all.
672          */
673
674         count = get_posix_pending_close_entries(fsp, &fd_array);
675
676         if (count) {
677                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
678
679                 for(i = 0; i < count; i++) {
680                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
681                                 saved_errno = errno;
682                         }
683                 }
684
685                 /*
686                  * Delete all fd's stored in the tdb
687                  * for this dev/inode pair.
688                  */
689
690                 delete_close_entries(fsp);
691         }
692
693         SAFE_FREE(fd_array);
694
695         /* Don't need a lock ref count on this dev/ino anymore. */
696         delete_windows_lock_ref_count(fsp);
697
698         /*
699          * Finally close the fd associated with this fsp.
700          */
701
702         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
703
704         if (ret == 0 && saved_errno != 0) {
705                 errno = saved_errno;
706                 ret = -1;
707         } 
708
709         fsp->fh->fd = -1;
710
711         if (ret == -1) {
712                 return map_nt_error_from_unix(errno);
713         }
714
715         return NT_STATUS_OK;
716 }
717
718 /****************************************************************************
719  Next - the functions that deal with the mapping CIFS Windows locks onto
720  the underlying system POSIX locks.
721 ****************************************************************************/
722
723 /*
724  * Structure used when splitting a lock range
725  * into a POSIX lock range. Doubly linked list.
726  */
727
728 struct lock_list {
729         struct lock_list *next;
730         struct lock_list *prev;
731         SMB_OFF_T start;
732         SMB_OFF_T size;
733 };
734
735 /****************************************************************************
736  Create a list of lock ranges that don't overlap a given range. Used in calculating
737  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
738  understand it :-).
739 ****************************************************************************/
740
741 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
742                                                 struct lock_list *lhead,
743                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
744                                                 files_struct *fsp,
745                                                 const struct lock_struct *plocks,
746                                                 int num_locks)
747 {
748         int i;
749
750         /*
751          * Check the current lock list on this dev/inode pair.
752          * Quit if the list is deleted.
753          */
754
755         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
756                 (double)lhead->start, (double)lhead->size ));
757
758         for (i=0; i<num_locks && lhead; i++) {
759                 const struct lock_struct *lock = &plocks[i];
760                 struct lock_list *l_curr;
761
762                 /* Ignore all but read/write locks. */
763                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
764                         continue;
765                 }
766
767                 /* Ignore locks not owned by this process. */
768                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
769                         continue;
770                 }
771
772                 /*
773                  * Walk the lock list, checking for overlaps. Note that
774                  * the lock list can expand within this loop if the current
775                  * range being examined needs to be split.
776                  */
777
778                 for (l_curr = lhead; l_curr;) {
779
780                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
781                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
782
783                         if ( (l_curr->start >= (lock->start + lock->size)) ||
784                                  (lock->start >= (l_curr->start + l_curr->size))) {
785
786                                 /* No overlap with existing lock - leave this range alone. */
787 /*********************************************
788                                              +---------+
789                                              | l_curr  |
790                                              +---------+
791                                 +-------+
792                                 | lock  |
793                                 +-------+
794 OR....
795              +---------+
796              |  l_curr |
797              +---------+
798 **********************************************/
799
800                                 DEBUG(10,(" no overlap case.\n" ));
801
802                                 l_curr = l_curr->next;
803
804                         } else if ( (l_curr->start >= lock->start) &&
805                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
806
807                                 /*
808                                  * This range is completely overlapped by this existing lock range
809                                  * and thus should have no effect. Delete it from the list.
810                                  */
811 /*********************************************
812                 +---------+
813                 |  l_curr |
814                 +---------+
815         +---------------------------+
816         |       lock                |
817         +---------------------------+
818 **********************************************/
819                                 /* Save the next pointer */
820                                 struct lock_list *ul_next = l_curr->next;
821
822                                 DEBUG(10,(" delete case.\n" ));
823
824                                 DLIST_REMOVE(lhead, l_curr);
825                                 if(lhead == NULL) {
826                                         break; /* No more list... */
827                                 }
828
829                                 l_curr = ul_next;
830                                 
831                         } else if ( (l_curr->start >= lock->start) &&
832                                                 (l_curr->start < lock->start + lock->size) &&
833                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
834
835                                 /*
836                                  * This range overlaps the existing lock range at the high end.
837                                  * Truncate by moving start to existing range end and reducing size.
838                                  */
839 /*********************************************
840                 +---------------+
841                 |  l_curr       |
842                 +---------------+
843         +---------------+
844         |    lock       |
845         +---------------+
846 BECOMES....
847                         +-------+
848                         | l_curr|
849                         +-------+
850 **********************************************/
851
852                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
853                                 l_curr->start = lock->start + lock->size;
854
855                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
856                                                                 (double)l_curr->start, (double)l_curr->size ));
857
858                                 l_curr = l_curr->next;
859
860                         } else if ( (l_curr->start < lock->start) &&
861                                                 (l_curr->start + l_curr->size > lock->start) &&
862                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
863
864                                 /*
865                                  * This range overlaps the existing lock range at the low end.
866                                  * Truncate by reducing size.
867                                  */
868 /*********************************************
869    +---------------+
870    |  l_curr       |
871    +---------------+
872            +---------------+
873            |    lock       |
874            +---------------+
875 BECOMES....
876    +-------+
877    | l_curr|
878    +-------+
879 **********************************************/
880
881                                 l_curr->size = lock->start - l_curr->start;
882
883                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
884                                                                 (double)l_curr->start, (double)l_curr->size ));
885
886                                 l_curr = l_curr->next;
887                 
888                         } else if ( (l_curr->start < lock->start) &&
889                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
890                                 /*
891                                  * Worst case scenario. Range completely overlaps an existing
892                                  * lock range. Split the request into two, push the new (upper) request
893                                  * into the dlink list, and continue with the entry after l_new (as we
894                                  * know that l_new will not overlap with this lock).
895                                  */
896 /*********************************************
897         +---------------------------+
898         |        l_curr             |
899         +---------------------------+
900                 +---------+
901                 | lock    |
902                 +---------+
903 BECOMES.....
904         +-------+         +---------+
905         | l_curr|         | l_new   |
906         +-------+         +---------+
907 **********************************************/
908                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
909
910                                 if(l_new == NULL) {
911                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
912                                         return NULL; /* The talloc_destroy takes care of cleanup. */
913                                 }
914
915                                 ZERO_STRUCTP(l_new);
916                                 l_new->start = lock->start + lock->size;
917                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
918
919                                 /* Truncate the l_curr. */
920                                 l_curr->size = lock->start - l_curr->start;
921
922                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
923 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
924                                                                 (double)l_new->start, (double)l_new->size ));
925
926                                 /*
927                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
928                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
929                                  */
930
931                                 l_new->prev = l_curr;
932                                 l_new->next = l_curr->next;
933                                 l_curr->next = l_new;
934
935                                 /* And move after the link we added. */
936                                 l_curr = l_new->next;
937
938                         } else {
939
940                                 /*
941                                  * This logic case should never happen. Ensure this is the
942                                  * case by forcing an abort.... Remove in production.
943                                  */
944                                 pstring msg;
945
946                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
947 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
948
949                                 smb_panic(msg);
950                         }
951                 } /* end for ( l_curr = lhead; l_curr;) */
952         } /* end for (i=0; i<num_locks && ul_head; i++) */
953
954         return lhead;
955 }
956
957 /****************************************************************************
958  POSIX function to acquire a lock. Returns True if the
959  lock could be granted, False if not.
960 ****************************************************************************/
961
962 BOOL set_posix_lock_windows_flavour(files_struct *fsp,
963                         SMB_BIG_UINT u_offset,
964                         SMB_BIG_UINT u_count,
965                         enum brl_type lock_type,
966                         const struct lock_context *lock_ctx,
967                         const struct lock_struct *plocks,
968                         int num_locks,
969                         int *errno_ret)
970 {
971         SMB_OFF_T offset;
972         SMB_OFF_T count;
973         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
974         BOOL ret = True;
975         size_t lock_count;
976         TALLOC_CTX *l_ctx = NULL;
977         struct lock_list *llist = NULL;
978         struct lock_list *ll = NULL;
979
980         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
981                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
982
983         /*
984          * If the requested lock won't fit in the POSIX range, we will
985          * pretend it was successful.
986          */
987
988         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
989                 increment_windows_lock_ref_count(fsp);
990                 return True;
991         }
992
993         /*
994          * Windows is very strange. It allows read locks to be overlayed
995          * (even over a write lock), but leaves the write lock in force until the first
996          * unlock. It also reference counts the locks. This means the following sequence :
997          *
998          * process1                                      process2
999          * ------------------------------------------------------------------------
1000          * WRITE LOCK : start = 2, len = 10
1001          *                                            READ LOCK: start =0, len = 10 - FAIL
1002          * READ LOCK : start = 0, len = 14 
1003          *                                            READ LOCK: start =0, len = 10 - FAIL
1004          * UNLOCK : start = 2, len = 10
1005          *                                            READ LOCK: start =0, len = 10 - OK
1006          *
1007          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1008          * would leave a single read lock over the 0-14 region.
1009          */
1010         
1011         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1012                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1013                 return False;
1014         }
1015
1016         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1017                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1018                 talloc_destroy(l_ctx);
1019                 return False;
1020         }
1021
1022         /*
1023          * Create the initial list entry containing the
1024          * lock we want to add.
1025          */
1026
1027         ZERO_STRUCTP(ll);
1028         ll->start = offset;
1029         ll->size = count;
1030
1031         DLIST_ADD(llist, ll);
1032
1033         /*
1034          * The following call calculates if there are any
1035          * overlapping locks held by this process on
1036          * fd's open on the same file and splits this list
1037          * into a list of lock ranges that do not overlap with existing
1038          * POSIX locks.
1039          */
1040
1041         llist = posix_lock_list(l_ctx,
1042                                 llist,
1043                                 lock_ctx, /* Lock context llist belongs to. */
1044                                 fsp,
1045                                 plocks,
1046                                 num_locks);
1047
1048         /*
1049          * Add the POSIX locks on the list of ranges returned.
1050          * As the lock is supposed to be added atomically, we need to
1051          * back out all the locks if any one of these calls fail.
1052          */
1053
1054         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1055                 offset = ll->start;
1056                 count = ll->size;
1057
1058                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1059                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1060
1061                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1062                         *errno_ret = errno;
1063                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1064                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1065                         ret = False;
1066                         break;
1067                 }
1068         }
1069
1070         if (!ret) {
1071
1072                 /*
1073                  * Back out all the POSIX locks we have on fail.
1074                  */
1075
1076                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1077                         offset = ll->start;
1078                         count = ll->size;
1079
1080                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1081                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1082
1083                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1084                 }
1085         } else {
1086                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1087                 increment_windows_lock_ref_count(fsp);
1088         }
1089
1090         talloc_destroy(l_ctx);
1091         return ret;
1092 }
1093
1094 /****************************************************************************
1095  POSIX function to release a lock. Returns True if the
1096  lock could be released, False if not.
1097 ****************************************************************************/
1098
1099 BOOL release_posix_lock_windows_flavour(files_struct *fsp,
1100                                 SMB_BIG_UINT u_offset,
1101                                 SMB_BIG_UINT u_count,
1102                                 enum brl_type deleted_lock_type,
1103                                 const struct lock_context *lock_ctx,
1104                                 const struct lock_struct *plocks,
1105                                 int num_locks)
1106 {
1107         SMB_OFF_T offset;
1108         SMB_OFF_T count;
1109         BOOL ret = True;
1110         TALLOC_CTX *ul_ctx = NULL;
1111         struct lock_list *ulist = NULL;
1112         struct lock_list *ul = NULL;
1113
1114         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
1115                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1116
1117         /* Remember the number of Windows locks we have on this dev/ino pair. */
1118         decrement_windows_lock_ref_count(fsp);
1119
1120         /*
1121          * If the requested lock won't fit in the POSIX range, we will
1122          * pretend it was successful.
1123          */
1124
1125         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1126                 return True;
1127         }
1128
1129         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1130                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1131                 return False;
1132         }
1133
1134         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1135                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1136                 talloc_destroy(ul_ctx);
1137                 return False;
1138         }
1139
1140         /*
1141          * Create the initial list entry containing the
1142          * lock we want to remove.
1143          */
1144
1145         ZERO_STRUCTP(ul);
1146         ul->start = offset;
1147         ul->size = count;
1148
1149         DLIST_ADD(ulist, ul);
1150
1151         /*
1152          * The following call calculates if there are any
1153          * overlapping locks held by this process on
1154          * fd's open on the same file and creates a
1155          * list of unlock ranges that will allow
1156          * POSIX lock ranges to remain on the file whilst the
1157          * unlocks are performed.
1158          */
1159
1160         ulist = posix_lock_list(ul_ctx,
1161                                 ulist,
1162                                 lock_ctx, /* Lock context ulist belongs to. */
1163                                 fsp,
1164                                 plocks,
1165                                 num_locks);
1166
1167         /*
1168          * If there were any overlapped entries (list is > 1 or size or start have changed),
1169          * and the lock_type we just deleted from
1170          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1171          * the POSIX lock to a read lock. This allows any overlapping read locks
1172          * to be atomically maintained.
1173          */
1174
1175         if (deleted_lock_type == WRITE_LOCK &&
1176                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1177
1178                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1179                         (double)offset, (double)count ));
1180
1181                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1182                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1183                         talloc_destroy(ul_ctx);
1184                         return False;
1185                 }
1186         }
1187
1188         /*
1189          * Release the POSIX locks on the list of ranges returned.
1190          */
1191
1192         for(; ulist; ulist = ulist->next) {
1193                 offset = ulist->start;
1194                 count = ulist->size;
1195
1196                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1197                         (double)offset, (double)count ));
1198
1199                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1200                         ret = False;
1201                 }
1202         }
1203
1204         talloc_destroy(ul_ctx);
1205         return ret;
1206 }
1207
1208 /****************************************************************************
1209  Next - the functions that deal with mapping CIFS POSIX locks onto
1210  the underlying system POSIX locks.
1211 ****************************************************************************/
1212
1213 /****************************************************************************
1214  POSIX function to acquire a lock. Returns True if the
1215  lock could be granted, False if not.
1216  As POSIX locks don't stack or conflict (they just overwrite)
1217  we can map the requested lock directly onto a system one. We
1218  know it doesn't conflict with locks on other contexts as the
1219  upper layer would have refused it.
1220 ****************************************************************************/
1221
1222 BOOL set_posix_lock_posix_flavour(files_struct *fsp,
1223                         SMB_BIG_UINT u_offset,
1224                         SMB_BIG_UINT u_count,
1225                         enum brl_type lock_type,
1226                         int *errno_ret)
1227 {
1228         SMB_OFF_T offset;
1229         SMB_OFF_T count;
1230         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1231
1232         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
1233                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1234
1235         /*
1236          * If the requested lock won't fit in the POSIX range, we will
1237          * pretend it was successful.
1238          */
1239
1240         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1241                 return True;
1242         }
1243
1244         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1245                 *errno_ret = errno;
1246                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1247                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1248                 return False;
1249         }
1250         return True;
1251 }
1252
1253 /****************************************************************************
1254  POSIX function to release a lock. Returns True if the
1255  lock could be released, False if not.
1256  We are given a complete lock state from the upper layer which is what the lock
1257  state should be after the unlock has already been done, so what
1258  we do is punch out holes in the unlock range where locks owned by this process
1259  have a different lock context.
1260 ****************************************************************************/
1261
1262 BOOL release_posix_lock_posix_flavour(files_struct *fsp,
1263                                 SMB_BIG_UINT u_offset,
1264                                 SMB_BIG_UINT u_count,
1265                                 const struct lock_context *lock_ctx,
1266                                 const struct lock_struct *plocks,
1267                                 int num_locks)
1268 {
1269         BOOL ret = True;
1270         SMB_OFF_T offset;
1271         SMB_OFF_T count;
1272         TALLOC_CTX *ul_ctx = NULL;
1273         struct lock_list *ulist = NULL;
1274         struct lock_list *ul = NULL;
1275
1276         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
1277                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1278
1279         /*
1280          * If the requested lock won't fit in the POSIX range, we will
1281          * pretend it was successful.
1282          */
1283
1284         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1285                 return True;
1286         }
1287
1288         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1289                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1290                 return False;
1291         }
1292
1293         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1294                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1295                 talloc_destroy(ul_ctx);
1296                 return False;
1297         }
1298
1299         /*
1300          * Create the initial list entry containing the
1301          * lock we want to remove.
1302          */
1303
1304         ZERO_STRUCTP(ul);
1305         ul->start = offset;
1306         ul->size = count;
1307
1308         DLIST_ADD(ulist, ul);
1309
1310         /*
1311          * Walk the given array creating a linked list
1312          * of unlock requests.
1313          */
1314
1315         ulist = posix_lock_list(ul_ctx,
1316                                 ulist,
1317                                 lock_ctx, /* Lock context ulist belongs to. */
1318                                 fsp,
1319                                 plocks,
1320                                 num_locks);
1321
1322         /*
1323          * Release the POSIX locks on the list of ranges returned.
1324          */
1325
1326         for(; ulist; ulist = ulist->next) {
1327                 offset = ulist->start;
1328                 count = ulist->size;
1329
1330                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1331                         (double)offset, (double)count ));
1332
1333                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1334                         ret = False;
1335                 }
1336         }
1337
1338         talloc_destroy(ul_ctx);
1339         return ret;
1340 }