s3:include: change lock_struct->fnum to uint64_t
[ddiss/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap/dbwrap.h"
28 #include "dbwrap/dbwrap_rbt.h"
29 #include "util_tdb.h"
30
31 #undef DBGC_CLASS
32 #define DBGC_CLASS DBGC_LOCKING
33
34 /*
35  * The pending close database handle.
36  */
37
38 static struct db_context *posix_pending_close_db;
39
40 /****************************************************************************
41  First - the functions that deal with the underlying system locks - these
42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
43  POSIX locks onto POSIX.
44 ****************************************************************************/
45
46 /****************************************************************************
47  Utility function to map a lock type correctly depending on the open
48  mode of a file.
49 ****************************************************************************/
50
51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
52 {
53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
54                 /*
55                  * Many UNIX's cannot get a write lock on a file opened read-only.
56                  * Win32 locking semantics allow this.
57                  * Do the best we can and attempt a read-only lock.
58                  */
59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
60                 return F_RDLCK;
61         }
62
63         /*
64          * This return should be the most normal, as we attempt
65          * to always open files read/write.
66          */
67
68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
69 }
70
/****************************************************************************
 Debugging aid: printable name for a POSIX (fcntl) lock type.
 F_RDLCK yields "READ"; any other value yields "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
79
80 /****************************************************************************
81  Check to see if the given unsigned lock range is within the possible POSIX
82  range. Modifies the given args to be in range if possible, just returns
83  False if not.
84 ****************************************************************************/
85
86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
87                                 uint64_t u_offset, uint64_t u_count)
88 {
89         off_t offset = (off_t)u_offset;
90         off_t count = (off_t)u_count;
91
92         /*
93          * For the type of system we are, attempt to
94          * find the maximum positive lock offset as an off_t.
95          */
96
97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
98
99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
100 #else
101         /*
102          * In this case off_t is 64 bits,
103          * and the underlying system can handle 64 bit signed locks.
104          */
105
106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
107         off_t mask = (mask2<<1);
108         off_t max_positive_lock_offset = ~mask;
109
110 #endif
111         /*
112          * POSIX locks of length zero mean lock to end-of-file.
113          * Win32 locks of length zero are point probes. Ignore
114          * any Win32 locks of length zero. JRA.
115          */
116
117         if (count == (off_t)0) {
118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
119                 return False;
120         }
121
122         /*
123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
124          * ignore this lock.
125          */
126
127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
128                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
129                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
130                 return False;
131         }
132
133         /*
134          * We must truncate the count to less than max_positive_lock_offset.
135          */
136
137         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
138                 count = max_positive_lock_offset;
139         }
140
141         /*
142          * Truncate count to end at max lock offset.
143          */
144
145         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
146                 count = max_positive_lock_offset - offset;
147         }
148
149         /*
150          * If we ate all the count, ignore this lock.
151          */
152
153         if (count == 0) {
154                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
155                                 (double)u_offset, (double)u_count ));
156                 return False;
157         }
158
159         /*
160          * The mapping was successful.
161          */
162
163         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
164                         (double)offset, (double)count ));
165
166         *offset_out = offset;
167         *count_out = count;
168         
169         return True;
170 }
171
172 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
173                        struct files_struct *fsp, int op, off_t offset,
174                        off_t count, int type)
175 {
176         VFS_FIND(lock);
177         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((off_t)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((off_t)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
224                           struct files_struct *fsp, off_t *poffset,
225                           off_t *pcount, int *ptype, pid_t *ppid)
226 {
227         VFS_FIND(getlock);
228         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype, 
229                                        ppid);
230 }
231
232 /****************************************************************************
233  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
234  broken NFS implementations.
235 ****************************************************************************/
236
237 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
238 {
239         pid_t pid;
240         bool ret;
241
242         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
243                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
244
245         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
246
247         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
248
249                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
250                                         (double)*poffset,(double)*pcount));
251                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
252                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
253
254                 /*
255                  * If the offset is > 0x7FFFFFFF then this will cause problems on
256                  * 32 bit NFS mounted filesystems. Just ignore it.
257                  */
258
259                 if (*poffset & ~((off_t)0x7fffffff)) {
260                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
261                         return True;
262                 }
263
264                 if (*pcount & ~((off_t)0x7fffffff)) {
265                         /* 32 bit NFS file system, retry with smaller offset */
266                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
267                         errno = 0;
268                         *pcount &= 0x7fffffff;
269                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
270                 }
271         }
272
273         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
274         return ret;
275 }
276
277 /****************************************************************************
278  POSIX function to see if a file region is locked. Returns True if the
279  region is locked, False otherwise.
280 ****************************************************************************/
281
282 bool is_posix_locked(files_struct *fsp,
283                         uint64_t *pu_offset,
284                         uint64_t *pu_count,
285                         enum brl_type *plock_type,
286                         enum brl_flavour lock_flav)
287 {
288         off_t offset;
289         off_t count;
290         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
291
292         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
293                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
294                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
295
296         /*
297          * If the requested lock won't fit in the POSIX range, we will
298          * never set it, so presume it is not locked.
299          */
300
301         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
302                 return False;
303         }
304
305         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
306                 return False;
307         }
308
309         if (posix_lock_type == F_UNLCK) {
310                 return False;
311         }
312
313         if (lock_flav == POSIX_LOCK) {
314                 /* Only POSIX lock queries need to know the details. */
315                 *pu_offset = (uint64_t)offset;
316                 *pu_count = (uint64_t)count;
317                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
318         }
319         return True;
320 }
321
322 /****************************************************************************
323  Next - the functions that deal with in memory database storing representations
324  of either Windows CIFS locks or POSIX CIFS locks.
325 ****************************************************************************/
326
/*
 * The key used for lock reference count records in the in-memory POSIX
 * pending close database. The raw bytes of this struct form the key, so
 * its layout must not change.
 */

struct lock_ref_count_key {
	struct file_id id;	/* Copied from fsp->file_id. */
	char r;			/* Set to 'r' by locking_ref_count_key_fsp(); keeps this key distinct from the bare file_id key built by fd_array_key_fsp(). */
}; 
333
334 /*******************************************************************
335  Form a static locking key for a dev/inode pair for the lock ref count
336 ******************************************************************/
337
338 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
339                                           struct lock_ref_count_key *tmp)
340 {
341         ZERO_STRUCTP(tmp);
342         tmp->id = fsp->file_id;
343         tmp->r = 'r';
344         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
345 }
346
347 /*******************************************************************
348  Convenience function to get an fd_array key from an fsp.
349 ******************************************************************/
350
351 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
352 {
353         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
354 }
355
356 /*******************************************************************
357  Create the in-memory POSIX lock databases.
358 ********************************************************************/
359
360 bool posix_locking_init(bool read_only)
361 {
362         if (posix_pending_close_db != NULL) {
363                 return true;
364         }
365
366         posix_pending_close_db = db_open_rbt(NULL);
367
368         if (posix_pending_close_db == NULL) {
369                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
370                 return false;
371         }
372
373         return true;
374 }
375
376 /*******************************************************************
377  Delete the in-memory POSIX lock databases.
378 ********************************************************************/
379
380 bool posix_locking_end(void)
381 {
382         /*
383          * Shouldn't we close all fd's here?
384          */
385         TALLOC_FREE(posix_pending_close_db);
386         return true;
387 }
388
389 /****************************************************************************
390  Next - the functions that deal with storing fd's that have outstanding
391  POSIX locks when closed.
392 ****************************************************************************/
393
/****************************************************************************
 The pending close database holds two kinds of record per dev/ino pair:

  - a lock reference count: a single int, keyed by the file id followed by
    the character 'r', counting the outstanding locks on all open fd's on
    this dev/ino pair;

  - an array of ints, keyed by the file id alone, holding the fd's that
    were open on this dev/ino pair that should have been closed, but can't
    be as the lock ref count is non zero.
****************************************************************************/
402
403 /****************************************************************************
404  Keep a reference count of the number of Windows locks open on this dev/ino
405  pair. Creates entry if it doesn't exist.
406 ****************************************************************************/
407
408 static void increment_windows_lock_ref_count(files_struct *fsp)
409 {
410         struct lock_ref_count_key tmp;
411         struct db_record *rec;
412         int lock_ref_count = 0;
413         NTSTATUS status;
414         TDB_DATA value;
415
416         rec = dbwrap_fetch_locked(
417                 posix_pending_close_db, talloc_tos(),
418                 locking_ref_count_key_fsp(fsp, &tmp));
419
420         SMB_ASSERT(rec != NULL);
421
422         value = dbwrap_record_get_value(rec);
423
424         if (value.dptr != NULL) {
425                 SMB_ASSERT(value.dsize == sizeof(lock_ref_count));
426                 memcpy(&lock_ref_count, value.dptr,
427                        sizeof(lock_ref_count));
428         }
429
430         lock_ref_count++;
431
432         status = dbwrap_record_store(rec,
433                                      make_tdb_data((uint8 *)&lock_ref_count,
434                                      sizeof(lock_ref_count)), 0);
435
436         SMB_ASSERT(NT_STATUS_IS_OK(status));
437
438         TALLOC_FREE(rec);
439
440         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
441                   fsp_str_dbg(fsp), lock_ref_count));
442 }
443
444 /****************************************************************************
445  Bulk delete - subtract as many locks as we've just deleted.
446 ****************************************************************************/
447
448 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
449 {
450         struct lock_ref_count_key tmp;
451         struct db_record *rec;
452         int lock_ref_count = 0;
453         NTSTATUS status;
454         TDB_DATA value;
455
456         rec = dbwrap_fetch_locked(
457                 posix_pending_close_db, talloc_tos(),
458                 locking_ref_count_key_fsp(fsp, &tmp));
459
460         if (rec == NULL) {
461                 DEBUG(0, ("reduce_windows_lock_ref_count: rec not found\n"));
462                 return;
463         }
464
465         value = dbwrap_record_get_value(rec);
466
467         if ((value.dptr == NULL) ||  (value.dsize != sizeof(lock_ref_count))) {
468                 DEBUG(0, ("reduce_windows_lock_ref_count: wrong value\n"));
469                 TALLOC_FREE(rec);
470                 return;
471         }
472
473         memcpy(&lock_ref_count, value.dptr, sizeof(lock_ref_count));
474
475         SMB_ASSERT(lock_ref_count > 0);
476
477         lock_ref_count -= dcount;
478
479         status = dbwrap_record_store(rec,
480                                      make_tdb_data((uint8 *)&lock_ref_count,
481                                      sizeof(lock_ref_count)), 0);
482
483         SMB_ASSERT(NT_STATUS_IS_OK(status));
484
485         TALLOC_FREE(rec);
486
487         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
488                   fsp_str_dbg(fsp), lock_ref_count));
489 }
490
491 static void decrement_windows_lock_ref_count(files_struct *fsp)
492 {
493         reduce_windows_lock_ref_count(fsp, 1);
494 }
495
496 /****************************************************************************
497  Fetch the lock ref count.
498 ****************************************************************************/
499
500 static int get_windows_lock_ref_count(files_struct *fsp)
501 {
502         struct lock_ref_count_key tmp;
503         TDB_DATA dbuf;
504         NTSTATUS status;
505         int lock_ref_count = 0;
506
507         status = dbwrap_fetch(
508                 posix_pending_close_db, talloc_tos(),
509                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
510
511         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
512                 goto done;
513         }
514
515         if (!NT_STATUS_IS_OK(status)) {
516                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
517                           "lock ref count for file %s: %s\n",
518                           fsp_str_dbg(fsp), nt_errstr(status)));
519                 goto done;
520         }
521
522         if (dbuf.dsize != sizeof(lock_ref_count)) {
523                 DEBUG(0, ("get_windows_lock_ref_count: invalid entry "
524                           "in lock ref count record for file %s: "
525                           "(invalid data size %u)\n",
526                           fsp_str_dbg(fsp), (unsigned int)dbuf.dsize));
527                 goto done;
528         }
529
530         memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
531         TALLOC_FREE(dbuf.dptr);
532
533 done:
534         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
535                   fsp_str_dbg(fsp), lock_ref_count));
536
537         return lock_ref_count;
538 }
539
540 /****************************************************************************
541  Delete a lock_ref_count entry.
542 ****************************************************************************/
543
544 static void delete_windows_lock_ref_count(files_struct *fsp)
545 {
546         struct lock_ref_count_key tmp;
547         struct db_record *rec;
548
549         rec = dbwrap_fetch_locked(
550                 posix_pending_close_db, talloc_tos(),
551                 locking_ref_count_key_fsp(fsp, &tmp));
552
553         SMB_ASSERT(rec != NULL);
554
555         /* Not a bug if it doesn't exist - no locks were ever granted. */
556
557         dbwrap_record_delete(rec);
558         TALLOC_FREE(rec);
559
560         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
561                   fsp_str_dbg(fsp)));
562 }
563
564 /****************************************************************************
565  Add an fd to the pending close tdb.
566 ****************************************************************************/
567
568 static void add_fd_to_close_entry(files_struct *fsp)
569 {
570         struct db_record *rec;
571         uint8_t *new_data;
572         NTSTATUS status;
573         TDB_DATA value;
574
575         rec = dbwrap_fetch_locked(
576                 posix_pending_close_db, talloc_tos(),
577                 fd_array_key_fsp(fsp));
578
579         SMB_ASSERT(rec != NULL);
580
581         value = dbwrap_record_get_value(rec);
582
583         new_data = talloc_array(rec, uint8_t,
584                                 value.dsize + sizeof(fsp->fh->fd));
585
586         SMB_ASSERT(new_data != NULL);
587
588         memcpy(new_data, value.dptr, value.dsize);
589         memcpy(new_data + value.dsize,
590                &fsp->fh->fd, sizeof(fsp->fh->fd));
591
592         status = dbwrap_record_store(
593                 rec, make_tdb_data(new_data,
594                                    value.dsize + sizeof(fsp->fh->fd)), 0);
595
596         SMB_ASSERT(NT_STATUS_IS_OK(status));
597
598         TALLOC_FREE(rec);
599
600         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
601                   fsp->fh->fd, fsp_str_dbg(fsp)));
602 }
603
604 /****************************************************************************
605  Remove all fd entries for a specific dev/inode pair from the tdb.
606 ****************************************************************************/
607
608 static void delete_close_entries(files_struct *fsp)
609 {
610         struct db_record *rec;
611
612         rec = dbwrap_fetch_locked(
613                 posix_pending_close_db, talloc_tos(),
614                 fd_array_key_fsp(fsp));
615
616         SMB_ASSERT(rec != NULL);
617         dbwrap_record_delete(rec);
618         TALLOC_FREE(rec);
619 }
620
621 /****************************************************************************
622  Get the array of POSIX pending close records for an open fsp. Returns number
623  of entries.
624 ****************************************************************************/
625
626 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
627                                               files_struct *fsp, int **entries)
628 {
629         TDB_DATA dbuf;
630         NTSTATUS status;
631
632         status = dbwrap_fetch(
633                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
634                 &dbuf);
635
636         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
637                 *entries = NULL;
638                 return 0;
639         }
640
641         SMB_ASSERT(NT_STATUS_IS_OK(status));
642
643         if (dbuf.dsize == 0) {
644                 *entries = NULL;
645                 return 0;
646         }
647
648         *entries = (int *)dbuf.dptr;
649         return (size_t)(dbuf.dsize / sizeof(int));
650 }
651
652 /****************************************************************************
653  Deal with pending closes needed by POSIX locking support.
654  Note that posix_locking_close_file() is expected to have been called
655  to delete all locks on this fsp before this function is called.
656 ****************************************************************************/
657
658 int fd_close_posix(struct files_struct *fsp)
659 {
660         int saved_errno = 0;
661         int ret;
662         int *fd_array = NULL;
663         size_t count, i;
664
665         if (!lp_locking(fsp->conn->params) ||
666             !lp_posix_locking(fsp->conn->params))
667         {
668                 /*
669                  * No locking or POSIX to worry about or we want POSIX semantics
670                  * which will lose all locks on all fd's open on this dev/inode,
671                  * just close.
672                  */
673                 return close(fsp->fh->fd);
674         }
675
676         if (get_windows_lock_ref_count(fsp)) {
677
678                 /*
679                  * There are outstanding locks on this dev/inode pair on
680                  * other fds. Add our fd to the pending close tdb and set
681                  * fsp->fh->fd to -1.
682                  */
683
684                 add_fd_to_close_entry(fsp);
685                 return 0;
686         }
687
688         /*
689          * No outstanding locks. Get the pending close fd's
690          * from the tdb and close them all.
691          */
692
693         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
694
695         if (count) {
696                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
697                           (unsigned int)count));
698
699                 for(i = 0; i < count; i++) {
700                         if (close(fd_array[i]) == -1) {
701                                 saved_errno = errno;
702                         }
703                 }
704
705                 /*
706                  * Delete all fd's stored in the tdb
707                  * for this dev/inode pair.
708                  */
709
710                 delete_close_entries(fsp);
711         }
712
713         TALLOC_FREE(fd_array);
714
715         /* Don't need a lock ref count on this dev/ino anymore. */
716         delete_windows_lock_ref_count(fsp);
717
718         /*
719          * Finally close the fd associated with this fsp.
720          */
721
722         ret = close(fsp->fh->fd);
723
724         if (ret == 0 && saved_errno != 0) {
725                 errno = saved_errno;
726                 ret = -1;
727         }
728
729         return ret;
730 }
731
732 /****************************************************************************
733  Next - the functions that deal with the mapping CIFS Windows locks onto
734  the underlying system POSIX locks.
735 ****************************************************************************/
736
/*
 * One node of the doubly linked list used when splitting a lock range
 * into POSIX lock ranges.
 */

struct lock_list {
	struct lock_list *next, *prev;	/* List linkage. */
	off_t start;			/* Start of the range. */
	off_t size;			/* Length of the range; start + size is its end. */
};
748
749 /****************************************************************************
750  Create a list of lock ranges that don't overlap a given range. Used in calculating
751  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
752  understand it :-).
753 ****************************************************************************/
754
755 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
756                                                 struct lock_list *lhead,
757                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
758                                                 files_struct *fsp,
759                                                 const struct lock_struct *plocks,
760                                                 int num_locks)
761 {
762         int i;
763
764         /*
765          * Check the current lock list on this dev/inode pair.
766          * Quit if the list is deleted.
767          */
768
769         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
770                 (double)lhead->start, (double)lhead->size ));
771
772         for (i=0; i<num_locks && lhead; i++) {
773                 const struct lock_struct *lock = &plocks[i];
774                 struct lock_list *l_curr;
775
776                 /* Ignore all but read/write locks. */
777                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
778                         continue;
779                 }
780
781                 /* Ignore locks not owned by this process. */
782                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
783                         continue;
784                 }
785
786                 /*
787                  * Walk the lock list, checking for overlaps. Note that
788                  * the lock list can expand within this loop if the current
789                  * range being examined needs to be split.
790                  */
791
792                 for (l_curr = lhead; l_curr;) {
793
794                         DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
795                                 (unsigned long long)lock->fnum,
796                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
797
798                         if ( (l_curr->start >= (lock->start + lock->size)) ||
799                                  (lock->start >= (l_curr->start + l_curr->size))) {
800
801                                 /* No overlap with existing lock - leave this range alone. */
802 /*********************************************
803                                              +---------+
804                                              | l_curr  |
805                                              +---------+
806                                 +-------+
807                                 | lock  |
808                                 +-------+
809 OR....
810              +---------+
811              |  l_curr |
812              +---------+
813 **********************************************/
814
815                                 DEBUG(10,(" no overlap case.\n" ));
816
817                                 l_curr = l_curr->next;
818
819                         } else if ( (l_curr->start >= lock->start) &&
820                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
821
822                                 /*
823                                  * This range is completely overlapped by this existing lock range
824                                  * and thus should have no effect. Delete it from the list.
825                                  */
826 /*********************************************
827                 +---------+
828                 |  l_curr |
829                 +---------+
830         +---------------------------+
831         |       lock                |
832         +---------------------------+
833 **********************************************/
834                                 /* Save the next pointer */
835                                 struct lock_list *ul_next = l_curr->next;
836
837                                 DEBUG(10,(" delete case.\n" ));
838
839                                 DLIST_REMOVE(lhead, l_curr);
840                                 if(lhead == NULL) {
841                                         break; /* No more list... */
842                                 }
843
844                                 l_curr = ul_next;
845                                 
846                         } else if ( (l_curr->start >= lock->start) &&
847                                                 (l_curr->start < lock->start + lock->size) &&
848                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
849
850                                 /*
851                                  * This range overlaps the existing lock range at the high end.
852                                  * Truncate by moving start to existing range end and reducing size.
853                                  */
854 /*********************************************
855                 +---------------+
856                 |  l_curr       |
857                 +---------------+
858         +---------------+
859         |    lock       |
860         +---------------+
861 BECOMES....
862                         +-------+
863                         | l_curr|
864                         +-------+
865 **********************************************/
866
867                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
868                                 l_curr->start = lock->start + lock->size;
869
870                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
871                                                                 (double)l_curr->start, (double)l_curr->size ));
872
873                                 l_curr = l_curr->next;
874
875                         } else if ( (l_curr->start < lock->start) &&
876                                                 (l_curr->start + l_curr->size > lock->start) &&
877                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
878
879                                 /*
880                                  * This range overlaps the existing lock range at the low end.
881                                  * Truncate by reducing size.
882                                  */
883 /*********************************************
884    +---------------+
885    |  l_curr       |
886    +---------------+
887            +---------------+
888            |    lock       |
889            +---------------+
890 BECOMES....
891    +-------+
892    | l_curr|
893    +-------+
894 **********************************************/
895
896                                 l_curr->size = lock->start - l_curr->start;
897
898                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
899                                                                 (double)l_curr->start, (double)l_curr->size ));
900
901                                 l_curr = l_curr->next;
902                 
903                         } else if ( (l_curr->start < lock->start) &&
904                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
905                                 /*
906                                  * Worst case scenario. Range completely overlaps an existing
907                                  * lock range. Split the request into two, push the new (upper) request
908                                  * into the dlink list, and continue with the entry after l_new (as we
909                                  * know that l_new will not overlap with this lock).
910                                  */
911 /*********************************************
912         +---------------------------+
913         |        l_curr             |
914         +---------------------------+
915                 +---------+
916                 | lock    |
917                 +---------+
918 BECOMES.....
919         +-------+         +---------+
920         | l_curr|         | l_new   |
921         +-------+         +---------+
922 **********************************************/
923                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
924
925                                 if(l_new == NULL) {
926                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
927                                         return NULL; /* The talloc_destroy takes care of cleanup. */
928                                 }
929
930                                 ZERO_STRUCTP(l_new);
931                                 l_new->start = lock->start + lock->size;
932                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
933
934                                 /* Truncate the l_curr. */
935                                 l_curr->size = lock->start - l_curr->start;
936
937                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
938 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
939                                                                 (double)l_new->start, (double)l_new->size ));
940
941                                 /*
942                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
943                                  */
944                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
945
946                                 /* And move after the link we added. */
947                                 l_curr = l_new->next;
948
949                         } else {
950
951                                 /*
952                                  * This logic case should never happen. Ensure this is the
953                                  * case by forcing an abort.... Remove in production.
954                                  */
955                                 char *msg = NULL;
956
957                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
958 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
959                                         smb_panic(msg);
960                                 } else {
961                                         smb_panic("posix_lock_list");
962                                 }
963                         }
964                 } /* end for ( l_curr = lhead; l_curr;) */
965         } /* end for (i=0; i<num_locks && ul_head; i++) */
966
967         return lhead;
968 }
969
970 /****************************************************************************
971  POSIX function to acquire a lock. Returns True if the
972  lock could be granted, False if not.
973 ****************************************************************************/
974
975 bool set_posix_lock_windows_flavour(files_struct *fsp,
976                         uint64_t u_offset,
977                         uint64_t u_count,
978                         enum brl_type lock_type,
979                         const struct lock_context *lock_ctx,
980                         const struct lock_struct *plocks,
981                         int num_locks,
982                         int *errno_ret)
983 {
984         off_t offset;
985         off_t count;
986         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
987         bool ret = True;
988         size_t lock_count;
989         TALLOC_CTX *l_ctx = NULL;
990         struct lock_list *llist = NULL;
991         struct lock_list *ll = NULL;
992
993         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
994                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
995                  (double)u_offset, (double)u_count,
996                  posix_lock_type_name(lock_type)));
997
998         /*
999          * If the requested lock won't fit in the POSIX range, we will
1000          * pretend it was successful.
1001          */
1002
1003         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1004                 increment_windows_lock_ref_count(fsp);
1005                 return True;
1006         }
1007
1008         /*
1009          * Windows is very strange. It allows read locks to be overlayed
1010          * (even over a write lock), but leaves the write lock in force until the first
1011          * unlock. It also reference counts the locks. This means the following sequence :
1012          *
1013          * process1                                      process2
1014          * ------------------------------------------------------------------------
1015          * WRITE LOCK : start = 2, len = 10
1016          *                                            READ LOCK: start =0, len = 10 - FAIL
1017          * READ LOCK : start = 0, len = 14 
1018          *                                            READ LOCK: start =0, len = 10 - FAIL
1019          * UNLOCK : start = 2, len = 10
1020          *                                            READ LOCK: start =0, len = 10 - OK
1021          *
1022          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1023          * would leave a single read lock over the 0-14 region.
1024          */
1025         
1026         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1027                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1028                 return False;
1029         }
1030
1031         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1032                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1033                 talloc_destroy(l_ctx);
1034                 return False;
1035         }
1036
1037         /*
1038          * Create the initial list entry containing the
1039          * lock we want to add.
1040          */
1041
1042         ZERO_STRUCTP(ll);
1043         ll->start = offset;
1044         ll->size = count;
1045
1046         DLIST_ADD(llist, ll);
1047
1048         /*
1049          * The following call calculates if there are any
1050          * overlapping locks held by this process on
1051          * fd's open on the same file and splits this list
1052          * into a list of lock ranges that do not overlap with existing
1053          * POSIX locks.
1054          */
1055
1056         llist = posix_lock_list(l_ctx,
1057                                 llist,
1058                                 lock_ctx, /* Lock context llist belongs to. */
1059                                 fsp,
1060                                 plocks,
1061                                 num_locks);
1062
1063         /*
1064          * Add the POSIX locks on the list of ranges returned.
1065          * As the lock is supposed to be added atomically, we need to
1066          * back out all the locks if any one of these calls fail.
1067          */
1068
1069         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1070                 offset = ll->start;
1071                 count = ll->size;
1072
1073                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1074                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1075
1076                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1077                         *errno_ret = errno;
1078                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1079                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1080                         ret = False;
1081                         break;
1082                 }
1083         }
1084
1085         if (!ret) {
1086
1087                 /*
1088                  * Back out all the POSIX locks we have on fail.
1089                  */
1090
1091                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1092                         offset = ll->start;
1093                         count = ll->size;
1094
1095                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1096                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1097
1098                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1099                 }
1100         } else {
1101                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1102                 increment_windows_lock_ref_count(fsp);
1103         }
1104
1105         talloc_destroy(l_ctx);
1106         return ret;
1107 }
1108
1109 /****************************************************************************
1110  POSIX function to release a lock. Returns True if the
1111  lock could be released, False if not.
1112 ****************************************************************************/
1113
1114 bool release_posix_lock_windows_flavour(files_struct *fsp,
1115                                 uint64_t u_offset,
1116                                 uint64_t u_count,
1117                                 enum brl_type deleted_lock_type,
1118                                 const struct lock_context *lock_ctx,
1119                                 const struct lock_struct *plocks,
1120                                 int num_locks)
1121 {
1122         off_t offset;
1123         off_t count;
1124         bool ret = True;
1125         TALLOC_CTX *ul_ctx = NULL;
1126         struct lock_list *ulist = NULL;
1127         struct lock_list *ul = NULL;
1128
1129         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1130                  "count = %.0f\n", fsp_str_dbg(fsp),
1131                  (double)u_offset, (double)u_count));
1132
1133         /* Remember the number of Windows locks we have on this dev/ino pair. */
1134         decrement_windows_lock_ref_count(fsp);
1135
1136         /*
1137          * If the requested lock won't fit in the POSIX range, we will
1138          * pretend it was successful.
1139          */
1140
1141         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1142                 return True;
1143         }
1144
1145         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1146                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1147                 return False;
1148         }
1149
1150         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1151                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1152                 talloc_destroy(ul_ctx);
1153                 return False;
1154         }
1155
1156         /*
1157          * Create the initial list entry containing the
1158          * lock we want to remove.
1159          */
1160
1161         ZERO_STRUCTP(ul);
1162         ul->start = offset;
1163         ul->size = count;
1164
1165         DLIST_ADD(ulist, ul);
1166
1167         /*
1168          * The following call calculates if there are any
1169          * overlapping locks held by this process on
1170          * fd's open on the same file and creates a
1171          * list of unlock ranges that will allow
1172          * POSIX lock ranges to remain on the file whilst the
1173          * unlocks are performed.
1174          */
1175
1176         ulist = posix_lock_list(ul_ctx,
1177                                 ulist,
1178                                 lock_ctx, /* Lock context ulist belongs to. */
1179                                 fsp,
1180                                 plocks,
1181                                 num_locks);
1182
1183         /*
1184          * If there were any overlapped entries (list is > 1 or size or start have changed),
1185          * and the lock_type we just deleted from
1186          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1187          * the POSIX lock to a read lock. This allows any overlapping read locks
1188          * to be atomically maintained.
1189          */
1190
1191         if (deleted_lock_type == WRITE_LOCK &&
1192                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1193
1194                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1195                         (double)offset, (double)count ));
1196
1197                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1198                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1199                         talloc_destroy(ul_ctx);
1200                         return False;
1201                 }
1202         }
1203
1204         /*
1205          * Release the POSIX locks on the list of ranges returned.
1206          */
1207
1208         for(; ulist; ulist = ulist->next) {
1209                 offset = ulist->start;
1210                 count = ulist->size;
1211
1212                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1213                         (double)offset, (double)count ));
1214
1215                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1216                         ret = False;
1217                 }
1218         }
1219
1220         talloc_destroy(ul_ctx);
1221         return ret;
1222 }
1223
1224 /****************************************************************************
1225  Next - the functions that deal with mapping CIFS POSIX locks onto
1226  the underlying system POSIX locks.
1227 ****************************************************************************/
1228
1229 /****************************************************************************
1230  POSIX function to acquire a lock. Returns True if the
1231  lock could be granted, False if not.
1232  As POSIX locks don't stack or conflict (they just overwrite)
1233  we can map the requested lock directly onto a system one. We
1234  know it doesn't conflict with locks on other contexts as the
1235  upper layer would have refused it.
1236 ****************************************************************************/
1237
1238 bool set_posix_lock_posix_flavour(files_struct *fsp,
1239                         uint64_t u_offset,
1240                         uint64_t u_count,
1241                         enum brl_type lock_type,
1242                         int *errno_ret)
1243 {
1244         off_t offset;
1245         off_t count;
1246         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1247
1248         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1249                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1250                  (double)u_offset, (double)u_count,
1251                  posix_lock_type_name(lock_type)));
1252
1253         /*
1254          * If the requested lock won't fit in the POSIX range, we will
1255          * pretend it was successful.
1256          */
1257
1258         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1259                 return True;
1260         }
1261
1262         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1263                 *errno_ret = errno;
1264                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1265                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1266                 return False;
1267         }
1268         return True;
1269 }
1270
1271 /****************************************************************************
1272  POSIX function to release a lock. Returns True if the
1273  lock could be released, False if not.
1274  We are given a complete lock state from the upper layer which is what the lock
1275  state should be after the unlock has already been done, so what
1276  we do is punch out holes in the unlock range where locks owned by this process
1277  have a different lock context.
1278 ****************************************************************************/
1279
1280 bool release_posix_lock_posix_flavour(files_struct *fsp,
1281                                 uint64_t u_offset,
1282                                 uint64_t u_count,
1283                                 const struct lock_context *lock_ctx,
1284                                 const struct lock_struct *plocks,
1285                                 int num_locks)
1286 {
1287         bool ret = True;
1288         off_t offset;
1289         off_t count;
1290         TALLOC_CTX *ul_ctx = NULL;
1291         struct lock_list *ulist = NULL;
1292         struct lock_list *ul = NULL;
1293
1294         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1295                  "count = %.0f\n", fsp_str_dbg(fsp),
1296                  (double)u_offset, (double)u_count));
1297
1298         /*
1299          * If the requested lock won't fit in the POSIX range, we will
1300          * pretend it was successful.
1301          */
1302
1303         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1304                 return True;
1305         }
1306
1307         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1308                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1309                 return False;
1310         }
1311
1312         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1313                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1314                 talloc_destroy(ul_ctx);
1315                 return False;
1316         }
1317
1318         /*
1319          * Create the initial list entry containing the
1320          * lock we want to remove.
1321          */
1322
1323         ZERO_STRUCTP(ul);
1324         ul->start = offset;
1325         ul->size = count;
1326
1327         DLIST_ADD(ulist, ul);
1328
1329         /*
1330          * Walk the given array creating a linked list
1331          * of unlock requests.
1332          */
1333
1334         ulist = posix_lock_list(ul_ctx,
1335                                 ulist,
1336                                 lock_ctx, /* Lock context ulist belongs to. */
1337                                 fsp,
1338                                 plocks,
1339                                 num_locks);
1340
1341         /*
1342          * Release the POSIX locks on the list of ranges returned.
1343          */
1344
1345         for(; ulist; ulist = ulist->next) {
1346                 offset = ulist->start;
1347                 count = ulist->size;
1348
1349                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1350                         (double)offset, (double)count ));
1351
1352                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1353                         ret = False;
1354                 }
1355         }
1356
1357         talloc_destroy(ul_ctx);
1358         return ret;
1359 }