build: Remove SMB_OFF_T, replace with off_t
[mat/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap/dbwrap.h"
28 #include "dbwrap/dbwrap_rbt.h"
29 #include "util_tdb.h"
30
31 #undef DBGC_CLASS
32 #define DBGC_CLASS DBGC_LOCKING
33
/*
 * Handle for the in-memory database used by this file to hold the
 * per-file lock reference counts and the fds whose close is pending.
 * Opened by posix_locking_init() and released by posix_locking_end().
 */

static struct db_context *posix_pending_close_db = NULL;
39
40 /****************************************************************************
41  First - the functions that deal with the underlying system locks - these
42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
43  POSIX locks onto POSIX.
44 ****************************************************************************/
45
46 /****************************************************************************
47  Utility function to map a lock type correctly depending on the open
48  mode of a file.
49 ****************************************************************************/
50
51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
52 {
53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
54                 /*
55                  * Many UNIX's cannot get a write lock on a file opened read-only.
56                  * Win32 locking semantics allow this.
57                  * Do the best we can and attempt a read-only lock.
58                  */
59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
60                 return F_RDLCK;
61         }
62
63         /*
64          * This return should be the most normal, as we attempt
65          * to always open files read/write.
66          */
67
68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
69 }
70
/****************************************************************************
 Debugging aid: printable name for an fcntl lock type. Anything other
 than F_RDLCK is reported as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
79
80 /****************************************************************************
81  Check to see if the given unsigned lock range is within the possible POSIX
82  range. Modifies the given args to be in range if possible, just returns
83  False if not.
84 ****************************************************************************/
85
86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
87                                 uint64_t u_offset, uint64_t u_count)
88 {
89         off_t offset = (off_t)u_offset;
90         off_t count = (off_t)u_count;
91
92         /*
93          * For the type of system we are, attempt to
94          * find the maximum positive lock offset as an off_t.
95          */
96
97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
98
99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
100 #else
101         /*
102          * In this case off_t is 64 bits,
103          * and the underlying system can handle 64 bit signed locks.
104          */
105
106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
107         off_t mask = (mask2<<1);
108         off_t max_positive_lock_offset = ~mask;
109
110 #endif
111         /*
112          * POSIX locks of length zero mean lock to end-of-file.
113          * Win32 locks of length zero are point probes. Ignore
114          * any Win32 locks of length zero. JRA.
115          */
116
117         if (count == (off_t)0) {
118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
119                 return False;
120         }
121
122         /*
123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
124          * ignore this lock.
125          */
126
127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
128                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
129                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
130                 return False;
131         }
132
133         /*
134          * We must truncate the count to less than max_positive_lock_offset.
135          */
136
137         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
138                 count = max_positive_lock_offset;
139         }
140
141         /*
142          * Truncate count to end at max lock offset.
143          */
144
145         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
146                 count = max_positive_lock_offset - offset;
147         }
148
149         /*
150          * If we ate all the count, ignore this lock.
151          */
152
153         if (count == 0) {
154                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
155                                 (double)u_offset, (double)u_count ));
156                 return False;
157         }
158
159         /*
160          * The mapping was successful.
161          */
162
163         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
164                         (double)offset, (double)count ));
165
166         *offset_out = offset;
167         *count_out = count;
168         
169         return True;
170 }
171
172 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
173                        struct files_struct *fsp, int op, off_t offset,
174                        off_t count, int type)
175 {
176         VFS_FIND(lock);
177         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((off_t)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((off_t)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
224                           struct files_struct *fsp, off_t *poffset,
225                           off_t *pcount, int *ptype, pid_t *ppid)
226 {
227         VFS_FIND(getlock);
228         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype, 
229                                        ppid);
230 }
231
232 /****************************************************************************
233  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
234  broken NFS implementations.
235 ****************************************************************************/
236
237 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
238 {
239         pid_t pid;
240         bool ret;
241
242         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
243                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
244
245         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
246
247         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
248
249                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
250                                         (double)*poffset,(double)*pcount));
251                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
252                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
253
254                 /*
255                  * If the offset is > 0x7FFFFFFF then this will cause problems on
256                  * 32 bit NFS mounted filesystems. Just ignore it.
257                  */
258
259                 if (*poffset & ~((off_t)0x7fffffff)) {
260                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
261                         return True;
262                 }
263
264                 if (*pcount & ~((off_t)0x7fffffff)) {
265                         /* 32 bit NFS file system, retry with smaller offset */
266                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
267                         errno = 0;
268                         *pcount &= 0x7fffffff;
269                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
270                 }
271         }
272
273         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
274         return ret;
275 }
276
277 /****************************************************************************
278  POSIX function to see if a file region is locked. Returns True if the
279  region is locked, False otherwise.
280 ****************************************************************************/
281
282 bool is_posix_locked(files_struct *fsp,
283                         uint64_t *pu_offset,
284                         uint64_t *pu_count,
285                         enum brl_type *plock_type,
286                         enum brl_flavour lock_flav)
287 {
288         off_t offset;
289         off_t count;
290         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
291
292         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
293                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
294                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
295
296         /*
297          * If the requested lock won't fit in the POSIX range, we will
298          * never set it, so presume it is not locked.
299          */
300
301         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
302                 return False;
303         }
304
305         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
306                 return False;
307         }
308
309         if (posix_lock_type == F_UNLCK) {
310                 return False;
311         }
312
313         if (lock_flav == POSIX_LOCK) {
314                 /* Only POSIX lock queries need to know the details. */
315                 *pu_offset = (uint64_t)offset;
316                 *pu_count = (uint64_t)count;
317                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
318         }
319         return True;
320 }
321
322 /****************************************************************************
323  Next - the functions that deal with in memory database storing representations
324  of either Windows CIFS locks or POSIX CIFS locks.
325 ****************************************************************************/
326
327 /* The key used in the in-memory POSIX databases. */
328
329 struct lock_ref_count_key {
330         struct file_id id;
331         char r;
332 }; 
333
334 /*******************************************************************
335  Form a static locking key for a dev/inode pair for the lock ref count
336 ******************************************************************/
337
338 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
339                                           struct lock_ref_count_key *tmp)
340 {
341         ZERO_STRUCTP(tmp);
342         tmp->id = fsp->file_id;
343         tmp->r = 'r';
344         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
345 }
346
347 /*******************************************************************
348  Convenience function to get an fd_array key from an fsp.
349 ******************************************************************/
350
351 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
352 {
353         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
354 }
355
356 /*******************************************************************
357  Create the in-memory POSIX lock databases.
358 ********************************************************************/
359
360 bool posix_locking_init(bool read_only)
361 {
362         if (posix_pending_close_db != NULL) {
363                 return true;
364         }
365
366         posix_pending_close_db = db_open_rbt(NULL);
367
368         if (posix_pending_close_db == NULL) {
369                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
370                 return false;
371         }
372
373         return true;
374 }
375
376 /*******************************************************************
377  Delete the in-memory POSIX lock databases.
378 ********************************************************************/
379
380 bool posix_locking_end(void)
381 {
382         /*
383          * Shouldn't we close all fd's here?
384          */
385         TALLOC_FREE(posix_pending_close_db);
386         return true;
387 }
388
389 /****************************************************************************
390  Next - the functions that deal with storing fd's that have outstanding
391  POSIX locks when closed.
392 ****************************************************************************/
393
/****************************************************************************
 The records in posix_pending_close_db come in two kinds, both derived from
 the file id (dev/ino pair) of the file:
  - keyed by the file id followed by the character 'r': a single int, the
    reference count of the number of outstanding locks on all open fd's on
    this dev/ino pair.
  - keyed by the file id alone: an array of ints, the fd's that were open on
    this dev/ino pair that should have been closed, but can't as the lock
    ref count is non zero.
****************************************************************************/
402
403 /****************************************************************************
404  Keep a reference count of the number of Windows locks open on this dev/ino
405  pair. Creates entry if it doesn't exist.
406 ****************************************************************************/
407
408 static void increment_windows_lock_ref_count(files_struct *fsp)
409 {
410         struct lock_ref_count_key tmp;
411         struct db_record *rec;
412         int lock_ref_count = 0;
413         NTSTATUS status;
414         TDB_DATA value;
415
416         rec = dbwrap_fetch_locked(
417                 posix_pending_close_db, talloc_tos(),
418                 locking_ref_count_key_fsp(fsp, &tmp));
419
420         SMB_ASSERT(rec != NULL);
421
422         value = dbwrap_record_get_value(rec);
423
424         if (value.dptr != NULL) {
425                 SMB_ASSERT(value.dsize == sizeof(lock_ref_count));
426                 memcpy(&lock_ref_count, value.dptr,
427                        sizeof(lock_ref_count));
428         }
429
430         lock_ref_count++;
431
432         status = dbwrap_record_store(rec,
433                                      make_tdb_data((uint8 *)&lock_ref_count,
434                                      sizeof(lock_ref_count)), 0);
435
436         SMB_ASSERT(NT_STATUS_IS_OK(status));
437
438         TALLOC_FREE(rec);
439
440         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
441                   fsp_str_dbg(fsp), lock_ref_count));
442 }
443
444 /****************************************************************************
445  Bulk delete - subtract as many locks as we've just deleted.
446 ****************************************************************************/
447
448 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
449 {
450         struct lock_ref_count_key tmp;
451         struct db_record *rec;
452         int lock_ref_count = 0;
453         NTSTATUS status;
454         TDB_DATA value;
455
456         rec = dbwrap_fetch_locked(
457                 posix_pending_close_db, talloc_tos(),
458                 locking_ref_count_key_fsp(fsp, &tmp));
459
460         value = dbwrap_record_get_value(rec);
461
462         SMB_ASSERT((rec != NULL)
463                    && (value.dptr != NULL)
464                    && (value.dsize == sizeof(lock_ref_count)));
465
466         memcpy(&lock_ref_count, value.dptr, sizeof(lock_ref_count));
467
468         SMB_ASSERT(lock_ref_count > 0);
469
470         lock_ref_count -= dcount;
471
472         status = dbwrap_record_store(rec,
473                                      make_tdb_data((uint8 *)&lock_ref_count,
474                                      sizeof(lock_ref_count)), 0);
475
476         SMB_ASSERT(NT_STATUS_IS_OK(status));
477
478         TALLOC_FREE(rec);
479
480         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
481                   fsp_str_dbg(fsp), lock_ref_count));
482 }
483
484 static void decrement_windows_lock_ref_count(files_struct *fsp)
485 {
486         reduce_windows_lock_ref_count(fsp, 1);
487 }
488
489 /****************************************************************************
490  Fetch the lock ref count.
491 ****************************************************************************/
492
493 static int get_windows_lock_ref_count(files_struct *fsp)
494 {
495         struct lock_ref_count_key tmp;
496         TDB_DATA dbuf;
497         NTSTATUS status;
498         int lock_ref_count = 0;
499
500         status = dbwrap_fetch(
501                 posix_pending_close_db, talloc_tos(),
502                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
503
504         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
505                 goto done;
506         }
507
508         if (!NT_STATUS_IS_OK(status)) {
509                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
510                           "lock ref count for file %s: %s\n",
511                           fsp_str_dbg(fsp), nt_errstr(status)));
512                 goto done;
513         }
514
515         if (dbuf.dsize != sizeof(lock_ref_count)) {
516                 DEBUG(0, ("get_windows_lock_ref_count: invalid entry "
517                           "in lock ref count record for file %s: "
518                           "(invalid data size %u)\n",
519                           fsp_str_dbg(fsp), (unsigned int)dbuf.dsize));
520                 goto done;
521         }
522
523         memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
524         TALLOC_FREE(dbuf.dptr);
525
526 done:
527         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
528                   fsp_str_dbg(fsp), lock_ref_count));
529
530         return lock_ref_count;
531 }
532
533 /****************************************************************************
534  Delete a lock_ref_count entry.
535 ****************************************************************************/
536
537 static void delete_windows_lock_ref_count(files_struct *fsp)
538 {
539         struct lock_ref_count_key tmp;
540         struct db_record *rec;
541
542         rec = dbwrap_fetch_locked(
543                 posix_pending_close_db, talloc_tos(),
544                 locking_ref_count_key_fsp(fsp, &tmp));
545
546         SMB_ASSERT(rec != NULL);
547
548         /* Not a bug if it doesn't exist - no locks were ever granted. */
549
550         dbwrap_record_delete(rec);
551         TALLOC_FREE(rec);
552
553         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
554                   fsp_str_dbg(fsp)));
555 }
556
557 /****************************************************************************
558  Add an fd to the pending close tdb.
559 ****************************************************************************/
560
561 static void add_fd_to_close_entry(files_struct *fsp)
562 {
563         struct db_record *rec;
564         uint8_t *new_data;
565         NTSTATUS status;
566         TDB_DATA value;
567
568         rec = dbwrap_fetch_locked(
569                 posix_pending_close_db, talloc_tos(),
570                 fd_array_key_fsp(fsp));
571
572         SMB_ASSERT(rec != NULL);
573
574         value = dbwrap_record_get_value(rec);
575
576         new_data = talloc_array(rec, uint8_t,
577                                 value.dsize + sizeof(fsp->fh->fd));
578
579         SMB_ASSERT(new_data != NULL);
580
581         memcpy(new_data, value.dptr, value.dsize);
582         memcpy(new_data + value.dsize,
583                &fsp->fh->fd, sizeof(fsp->fh->fd));
584
585         status = dbwrap_record_store(
586                 rec, make_tdb_data(new_data,
587                                    value.dsize + sizeof(fsp->fh->fd)), 0);
588
589         SMB_ASSERT(NT_STATUS_IS_OK(status));
590
591         TALLOC_FREE(rec);
592
593         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
594                   fsp->fh->fd, fsp_str_dbg(fsp)));
595 }
596
597 /****************************************************************************
598  Remove all fd entries for a specific dev/inode pair from the tdb.
599 ****************************************************************************/
600
601 static void delete_close_entries(files_struct *fsp)
602 {
603         struct db_record *rec;
604
605         rec = dbwrap_fetch_locked(
606                 posix_pending_close_db, talloc_tos(),
607                 fd_array_key_fsp(fsp));
608
609         SMB_ASSERT(rec != NULL);
610         dbwrap_record_delete(rec);
611         TALLOC_FREE(rec);
612 }
613
614 /****************************************************************************
615  Get the array of POSIX pending close records for an open fsp. Returns number
616  of entries.
617 ****************************************************************************/
618
619 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
620                                               files_struct *fsp, int **entries)
621 {
622         TDB_DATA dbuf;
623         NTSTATUS status;
624
625         status = dbwrap_fetch(
626                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
627                 &dbuf);
628
629         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
630                 *entries = NULL;
631                 return 0;
632         }
633
634         SMB_ASSERT(NT_STATUS_IS_OK(status));
635
636         if (dbuf.dsize == 0) {
637                 *entries = NULL;
638                 return 0;
639         }
640
641         *entries = (int *)dbuf.dptr;
642         return (size_t)(dbuf.dsize / sizeof(int));
643 }
644
645 /****************************************************************************
646  Deal with pending closes needed by POSIX locking support.
647  Note that posix_locking_close_file() is expected to have been called
648  to delete all locks on this fsp before this function is called.
649 ****************************************************************************/
650
651 int fd_close_posix(struct files_struct *fsp)
652 {
653         int saved_errno = 0;
654         int ret;
655         int *fd_array = NULL;
656         size_t count, i;
657
658         if (!lp_locking(fsp->conn->params) ||
659             !lp_posix_locking(fsp->conn->params))
660         {
661                 /*
662                  * No locking or POSIX to worry about or we want POSIX semantics
663                  * which will lose all locks on all fd's open on this dev/inode,
664                  * just close.
665                  */
666                 return close(fsp->fh->fd);
667         }
668
669         if (get_windows_lock_ref_count(fsp)) {
670
671                 /*
672                  * There are outstanding locks on this dev/inode pair on
673                  * other fds. Add our fd to the pending close tdb and set
674                  * fsp->fh->fd to -1.
675                  */
676
677                 add_fd_to_close_entry(fsp);
678                 return 0;
679         }
680
681         /*
682          * No outstanding locks. Get the pending close fd's
683          * from the tdb and close them all.
684          */
685
686         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
687
688         if (count) {
689                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
690                           (unsigned int)count));
691
692                 for(i = 0; i < count; i++) {
693                         if (close(fd_array[i]) == -1) {
694                                 saved_errno = errno;
695                         }
696                 }
697
698                 /*
699                  * Delete all fd's stored in the tdb
700                  * for this dev/inode pair.
701                  */
702
703                 delete_close_entries(fsp);
704         }
705
706         TALLOC_FREE(fd_array);
707
708         /* Don't need a lock ref count on this dev/ino anymore. */
709         delete_windows_lock_ref_count(fsp);
710
711         /*
712          * Finally close the fd associated with this fsp.
713          */
714
715         ret = close(fsp->fh->fd);
716
717         if (ret == 0 && saved_errno != 0) {
718                 errno = saved_errno;
719                 ret = -1;
720         }
721
722         return ret;
723 }
724
725 /****************************************************************************
726  Next - the functions that deal with the mapping CIFS Windows locks onto
727  the underlying system POSIX locks.
728 ****************************************************************************/
729
/*
 * One byte range, as a node of the doubly linked list that is built
 * when a lock range is split into POSIX lock ranges.
 */

struct lock_list {
	struct lock_list *next;	/* following node */
	struct lock_list *prev;	/* preceding node */
	off_t start;		/* first byte of the range */
	off_t size;		/* length of the range */
};
741
742 /****************************************************************************
743  Create a list of lock ranges that don't overlap a given range. Used in calculating
744  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
745  understand it :-).
746 ****************************************************************************/
747
748 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
749                                                 struct lock_list *lhead,
750                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
751                                                 files_struct *fsp,
752                                                 const struct lock_struct *plocks,
753                                                 int num_locks)
754 {
755         int i;
756
757         /*
758          * Check the current lock list on this dev/inode pair.
759          * Quit if the list is deleted.
760          */
761
762         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
763                 (double)lhead->start, (double)lhead->size ));
764
765         for (i=0; i<num_locks && lhead; i++) {
766                 const struct lock_struct *lock = &plocks[i];
767                 struct lock_list *l_curr;
768
769                 /* Ignore all but read/write locks. */
770                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
771                         continue;
772                 }
773
774                 /* Ignore locks not owned by this process. */
775                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
776                         continue;
777                 }
778
779                 /*
780                  * Walk the lock list, checking for overlaps. Note that
781                  * the lock list can expand within this loop if the current
782                  * range being examined needs to be split.
783                  */
784
785                 for (l_curr = lhead; l_curr;) {
786
787                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
788                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
789
790                         if ( (l_curr->start >= (lock->start + lock->size)) ||
791                                  (lock->start >= (l_curr->start + l_curr->size))) {
792
793                                 /* No overlap with existing lock - leave this range alone. */
794 /*********************************************
795                                              +---------+
796                                              | l_curr  |
797                                              +---------+
798                                 +-------+
799                                 | lock  |
800                                 +-------+
801 OR....
802              +---------+
803              |  l_curr |
804              +---------+
805 **********************************************/
806
807                                 DEBUG(10,(" no overlap case.\n" ));
808
809                                 l_curr = l_curr->next;
810
811                         } else if ( (l_curr->start >= lock->start) &&
812                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
813
814                                 /*
815                                  * This range is completely overlapped by this existing lock range
816                                  * and thus should have no effect. Delete it from the list.
817                                  */
818 /*********************************************
819                 +---------+
820                 |  l_curr |
821                 +---------+
822         +---------------------------+
823         |       lock                |
824         +---------------------------+
825 **********************************************/
826                                 /* Save the next pointer */
827                                 struct lock_list *ul_next = l_curr->next;
828
829                                 DEBUG(10,(" delete case.\n" ));
830
831                                 DLIST_REMOVE(lhead, l_curr);
832                                 if(lhead == NULL) {
833                                         break; /* No more list... */
834                                 }
835
836                                 l_curr = ul_next;
837                                 
838                         } else if ( (l_curr->start >= lock->start) &&
839                                                 (l_curr->start < lock->start + lock->size) &&
840                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
841
842                                 /*
843                                  * This range overlaps the existing lock range at the high end.
844                                  * Truncate by moving start to existing range end and reducing size.
845                                  */
846 /*********************************************
847                 +---------------+
848                 |  l_curr       |
849                 +---------------+
850         +---------------+
851         |    lock       |
852         +---------------+
853 BECOMES....
854                         +-------+
855                         | l_curr|
856                         +-------+
857 **********************************************/
858
859                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
860                                 l_curr->start = lock->start + lock->size;
861
862                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
863                                                                 (double)l_curr->start, (double)l_curr->size ));
864
865                                 l_curr = l_curr->next;
866
867                         } else if ( (l_curr->start < lock->start) &&
868                                                 (l_curr->start + l_curr->size > lock->start) &&
869                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
870
871                                 /*
872                                  * This range overlaps the existing lock range at the low end.
873                                  * Truncate by reducing size.
874                                  */
875 /*********************************************
876    +---------------+
877    |  l_curr       |
878    +---------------+
879            +---------------+
880            |    lock       |
881            +---------------+
882 BECOMES....
883    +-------+
884    | l_curr|
885    +-------+
886 **********************************************/
887
888                                 l_curr->size = lock->start - l_curr->start;
889
890                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
891                                                                 (double)l_curr->start, (double)l_curr->size ));
892
893                                 l_curr = l_curr->next;
894                 
895                         } else if ( (l_curr->start < lock->start) &&
896                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
897                                 /*
898                                  * Worst case scenario. Range completely overlaps an existing
899                                  * lock range. Split the request into two, push the new (upper) request
900                                  * into the dlink list, and continue with the entry after l_new (as we
901                                  * know that l_new will not overlap with this lock).
902                                  */
903 /*********************************************
904         +---------------------------+
905         |        l_curr             |
906         +---------------------------+
907                 +---------+
908                 | lock    |
909                 +---------+
910 BECOMES.....
911         +-------+         +---------+
912         | l_curr|         | l_new   |
913         +-------+         +---------+
914 **********************************************/
915                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
916
917                                 if(l_new == NULL) {
918                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
919                                         return NULL; /* The talloc_destroy takes care of cleanup. */
920                                 }
921
922                                 ZERO_STRUCTP(l_new);
923                                 l_new->start = lock->start + lock->size;
924                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
925
926                                 /* Truncate the l_curr. */
927                                 l_curr->size = lock->start - l_curr->start;
928
929                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
930 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
931                                                                 (double)l_new->start, (double)l_new->size ));
932
933                                 /*
934                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
935                                  */
936                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
937
938                                 /* And move after the link we added. */
939                                 l_curr = l_new->next;
940
941                         } else {
942
943                                 /*
944                                  * This logic case should never happen. Ensure this is the
945                                  * case by forcing an abort.... Remove in production.
946                                  */
947                                 char *msg = NULL;
948
949                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
950 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
951                                         smb_panic(msg);
952                                 } else {
953                                         smb_panic("posix_lock_list");
954                                 }
955                         }
956                 } /* end for ( l_curr = lhead; l_curr;) */
957         } /* end for (i=0; i<num_locks && ul_head; i++) */
958
959         return lhead;
960 }
961
962 /****************************************************************************
963  POSIX function to acquire a lock. Returns True if the
964  lock could be granted, False if not.
965 ****************************************************************************/
966
967 bool set_posix_lock_windows_flavour(files_struct *fsp,
968                         uint64_t u_offset,
969                         uint64_t u_count,
970                         enum brl_type lock_type,
971                         const struct lock_context *lock_ctx,
972                         const struct lock_struct *plocks,
973                         int num_locks,
974                         int *errno_ret)
975 {
976         off_t offset;
977         off_t count;
978         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
979         bool ret = True;
980         size_t lock_count;
981         TALLOC_CTX *l_ctx = NULL;
982         struct lock_list *llist = NULL;
983         struct lock_list *ll = NULL;
984
985         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
986                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
987                  (double)u_offset, (double)u_count,
988                  posix_lock_type_name(lock_type)));
989
990         /*
991          * If the requested lock won't fit in the POSIX range, we will
992          * pretend it was successful.
993          */
994
995         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
996                 increment_windows_lock_ref_count(fsp);
997                 return True;
998         }
999
1000         /*
1001          * Windows is very strange. It allows read locks to be overlayed
1002          * (even over a write lock), but leaves the write lock in force until the first
1003          * unlock. It also reference counts the locks. This means the following sequence :
1004          *
1005          * process1                                      process2
1006          * ------------------------------------------------------------------------
1007          * WRITE LOCK : start = 2, len = 10
1008          *                                            READ LOCK: start =0, len = 10 - FAIL
1009          * READ LOCK : start = 0, len = 14 
1010          *                                            READ LOCK: start =0, len = 10 - FAIL
1011          * UNLOCK : start = 2, len = 10
1012          *                                            READ LOCK: start =0, len = 10 - OK
1013          *
1014          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1015          * would leave a single read lock over the 0-14 region.
1016          */
1017         
1018         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1019                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1020                 return False;
1021         }
1022
1023         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1024                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1025                 talloc_destroy(l_ctx);
1026                 return False;
1027         }
1028
1029         /*
1030          * Create the initial list entry containing the
1031          * lock we want to add.
1032          */
1033
1034         ZERO_STRUCTP(ll);
1035         ll->start = offset;
1036         ll->size = count;
1037
1038         DLIST_ADD(llist, ll);
1039
1040         /*
1041          * The following call calculates if there are any
1042          * overlapping locks held by this process on
1043          * fd's open on the same file and splits this list
1044          * into a list of lock ranges that do not overlap with existing
1045          * POSIX locks.
1046          */
1047
1048         llist = posix_lock_list(l_ctx,
1049                                 llist,
1050                                 lock_ctx, /* Lock context llist belongs to. */
1051                                 fsp,
1052                                 plocks,
1053                                 num_locks);
1054
1055         /*
1056          * Add the POSIX locks on the list of ranges returned.
1057          * As the lock is supposed to be added atomically, we need to
1058          * back out all the locks if any one of these calls fail.
1059          */
1060
1061         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1062                 offset = ll->start;
1063                 count = ll->size;
1064
1065                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1066                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1067
1068                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1069                         *errno_ret = errno;
1070                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1071                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1072                         ret = False;
1073                         break;
1074                 }
1075         }
1076
1077         if (!ret) {
1078
1079                 /*
1080                  * Back out all the POSIX locks we have on fail.
1081                  */
1082
1083                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1084                         offset = ll->start;
1085                         count = ll->size;
1086
1087                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1088                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1089
1090                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1091                 }
1092         } else {
1093                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1094                 increment_windows_lock_ref_count(fsp);
1095         }
1096
1097         talloc_destroy(l_ctx);
1098         return ret;
1099 }
1100
1101 /****************************************************************************
1102  POSIX function to release a lock. Returns True if the
1103  lock could be released, False if not.
1104 ****************************************************************************/
1105
1106 bool release_posix_lock_windows_flavour(files_struct *fsp,
1107                                 uint64_t u_offset,
1108                                 uint64_t u_count,
1109                                 enum brl_type deleted_lock_type,
1110                                 const struct lock_context *lock_ctx,
1111                                 const struct lock_struct *plocks,
1112                                 int num_locks)
1113 {
1114         off_t offset;
1115         off_t count;
1116         bool ret = True;
1117         TALLOC_CTX *ul_ctx = NULL;
1118         struct lock_list *ulist = NULL;
1119         struct lock_list *ul = NULL;
1120
1121         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1122                  "count = %.0f\n", fsp_str_dbg(fsp),
1123                  (double)u_offset, (double)u_count));
1124
1125         /* Remember the number of Windows locks we have on this dev/ino pair. */
1126         decrement_windows_lock_ref_count(fsp);
1127
1128         /*
1129          * If the requested lock won't fit in the POSIX range, we will
1130          * pretend it was successful.
1131          */
1132
1133         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1134                 return True;
1135         }
1136
1137         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1138                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1139                 return False;
1140         }
1141
1142         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1143                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1144                 talloc_destroy(ul_ctx);
1145                 return False;
1146         }
1147
1148         /*
1149          * Create the initial list entry containing the
1150          * lock we want to remove.
1151          */
1152
1153         ZERO_STRUCTP(ul);
1154         ul->start = offset;
1155         ul->size = count;
1156
1157         DLIST_ADD(ulist, ul);
1158
1159         /*
1160          * The following call calculates if there are any
1161          * overlapping locks held by this process on
1162          * fd's open on the same file and creates a
1163          * list of unlock ranges that will allow
1164          * POSIX lock ranges to remain on the file whilst the
1165          * unlocks are performed.
1166          */
1167
1168         ulist = posix_lock_list(ul_ctx,
1169                                 ulist,
1170                                 lock_ctx, /* Lock context ulist belongs to. */
1171                                 fsp,
1172                                 plocks,
1173                                 num_locks);
1174
1175         /*
1176          * If there were any overlapped entries (list is > 1 or size or start have changed),
1177          * and the lock_type we just deleted from
1178          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1179          * the POSIX lock to a read lock. This allows any overlapping read locks
1180          * to be atomically maintained.
1181          */
1182
1183         if (deleted_lock_type == WRITE_LOCK &&
1184                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1185
1186                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1187                         (double)offset, (double)count ));
1188
1189                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1190                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1191                         talloc_destroy(ul_ctx);
1192                         return False;
1193                 }
1194         }
1195
1196         /*
1197          * Release the POSIX locks on the list of ranges returned.
1198          */
1199
1200         for(; ulist; ulist = ulist->next) {
1201                 offset = ulist->start;
1202                 count = ulist->size;
1203
1204                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1205                         (double)offset, (double)count ));
1206
1207                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1208                         ret = False;
1209                 }
1210         }
1211
1212         talloc_destroy(ul_ctx);
1213         return ret;
1214 }
1215
1216 /****************************************************************************
1217  Next - the functions that deal with mapping CIFS POSIX locks onto
1218  the underlying system POSIX locks.
1219 ****************************************************************************/
1220
1221 /****************************************************************************
1222  POSIX function to acquire a lock. Returns True if the
1223  lock could be granted, False if not.
1224  As POSIX locks don't stack or conflict (they just overwrite)
1225  we can map the requested lock directly onto a system one. We
1226  know it doesn't conflict with locks on other contexts as the
1227  upper layer would have refused it.
1228 ****************************************************************************/
1229
1230 bool set_posix_lock_posix_flavour(files_struct *fsp,
1231                         uint64_t u_offset,
1232                         uint64_t u_count,
1233                         enum brl_type lock_type,
1234                         int *errno_ret)
1235 {
1236         off_t offset;
1237         off_t count;
1238         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1239
1240         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1241                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1242                  (double)u_offset, (double)u_count,
1243                  posix_lock_type_name(lock_type)));
1244
1245         /*
1246          * If the requested lock won't fit in the POSIX range, we will
1247          * pretend it was successful.
1248          */
1249
1250         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1251                 return True;
1252         }
1253
1254         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1255                 *errno_ret = errno;
1256                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1257                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1258                 return False;
1259         }
1260         return True;
1261 }
1262
1263 /****************************************************************************
1264  POSIX function to release a lock. Returns True if the
1265  lock could be released, False if not.
1266  We are given a complete lock state from the upper layer which is what the lock
1267  state should be after the unlock has already been done, so what
1268  we do is punch out holes in the unlock range where locks owned by this process
1269  have a different lock context.
1270 ****************************************************************************/
1271
1272 bool release_posix_lock_posix_flavour(files_struct *fsp,
1273                                 uint64_t u_offset,
1274                                 uint64_t u_count,
1275                                 const struct lock_context *lock_ctx,
1276                                 const struct lock_struct *plocks,
1277                                 int num_locks)
1278 {
1279         bool ret = True;
1280         off_t offset;
1281         off_t count;
1282         TALLOC_CTX *ul_ctx = NULL;
1283         struct lock_list *ulist = NULL;
1284         struct lock_list *ul = NULL;
1285
1286         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1287                  "count = %.0f\n", fsp_str_dbg(fsp),
1288                  (double)u_offset, (double)u_count));
1289
1290         /*
1291          * If the requested lock won't fit in the POSIX range, we will
1292          * pretend it was successful.
1293          */
1294
1295         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1296                 return True;
1297         }
1298
1299         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1300                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1301                 return False;
1302         }
1303
1304         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1305                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1306                 talloc_destroy(ul_ctx);
1307                 return False;
1308         }
1309
1310         /*
1311          * Create the initial list entry containing the
1312          * lock we want to remove.
1313          */
1314
1315         ZERO_STRUCTP(ul);
1316         ul->start = offset;
1317         ul->size = count;
1318
1319         DLIST_ADD(ulist, ul);
1320
1321         /*
1322          * Walk the given array creating a linked list
1323          * of unlock requests.
1324          */
1325
1326         ulist = posix_lock_list(ul_ctx,
1327                                 ulist,
1328                                 lock_ctx, /* Lock context ulist belongs to. */
1329                                 fsp,
1330                                 plocks,
1331                                 num_locks);
1332
1333         /*
1334          * Release the POSIX locks on the list of ranges returned.
1335          */
1336
1337         for(; ulist; ulist = ulist->next) {
1338                 offset = ulist->start;
1339                 count = ulist->size;
1340
1341                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1342                         (double)offset, (double)count ));
1343
1344                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1345                         ret = False;
1346                 }
1347         }
1348
1349         talloc_destroy(ul_ctx);
1350         return ret;
1351 }