s3-includes: only include system/filesys.h when needed.
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "dbwrap.h"
27
28 #undef DBGC_CLASS
29 #define DBGC_CLASS DBGC_LOCKING
30
31 /*
32  * The pending close database handle.
33  */
34
35 static struct db_context *posix_pending_close_db;
36
37 /****************************************************************************
38  First - the functions that deal with the underlying system locks - these
39  functions are used no matter if we're mapping CIFS Windows locks or CIFS
40  POSIX locks onto POSIX.
41 ****************************************************************************/
42
43 /****************************************************************************
44  Utility function to map a lock type correctly depending on the open
45  mode of a file.
46 ****************************************************************************/
47
48 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
49 {
50         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
51                 /*
52                  * Many UNIX's cannot get a write lock on a file opened read-only.
53                  * Win32 locking semantics allow this.
54                  * Do the best we can and attempt a read-only lock.
55                  */
56                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
57                 return F_RDLCK;
58         }
59
60         /*
61          * This return should be the most normal, as we attempt
62          * to always open files read/write.
63          */
64
65         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
66 }
67
/****************************************************************************
 Debugging aid: printable name of an fcntl() lock type.
 F_RDLCK gives "READ"; every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
76
77 /****************************************************************************
78  Check to see if the given unsigned lock range is within the possible POSIX
79  range. Modifies the given args to be in range if possible, just returns
80  False if not.
81 ****************************************************************************/
82
83 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
84                                 uint64_t u_offset, uint64_t u_count)
85 {
86         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
87         SMB_OFF_T count = (SMB_OFF_T)u_count;
88
89         /*
90          * For the type of system we are, attempt to
91          * find the maximum positive lock offset as an SMB_OFF_T.
92          */
93
94 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
95
96         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
97
98 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
99
100         /*
101          * In this case SMB_OFF_T is 64 bits,
102          * and the underlying system can handle 64 bit signed locks.
103          */
104
105         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
106         SMB_OFF_T mask = (mask2<<1);
107         SMB_OFF_T max_positive_lock_offset = ~mask;
108
109 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
110
111         /*
112          * In this case either SMB_OFF_T is 32 bits,
113          * or the underlying system cannot handle 64 bit signed locks.
114          * All offsets & counts must be 2^31 or less.
115          */
116
117         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
118
119 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
120
121         /*
122          * POSIX locks of length zero mean lock to end-of-file.
123          * Win32 locks of length zero are point probes. Ignore
124          * any Win32 locks of length zero. JRA.
125          */
126
127         if (count == (SMB_OFF_T)0) {
128                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
129                 return False;
130         }
131
132         /*
133          * If the given offset was > max_positive_lock_offset then we cannot map this at all
134          * ignore this lock.
135          */
136
137         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
138                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
139                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
140                 return False;
141         }
142
143         /*
144          * We must truncate the count to less than max_positive_lock_offset.
145          */
146
147         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
148                 count = max_positive_lock_offset;
149         }
150
151         /*
152          * Truncate count to end at max lock offset.
153          */
154
155         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
156                 count = max_positive_lock_offset - offset;
157         }
158
159         /*
160          * If we ate all the count, ignore this lock.
161          */
162
163         if (count == 0) {
164                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
165                                 (double)u_offset, (double)u_count ));
166                 return False;
167         }
168
169         /*
170          * The mapping was successful.
171          */
172
173         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
174                         (double)offset, (double)count ));
175
176         *offset_out = offset;
177         *count_out = count;
178         
179         return True;
180 }
181
182 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
183                        struct files_struct *fsp, int op, SMB_OFF_T offset,
184                        SMB_OFF_T count, int type)
185 {
186         VFS_FIND(lock);
187         return handle->fns->lock(handle, fsp, op, offset, count, type);
188 }
189
190 /****************************************************************************
191  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
192  broken NFS implementations.
193 ****************************************************************************/
194
195 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
196 {
197         bool ret;
198
199         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
200
201         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
202
203         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
204
205                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
206                                         (double)offset,(double)count));
207                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
208                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
209
210                 /*
211                  * If the offset is > 0x7FFFFFFF then this will cause problems on
212                  * 32 bit NFS mounted filesystems. Just ignore it.
213                  */
214
215                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
216                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
217                         return True;
218                 }
219
220                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
221                         /* 32 bit NFS file system, retry with smaller offset */
222                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
223                         errno = 0;
224                         count &= 0x7fffffff;
225                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
226                 }
227         }
228
229         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
230         return ret;
231 }
232
233 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
234                           struct files_struct *fsp, SMB_OFF_T *poffset,
235                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
236 {
237         VFS_FIND(getlock);
238         return handle->fns->getlock(handle, fsp, poffset, pcount, ptype, ppid);
239 }
240
241 /****************************************************************************
242  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
243  broken NFS implementations.
244 ****************************************************************************/
245
246 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
247 {
248         pid_t pid;
249         bool ret;
250
251         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
252                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
253
254         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
255
256         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
257
258                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
259                                         (double)*poffset,(double)*pcount));
260                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
261                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
262
263                 /*
264                  * If the offset is > 0x7FFFFFFF then this will cause problems on
265                  * 32 bit NFS mounted filesystems. Just ignore it.
266                  */
267
268                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
269                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
270                         return True;
271                 }
272
273                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
274                         /* 32 bit NFS file system, retry with smaller offset */
275                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
276                         errno = 0;
277                         *pcount &= 0x7fffffff;
278                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
279                 }
280         }
281
282         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
283         return ret;
284 }
285
286 /****************************************************************************
287  POSIX function to see if a file region is locked. Returns True if the
288  region is locked, False otherwise.
289 ****************************************************************************/
290
291 bool is_posix_locked(files_struct *fsp,
292                         uint64_t *pu_offset,
293                         uint64_t *pu_count,
294                         enum brl_type *plock_type,
295                         enum brl_flavour lock_flav)
296 {
297         SMB_OFF_T offset;
298         SMB_OFF_T count;
299         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
300
301         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
302                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
303                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
304
305         /*
306          * If the requested lock won't fit in the POSIX range, we will
307          * never set it, so presume it is not locked.
308          */
309
310         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
311                 return False;
312         }
313
314         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
315                 return False;
316         }
317
318         if (posix_lock_type == F_UNLCK) {
319                 return False;
320         }
321
322         if (lock_flav == POSIX_LOCK) {
323                 /* Only POSIX lock queries need to know the details. */
324                 *pu_offset = (uint64_t)offset;
325                 *pu_count = (uint64_t)count;
326                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
327         }
328         return True;
329 }
330
331 /****************************************************************************
332  Next - the functions that deal with in memory database storing representations
333  of either Windows CIFS locks or POSIX CIFS locks.
334 ****************************************************************************/
335
336 /* The key used in the in-memory POSIX databases. */
337
338 struct lock_ref_count_key {
339         struct file_id id;
340         char r;
341 }; 
342
343 /*******************************************************************
344  Form a static locking key for a dev/inode pair for the lock ref count
345 ******************************************************************/
346
347 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
348                                           struct lock_ref_count_key *tmp)
349 {
350         ZERO_STRUCTP(tmp);
351         tmp->id = fsp->file_id;
352         tmp->r = 'r';
353         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
354 }
355
356 /*******************************************************************
357  Convenience function to get an fd_array key from an fsp.
358 ******************************************************************/
359
360 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
361 {
362         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
363 }
364
365 /*******************************************************************
366  Create the in-memory POSIX lock databases.
367 ********************************************************************/
368
369 bool posix_locking_init(bool read_only)
370 {
371         if (posix_pending_close_db != NULL) {
372                 return true;
373         }
374
375         posix_pending_close_db = db_open_rbt(NULL);
376
377         if (posix_pending_close_db == NULL) {
378                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
379                 return false;
380         }
381
382         return true;
383 }
384
385 /*******************************************************************
386  Delete the in-memory POSIX lock databases.
387 ********************************************************************/
388
389 bool posix_locking_end(void)
390 {
391         /*
392          * Shouldn't we close all fd's here?
393          */
394         TALLOC_FREE(posix_pending_close_db);
395         return true;
396 }
397
398 /****************************************************************************
399  Next - the functions that deal with storing fd's that have outstanding
400  POSIX locks when closed.
401 ****************************************************************************/
402
/****************************************************************************
 posix_pending_close_db holds up to two records per dev/ino pair (file_id):
 - The lock reference count, keyed by a struct lock_ref_count_key (the
   file_id followed by the tag 'r'). Its value is a single int counting
   the outstanding locks on all open fd's on this dev/ino pair.
 - The pending close array, keyed by the bare file_id. Its value is an
   array of ints, each an fd that was open on this dev/ino pair and should
   have been closed, but can't be as the lock ref count is non zero.
****************************************************************************/
411
412 /****************************************************************************
413  Keep a reference count of the number of Windows locks open on this dev/ino
414  pair. Creates entry if it doesn't exist.
415 ****************************************************************************/
416
417 static void increment_windows_lock_ref_count(files_struct *fsp)
418 {
419         struct lock_ref_count_key tmp;
420         struct db_record *rec;
421         int lock_ref_count = 0;
422         NTSTATUS status;
423
424         rec = posix_pending_close_db->fetch_locked(
425                 posix_pending_close_db, talloc_tos(),
426                 locking_ref_count_key_fsp(fsp, &tmp));
427
428         SMB_ASSERT(rec != NULL);
429
430         if (rec->value.dptr != NULL) {
431                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
432                 memcpy(&lock_ref_count, rec->value.dptr,
433                        sizeof(lock_ref_count));
434         }
435
436         lock_ref_count++;
437
438         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
439                                                sizeof(lock_ref_count)), 0);
440
441         SMB_ASSERT(NT_STATUS_IS_OK(status));
442
443         TALLOC_FREE(rec);
444
445         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
446                   fsp_str_dbg(fsp), lock_ref_count));
447 }
448
449 /****************************************************************************
450  Bulk delete - subtract as many locks as we've just deleted.
451 ****************************************************************************/
452
453 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
454 {
455         struct lock_ref_count_key tmp;
456         struct db_record *rec;
457         int lock_ref_count = 0;
458         NTSTATUS status;
459
460         rec = posix_pending_close_db->fetch_locked(
461                 posix_pending_close_db, talloc_tos(),
462                 locking_ref_count_key_fsp(fsp, &tmp));
463
464         SMB_ASSERT((rec != NULL)
465                    && (rec->value.dptr != NULL)
466                    && (rec->value.dsize == sizeof(lock_ref_count)));
467
468         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
469
470         SMB_ASSERT(lock_ref_count > 0);
471
472         lock_ref_count -= dcount;
473
474         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
475                                                sizeof(lock_ref_count)), 0);
476
477         SMB_ASSERT(NT_STATUS_IS_OK(status));
478
479         TALLOC_FREE(rec);
480
481         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
482                   fsp_str_dbg(fsp), lock_ref_count));
483 }
484
485 static void decrement_windows_lock_ref_count(files_struct *fsp)
486 {
487         reduce_windows_lock_ref_count(fsp, 1);
488 }
489
490 /****************************************************************************
491  Fetch the lock ref count.
492 ****************************************************************************/
493
494 static int get_windows_lock_ref_count(files_struct *fsp)
495 {
496         struct lock_ref_count_key tmp;
497         TDB_DATA dbuf;
498         int res;
499         int lock_ref_count = 0;
500
501         res = posix_pending_close_db->fetch(
502                 posix_pending_close_db, talloc_tos(),
503                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
504
505         SMB_ASSERT(res == 0);
506
507         if (dbuf.dsize != 0) {
508                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
509                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
510                 TALLOC_FREE(dbuf.dptr);
511         }
512
513         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
514                   fsp_str_dbg(fsp), lock_ref_count));
515
516         return lock_ref_count;
517 }
518
519 /****************************************************************************
520  Delete a lock_ref_count entry.
521 ****************************************************************************/
522
523 static void delete_windows_lock_ref_count(files_struct *fsp)
524 {
525         struct lock_ref_count_key tmp;
526         struct db_record *rec;
527
528         rec = posix_pending_close_db->fetch_locked(
529                 posix_pending_close_db, talloc_tos(),
530                 locking_ref_count_key_fsp(fsp, &tmp));
531
532         SMB_ASSERT(rec != NULL);
533
534         /* Not a bug if it doesn't exist - no locks were ever granted. */
535
536         rec->delete_rec(rec);
537         TALLOC_FREE(rec);
538
539         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
540                   fsp_str_dbg(fsp)));
541 }
542
543 /****************************************************************************
544  Add an fd to the pending close tdb.
545 ****************************************************************************/
546
547 static void add_fd_to_close_entry(files_struct *fsp)
548 {
549         struct db_record *rec;
550         uint8_t *new_data;
551         NTSTATUS status;
552
553         rec = posix_pending_close_db->fetch_locked(
554                 posix_pending_close_db, talloc_tos(),
555                 fd_array_key_fsp(fsp));
556
557         SMB_ASSERT(rec != NULL);
558
559         new_data = TALLOC_ARRAY(
560                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
561
562         SMB_ASSERT(new_data != NULL);
563
564         memcpy(new_data, rec->value.dptr, rec->value.dsize);
565         memcpy(new_data + rec->value.dsize,
566                &fsp->fh->fd, sizeof(fsp->fh->fd));
567
568         status = rec->store(
569                 rec, make_tdb_data(new_data,
570                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
571
572         SMB_ASSERT(NT_STATUS_IS_OK(status));
573
574         TALLOC_FREE(rec);
575
576         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
577                   fsp->fh->fd, fsp_str_dbg(fsp)));
578 }
579
580 /****************************************************************************
581  Remove all fd entries for a specific dev/inode pair from the tdb.
582 ****************************************************************************/
583
584 static void delete_close_entries(files_struct *fsp)
585 {
586         struct db_record *rec;
587
588         rec = posix_pending_close_db->fetch_locked(
589                 posix_pending_close_db, talloc_tos(),
590                 fd_array_key_fsp(fsp));
591
592         SMB_ASSERT(rec != NULL);
593         rec->delete_rec(rec);
594         TALLOC_FREE(rec);
595 }
596
597 /****************************************************************************
598  Get the array of POSIX pending close records for an open fsp. Returns number
599  of entries.
600 ****************************************************************************/
601
602 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
603                                               files_struct *fsp, int **entries)
604 {
605         TDB_DATA dbuf;
606         int res;
607
608         res = posix_pending_close_db->fetch(
609                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
610                 &dbuf);
611
612         SMB_ASSERT(res == 0);
613
614         if (dbuf.dsize == 0) {
615                 *entries = NULL;
616                 return 0;
617         }
618
619         *entries = (int *)dbuf.dptr;
620         return (size_t)(dbuf.dsize / sizeof(int));
621 }
622
623 /****************************************************************************
624  Deal with pending closes needed by POSIX locking support.
625  Note that posix_locking_close_file() is expected to have been called
626  to delete all locks on this fsp before this function is called.
627 ****************************************************************************/
628
629 int fd_close_posix(struct files_struct *fsp)
630 {
631         int saved_errno = 0;
632         int ret;
633         int *fd_array = NULL;
634         size_t count, i;
635
636         if (!lp_locking(fsp->conn->params) ||
637             !lp_posix_locking(fsp->conn->params))
638         {
639                 /*
640                  * No locking or POSIX to worry about or we want POSIX semantics
641                  * which will lose all locks on all fd's open on this dev/inode,
642                  * just close.
643                  */
644                 return close(fsp->fh->fd);
645         }
646
647         if (get_windows_lock_ref_count(fsp)) {
648
649                 /*
650                  * There are outstanding locks on this dev/inode pair on
651                  * other fds. Add our fd to the pending close tdb and set
652                  * fsp->fh->fd to -1.
653                  */
654
655                 add_fd_to_close_entry(fsp);
656                 return 0;
657         }
658
659         /*
660          * No outstanding locks. Get the pending close fd's
661          * from the tdb and close them all.
662          */
663
664         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
665
666         if (count) {
667                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
668                           (unsigned int)count));
669
670                 for(i = 0; i < count; i++) {
671                         if (close(fd_array[i]) == -1) {
672                                 saved_errno = errno;
673                         }
674                 }
675
676                 /*
677                  * Delete all fd's stored in the tdb
678                  * for this dev/inode pair.
679                  */
680
681                 delete_close_entries(fsp);
682         }
683
684         TALLOC_FREE(fd_array);
685
686         /* Don't need a lock ref count on this dev/ino anymore. */
687         delete_windows_lock_ref_count(fsp);
688
689         /*
690          * Finally close the fd associated with this fsp.
691          */
692
693         ret = close(fsp->fh->fd);
694
695         if (ret == 0 && saved_errno != 0) {
696                 errno = saved_errno;
697                 ret = -1;
698         }
699
700         return ret;
701 }
702
703 /****************************************************************************
704  Next - the functions that deal with the mapping CIFS Windows locks onto
705  the underlying system POSIX locks.
706 ****************************************************************************/
707
708 /*
709  * Structure used when splitting a lock range
710  * into a POSIX lock range. Doubly linked list.
711  */
712
713 struct lock_list {
714         struct lock_list *next;
715         struct lock_list *prev;
716         SMB_OFF_T start;
717         SMB_OFF_T size;
718 };
719
720 /****************************************************************************
721  Create a list of lock ranges that don't overlap a given range. Used in calculating
722  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
723  understand it :-).
724 ****************************************************************************/
725
726 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
727                                                 struct lock_list *lhead,
728                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
729                                                 files_struct *fsp,
730                                                 const struct lock_struct *plocks,
731                                                 int num_locks)
732 {
733         int i;
734
735         /*
736          * Check the current lock list on this dev/inode pair.
737          * Quit if the list is deleted.
738          */
739
740         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
741                 (double)lhead->start, (double)lhead->size ));
742
743         for (i=0; i<num_locks && lhead; i++) {
744                 const struct lock_struct *lock = &plocks[i];
745                 struct lock_list *l_curr;
746
747                 /* Ignore all but read/write locks. */
748                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
749                         continue;
750                 }
751
752                 /* Ignore locks not owned by this process. */
753                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
754                         continue;
755                 }
756
757                 /*
758                  * Walk the lock list, checking for overlaps. Note that
759                  * the lock list can expand within this loop if the current
760                  * range being examined needs to be split.
761                  */
762
763                 for (l_curr = lhead; l_curr;) {
764
765                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
766                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
767
768                         if ( (l_curr->start >= (lock->start + lock->size)) ||
769                                  (lock->start >= (l_curr->start + l_curr->size))) {
770
771                                 /* No overlap with existing lock - leave this range alone. */
772 /*********************************************
773                                              +---------+
774                                              | l_curr  |
775                                              +---------+
776                                 +-------+
777                                 | lock  |
778                                 +-------+
779 OR....
780              +---------+
781              |  l_curr |
782              +---------+
783 **********************************************/
784
785                                 DEBUG(10,(" no overlap case.\n" ));
786
787                                 l_curr = l_curr->next;
788
789                         } else if ( (l_curr->start >= lock->start) &&
790                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
791
792                                 /*
793                                  * This range is completely overlapped by this existing lock range
794                                  * and thus should have no effect. Delete it from the list.
795                                  */
796 /*********************************************
797                 +---------+
798                 |  l_curr |
799                 +---------+
800         +---------------------------+
801         |       lock                |
802         +---------------------------+
803 **********************************************/
804                                 /* Save the next pointer */
805                                 struct lock_list *ul_next = l_curr->next;
806
807                                 DEBUG(10,(" delete case.\n" ));
808
809                                 DLIST_REMOVE(lhead, l_curr);
810                                 if(lhead == NULL) {
811                                         break; /* No more list... */
812                                 }
813
814                                 l_curr = ul_next;
815                                 
816                         } else if ( (l_curr->start >= lock->start) &&
817                                                 (l_curr->start < lock->start + lock->size) &&
818                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
819
820                                 /*
821                                  * This range overlaps the existing lock range at the high end.
822                                  * Truncate by moving start to existing range end and reducing size.
823                                  */
824 /*********************************************
825                 +---------------+
826                 |  l_curr       |
827                 +---------------+
828         +---------------+
829         |    lock       |
830         +---------------+
831 BECOMES....
832                         +-------+
833                         | l_curr|
834                         +-------+
835 **********************************************/
836
837                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
838                                 l_curr->start = lock->start + lock->size;
839
840                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
841                                                                 (double)l_curr->start, (double)l_curr->size ));
842
843                                 l_curr = l_curr->next;
844
845                         } else if ( (l_curr->start < lock->start) &&
846                                                 (l_curr->start + l_curr->size > lock->start) &&
847                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
848
849                                 /*
850                                  * This range overlaps the existing lock range at the low end.
851                                  * Truncate by reducing size.
852                                  */
853 /*********************************************
854    +---------------+
855    |  l_curr       |
856    +---------------+
857            +---------------+
858            |    lock       |
859            +---------------+
860 BECOMES....
861    +-------+
862    | l_curr|
863    +-------+
864 **********************************************/
865
866                                 l_curr->size = lock->start - l_curr->start;
867
868                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
869                                                                 (double)l_curr->start, (double)l_curr->size ));
870
871                                 l_curr = l_curr->next;
872                 
873                         } else if ( (l_curr->start < lock->start) &&
874                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
875                                 /*
876                                  * Worst case scenario. Range completely overlaps an existing
877                                  * lock range. Split the request into two, push the new (upper) request
878                                  * into the dlink list, and continue with the entry after l_new (as we
879                                  * know that l_new will not overlap with this lock).
880                                  */
881 /*********************************************
882         +---------------------------+
883         |        l_curr             |
884         +---------------------------+
885                 +---------+
886                 | lock    |
887                 +---------+
888 BECOMES.....
889         +-------+         +---------+
890         | l_curr|         | l_new   |
891         +-------+         +---------+
892 **********************************************/
893                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
894
895                                 if(l_new == NULL) {
896                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
897                                         return NULL; /* The talloc_destroy takes care of cleanup. */
898                                 }
899
900                                 ZERO_STRUCTP(l_new);
901                                 l_new->start = lock->start + lock->size;
902                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
903
904                                 /* Truncate the l_curr. */
905                                 l_curr->size = lock->start - l_curr->start;
906
907                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
908 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
909                                                                 (double)l_new->start, (double)l_new->size ));
910
911                                 /*
912                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
913                                  */
914                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
915
916                                 /* And move after the link we added. */
917                                 l_curr = l_new->next;
918
919                         } else {
920
921                                 /*
922                                  * This logic case should never happen. Ensure this is the
923                                  * case by forcing an abort.... Remove in production.
924                                  */
925                                 char *msg = NULL;
926
927                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
928 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
929                                         smb_panic(msg);
930                                 } else {
931                                         smb_panic("posix_lock_list");
932                                 }
933                         }
934                 } /* end for ( l_curr = lhead; l_curr;) */
935         } /* end for (i=0; i<num_locks && ul_head; i++) */
936
937         return lhead;
938 }
939
940 /****************************************************************************
941  POSIX function to acquire a lock. Returns True if the
942  lock could be granted, False if not.
943 ****************************************************************************/
944
945 bool set_posix_lock_windows_flavour(files_struct *fsp,
946                         uint64_t u_offset,
947                         uint64_t u_count,
948                         enum brl_type lock_type,
949                         const struct lock_context *lock_ctx,
950                         const struct lock_struct *plocks,
951                         int num_locks,
952                         int *errno_ret)
953 {
954         SMB_OFF_T offset;
955         SMB_OFF_T count;
956         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
957         bool ret = True;
958         size_t lock_count;
959         TALLOC_CTX *l_ctx = NULL;
960         struct lock_list *llist = NULL;
961         struct lock_list *ll = NULL;
962
963         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
964                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
965                  (double)u_offset, (double)u_count,
966                  posix_lock_type_name(lock_type)));
967
968         /*
969          * If the requested lock won't fit in the POSIX range, we will
970          * pretend it was successful.
971          */
972
973         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
974                 increment_windows_lock_ref_count(fsp);
975                 return True;
976         }
977
978         /*
979          * Windows is very strange. It allows read locks to be overlayed
980          * (even over a write lock), but leaves the write lock in force until the first
981          * unlock. It also reference counts the locks. This means the following sequence :
982          *
983          * process1                                      process2
984          * ------------------------------------------------------------------------
985          * WRITE LOCK : start = 2, len = 10
986          *                                            READ LOCK: start =0, len = 10 - FAIL
987          * READ LOCK : start = 0, len = 14 
988          *                                            READ LOCK: start =0, len = 10 - FAIL
989          * UNLOCK : start = 2, len = 10
990          *                                            READ LOCK: start =0, len = 10 - OK
991          *
992          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
993          * would leave a single read lock over the 0-14 region.
994          */
995         
996         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
997                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
998                 return False;
999         }
1000
1001         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1002                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1003                 talloc_destroy(l_ctx);
1004                 return False;
1005         }
1006
1007         /*
1008          * Create the initial list entry containing the
1009          * lock we want to add.
1010          */
1011
1012         ZERO_STRUCTP(ll);
1013         ll->start = offset;
1014         ll->size = count;
1015
1016         DLIST_ADD(llist, ll);
1017
1018         /*
1019          * The following call calculates if there are any
1020          * overlapping locks held by this process on
1021          * fd's open on the same file and splits this list
1022          * into a list of lock ranges that do not overlap with existing
1023          * POSIX locks.
1024          */
1025
1026         llist = posix_lock_list(l_ctx,
1027                                 llist,
1028                                 lock_ctx, /* Lock context llist belongs to. */
1029                                 fsp,
1030                                 plocks,
1031                                 num_locks);
1032
1033         /*
1034          * Add the POSIX locks on the list of ranges returned.
1035          * As the lock is supposed to be added atomically, we need to
1036          * back out all the locks if any one of these calls fail.
1037          */
1038
1039         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1040                 offset = ll->start;
1041                 count = ll->size;
1042
1043                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1044                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1045
1046                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1047                         *errno_ret = errno;
1048                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1049                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1050                         ret = False;
1051                         break;
1052                 }
1053         }
1054
1055         if (!ret) {
1056
1057                 /*
1058                  * Back out all the POSIX locks we have on fail.
1059                  */
1060
1061                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1062                         offset = ll->start;
1063                         count = ll->size;
1064
1065                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1066                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1067
1068                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1069                 }
1070         } else {
1071                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1072                 increment_windows_lock_ref_count(fsp);
1073         }
1074
1075         talloc_destroy(l_ctx);
1076         return ret;
1077 }
1078
1079 /****************************************************************************
1080  POSIX function to release a lock. Returns True if the
1081  lock could be released, False if not.
1082 ****************************************************************************/
1083
1084 bool release_posix_lock_windows_flavour(files_struct *fsp,
1085                                 uint64_t u_offset,
1086                                 uint64_t u_count,
1087                                 enum brl_type deleted_lock_type,
1088                                 const struct lock_context *lock_ctx,
1089                                 const struct lock_struct *plocks,
1090                                 int num_locks)
1091 {
1092         SMB_OFF_T offset;
1093         SMB_OFF_T count;
1094         bool ret = True;
1095         TALLOC_CTX *ul_ctx = NULL;
1096         struct lock_list *ulist = NULL;
1097         struct lock_list *ul = NULL;
1098
1099         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1100                  "count = %.0f\n", fsp_str_dbg(fsp),
1101                  (double)u_offset, (double)u_count));
1102
1103         /* Remember the number of Windows locks we have on this dev/ino pair. */
1104         decrement_windows_lock_ref_count(fsp);
1105
1106         /*
1107          * If the requested lock won't fit in the POSIX range, we will
1108          * pretend it was successful.
1109          */
1110
1111         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1112                 return True;
1113         }
1114
1115         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1116                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1117                 return False;
1118         }
1119
1120         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1121                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1122                 talloc_destroy(ul_ctx);
1123                 return False;
1124         }
1125
1126         /*
1127          * Create the initial list entry containing the
1128          * lock we want to remove.
1129          */
1130
1131         ZERO_STRUCTP(ul);
1132         ul->start = offset;
1133         ul->size = count;
1134
1135         DLIST_ADD(ulist, ul);
1136
1137         /*
1138          * The following call calculates if there are any
1139          * overlapping locks held by this process on
1140          * fd's open on the same file and creates a
1141          * list of unlock ranges that will allow
1142          * POSIX lock ranges to remain on the file whilst the
1143          * unlocks are performed.
1144          */
1145
1146         ulist = posix_lock_list(ul_ctx,
1147                                 ulist,
1148                                 lock_ctx, /* Lock context ulist belongs to. */
1149                                 fsp,
1150                                 plocks,
1151                                 num_locks);
1152
1153         /*
1154          * If there were any overlapped entries (list is > 1 or size or start have changed),
1155          * and the lock_type we just deleted from
1156          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1157          * the POSIX lock to a read lock. This allows any overlapping read locks
1158          * to be atomically maintained.
1159          */
1160
1161         if (deleted_lock_type == WRITE_LOCK &&
1162                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1163
1164                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1165                         (double)offset, (double)count ));
1166
1167                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1168                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1169                         talloc_destroy(ul_ctx);
1170                         return False;
1171                 }
1172         }
1173
1174         /*
1175          * Release the POSIX locks on the list of ranges returned.
1176          */
1177
1178         for(; ulist; ulist = ulist->next) {
1179                 offset = ulist->start;
1180                 count = ulist->size;
1181
1182                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1183                         (double)offset, (double)count ));
1184
1185                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1186                         ret = False;
1187                 }
1188         }
1189
1190         talloc_destroy(ul_ctx);
1191         return ret;
1192 }
1193
1194 /****************************************************************************
1195  Next - the functions that deal with mapping CIFS POSIX locks onto
1196  the underlying system POSIX locks.
1197 ****************************************************************************/
1198
1199 /****************************************************************************
1200  POSIX function to acquire a lock. Returns True if the
1201  lock could be granted, False if not.
1202  As POSIX locks don't stack or conflict (they just overwrite)
1203  we can map the requested lock directly onto a system one. We
1204  know it doesn't conflict with locks on other contexts as the
1205  upper layer would have refused it.
1206 ****************************************************************************/
1207
1208 bool set_posix_lock_posix_flavour(files_struct *fsp,
1209                         uint64_t u_offset,
1210                         uint64_t u_count,
1211                         enum brl_type lock_type,
1212                         int *errno_ret)
1213 {
1214         SMB_OFF_T offset;
1215         SMB_OFF_T count;
1216         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1217
1218         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1219                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1220                  (double)u_offset, (double)u_count,
1221                  posix_lock_type_name(lock_type)));
1222
1223         /*
1224          * If the requested lock won't fit in the POSIX range, we will
1225          * pretend it was successful.
1226          */
1227
1228         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1229                 return True;
1230         }
1231
1232         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1233                 *errno_ret = errno;
1234                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1235                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1236                 return False;
1237         }
1238         return True;
1239 }
1240
1241 /****************************************************************************
1242  POSIX function to release a lock. Returns True if the
1243  lock could be released, False if not.
1244  We are given a complete lock state from the upper layer which is what the lock
1245  state should be after the unlock has already been done, so what
1246  we do is punch out holes in the unlock range where locks owned by this process
1247  have a different lock context.
1248 ****************************************************************************/
1249
1250 bool release_posix_lock_posix_flavour(files_struct *fsp,
1251                                 uint64_t u_offset,
1252                                 uint64_t u_count,
1253                                 const struct lock_context *lock_ctx,
1254                                 const struct lock_struct *plocks,
1255                                 int num_locks)
1256 {
1257         bool ret = True;
1258         SMB_OFF_T offset;
1259         SMB_OFF_T count;
1260         TALLOC_CTX *ul_ctx = NULL;
1261         struct lock_list *ulist = NULL;
1262         struct lock_list *ul = NULL;
1263
1264         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1265                  "count = %.0f\n", fsp_str_dbg(fsp),
1266                  (double)u_offset, (double)u_count));
1267
1268         /*
1269          * If the requested lock won't fit in the POSIX range, we will
1270          * pretend it was successful.
1271          */
1272
1273         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1274                 return True;
1275         }
1276
1277         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1278                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1279                 return False;
1280         }
1281
1282         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1283                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1284                 talloc_destroy(ul_ctx);
1285                 return False;
1286         }
1287
1288         /*
1289          * Create the initial list entry containing the
1290          * lock we want to remove.
1291          */
1292
1293         ZERO_STRUCTP(ul);
1294         ul->start = offset;
1295         ul->size = count;
1296
1297         DLIST_ADD(ulist, ul);
1298
1299         /*
1300          * Walk the given array creating a linked list
1301          * of unlock requests.
1302          */
1303
1304         ulist = posix_lock_list(ul_ctx,
1305                                 ulist,
1306                                 lock_ctx, /* Lock context ulist belongs to. */
1307                                 fsp,
1308                                 plocks,
1309                                 num_locks);
1310
1311         /*
1312          * Release the POSIX locks on the list of ranges returned.
1313          */
1314
1315         for(; ulist; ulist = ulist->next) {
1316                 offset = ulist->start;
1317                 count = ulist->size;
1318
1319                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1320                         (double)offset, (double)count ));
1321
1322                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1323                         ret = False;
1324                 }
1325         }
1326
1327         talloc_destroy(ul_ctx);
1328         return ret;
1329 }