Merge branch 'v3-2-test' of ssh://git.samba.org/data/git/samba into v3-2-test
[ddiss/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25
26 #undef DBGC_CLASS
27 #define DBGC_CLASS DBGC_LOCKING
28
/*
 * The pending close database handle.
 * Opened by posix_locking_init() and released by posix_locking_end();
 * it is NULL whenever the database is not open.
 */

static struct db_context *posix_pending_close_db;
34
35 /****************************************************************************
36  First - the functions that deal with the underlying system locks - these
37  functions are used no matter if we're mapping CIFS Windows locks or CIFS
38  POSIX locks onto POSIX.
39 ****************************************************************************/
40
41 /****************************************************************************
42  Utility function to map a lock type correctly depending on the open
43  mode of a file.
44 ****************************************************************************/
45
46 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
47 {
48         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
49                 /*
50                  * Many UNIX's cannot get a write lock on a file opened read-only.
51                  * Win32 locking semantics allow this.
52                  * Do the best we can and attempt a read-only lock.
53                  */
54                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
55                 return F_RDLCK;
56         }
57
58         /*
59          * This return should be the most normal, as we attempt
60          * to always open files read/write.
61          */
62
63         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
64 }
65
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type.
 F_RDLCK yields "READ"; every other value yields "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
74
75 /****************************************************************************
76  Check to see if the given unsigned lock range is within the possible POSIX
77  range. Modifies the given args to be in range if possible, just returns
78  False if not.
79 ****************************************************************************/
80
81 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
82                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
83 {
84         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
85         SMB_OFF_T count = (SMB_OFF_T)u_count;
86
87         /*
88          * For the type of system we are, attempt to
89          * find the maximum positive lock offset as an SMB_OFF_T.
90          */
91
92 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
93
94         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
95
96 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
97
98         /*
99          * In this case SMB_OFF_T is 64 bits,
100          * and the underlying system can handle 64 bit signed locks.
101          */
102
103         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
104         SMB_OFF_T mask = (mask2<<1);
105         SMB_OFF_T max_positive_lock_offset = ~mask;
106
107 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
108
109         /*
110          * In this case either SMB_OFF_T is 32 bits,
111          * or the underlying system cannot handle 64 bit signed locks.
112          * All offsets & counts must be 2^31 or less.
113          */
114
115         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
116
117 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
118
119         /*
120          * POSIX locks of length zero mean lock to end-of-file.
121          * Win32 locks of length zero are point probes. Ignore
122          * any Win32 locks of length zero. JRA.
123          */
124
125         if (count == (SMB_OFF_T)0) {
126                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
127                 return False;
128         }
129
130         /*
131          * If the given offset was > max_positive_lock_offset then we cannot map this at all
132          * ignore this lock.
133          */
134
135         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
136                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
137                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
138                 return False;
139         }
140
141         /*
142          * We must truncate the count to less than max_positive_lock_offset.
143          */
144
145         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
146                 count = max_positive_lock_offset;
147         }
148
149         /*
150          * Truncate count to end at max lock offset.
151          */
152
153         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
154                 count = max_positive_lock_offset - offset;
155         }
156
157         /*
158          * If we ate all the count, ignore this lock.
159          */
160
161         if (count == 0) {
162                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
163                                 (double)u_offset, (double)u_count ));
164                 return False;
165         }
166
167         /*
168          * The mapping was successful.
169          */
170
171         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
172                         (double)offset, (double)count ));
173
174         *offset_out = offset;
175         *count_out = count;
176         
177         return True;
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 /****************************************************************************
224  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
225  broken NFS implementations.
226 ****************************************************************************/
227
228 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
229 {
230         pid_t pid;
231         bool ret;
232
233         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
234                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
235
236         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
237
238         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
239
240                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
241                                         (double)*poffset,(double)*pcount));
242                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
243                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
244
245                 /*
246                  * If the offset is > 0x7FFFFFFF then this will cause problems on
247                  * 32 bit NFS mounted filesystems. Just ignore it.
248                  */
249
250                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
251                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
252                         return True;
253                 }
254
255                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
256                         /* 32 bit NFS file system, retry with smaller offset */
257                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
258                         errno = 0;
259                         *pcount &= 0x7fffffff;
260                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
261                 }
262         }
263
264         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
265         return ret;
266 }
267
268 /****************************************************************************
269  POSIX function to see if a file region is locked. Returns True if the
270  region is locked, False otherwise.
271 ****************************************************************************/
272
273 bool is_posix_locked(files_struct *fsp,
274                         SMB_BIG_UINT *pu_offset,
275                         SMB_BIG_UINT *pu_count,
276                         enum brl_type *plock_type,
277                         enum brl_flavour lock_flav)
278 {
279         SMB_OFF_T offset;
280         SMB_OFF_T count;
281         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
282
283         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
284                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
285
286         /*
287          * If the requested lock won't fit in the POSIX range, we will
288          * never set it, so presume it is not locked.
289          */
290
291         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
292                 return False;
293         }
294
295         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
296                 return False;
297         }
298
299         if (posix_lock_type == F_UNLCK) {
300                 return False;
301         }
302
303         if (lock_flav == POSIX_LOCK) {
304                 /* Only POSIX lock queries need to know the details. */
305                 *pu_offset = (SMB_BIG_UINT)offset;
306                 *pu_count = (SMB_BIG_UINT)count;
307                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
308         }
309         return True;
310 }
311
312 /****************************************************************************
313  Next - the functions that deal with in memory database storing representations
314  of either Windows CIFS locks or POSIX CIFS locks.
315 ****************************************************************************/
316
/*
 * The key used for lock reference count records in the in-memory POSIX
 * database: the file id followed by a tag character.
 * locking_ref_count_key_fsp() sets the tag to 'r', which makes this key
 * differ from the bare file id key used for the pending close fd array.
 */

struct lock_ref_count_key {
	struct file_id id;	/* copied from fsp->file_id */
	char r;			/* tag character, set to 'r' */
}; 
323
324 /*******************************************************************
325  Form a static locking key for a dev/inode pair for the lock ref count
326 ******************************************************************/
327
328 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
329                                           struct lock_ref_count_key *tmp)
330 {
331         ZERO_STRUCTP(tmp);
332         tmp->id = fsp->file_id;
333         tmp->r = 'r';
334         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
335 }
336
337 /*******************************************************************
338  Convenience function to get an fd_array key from an fsp.
339 ******************************************************************/
340
341 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
342 {
343         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
344 }
345
346 /*******************************************************************
347  Create the in-memory POSIX lock databases.
348 ********************************************************************/
349
350 bool posix_locking_init(bool read_only)
351 {
352         if (posix_pending_close_db != NULL) {
353                 return true;
354         }
355
356         posix_pending_close_db = db_open_rbt(NULL);
357
358         if (posix_pending_close_db == NULL) {
359                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
360                 return false;
361         }
362
363         return true;
364 }
365
366 /*******************************************************************
367  Delete the in-memory POSIX lock databases.
368 ********************************************************************/
369
370 bool posix_locking_end(void)
371 {
372         /*
373          * Shouldn't we close all fd's here?
374          */
375         TALLOC_FREE(posix_pending_close_db);
376         return true;
377 }
378
379 /****************************************************************************
380  Next - the functions that deal with storing fd's that have outstanding
381  POSIX locks when closed.
382 ****************************************************************************/
383
/****************************************************************************
 The pending close database (posix_pending_close_db) holds two kinds of
 records for each dev/ino pair (fsp->file_id):
  - A lock reference count, keyed by a struct lock_ref_count_key (the file
    id followed by the character 'r'). The value is a single int counting
    the outstanding Windows locks on all open fd's on this dev/ino pair.
  - An array of ints, keyed by the file id alone. These are the fd's that
    were open on this dev/ino pair that should have been closed, but can't
    be as the lock ref count is non zero.
****************************************************************************/
392
393 /****************************************************************************
394  Keep a reference count of the number of Windows locks open on this dev/ino
395  pair. Creates entry if it doesn't exist.
396 ****************************************************************************/
397
398 static void increment_windows_lock_ref_count(files_struct *fsp)
399 {
400         struct lock_ref_count_key tmp;
401         struct db_record *rec;
402         int lock_ref_count = 0;
403         NTSTATUS status;
404
405         rec = posix_pending_close_db->fetch_locked(
406                 posix_pending_close_db, talloc_tos(),
407                 locking_ref_count_key_fsp(fsp, &tmp));
408
409         SMB_ASSERT(rec != NULL);
410
411         if (rec->value.dptr != NULL) {
412                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
413                 memcpy(&lock_ref_count, rec->value.dptr,
414                        sizeof(lock_ref_count));
415         }
416
417         lock_ref_count++;
418
419         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
420                                                sizeof(lock_ref_count)), 0);
421
422         SMB_ASSERT(NT_STATUS_IS_OK(status));
423
424         TALLOC_FREE(rec);
425
426         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
427                   fsp->fsp_name, lock_ref_count ));
428 }
429
430 /****************************************************************************
431  Bulk delete - subtract as many locks as we've just deleted.
432 ****************************************************************************/
433
434 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
435 {
436         struct lock_ref_count_key tmp;
437         struct db_record *rec;
438         int lock_ref_count = 0;
439         NTSTATUS status;
440
441         rec = posix_pending_close_db->fetch_locked(
442                 posix_pending_close_db, talloc_tos(),
443                 locking_ref_count_key_fsp(fsp, &tmp));
444
445         SMB_ASSERT((rec != NULL)
446                    && (rec->value.dptr != NULL)
447                    && (rec->value.dsize == sizeof(lock_ref_count)));
448
449         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
450
451         SMB_ASSERT(lock_ref_count > 0);
452
453         lock_ref_count -= dcount;
454
455         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
456                                                sizeof(lock_ref_count)), 0);
457
458         SMB_ASSERT(NT_STATUS_IS_OK(status));
459
460         TALLOC_FREE(rec);
461
462         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
463                   fsp->fsp_name, lock_ref_count ));
464 }
465
466 static void decrement_windows_lock_ref_count(files_struct *fsp)
467 {
468         reduce_windows_lock_ref_count(fsp, 1);
469 }
470
471 /****************************************************************************
472  Fetch the lock ref count.
473 ****************************************************************************/
474
475 static int get_windows_lock_ref_count(files_struct *fsp)
476 {
477         struct lock_ref_count_key tmp;
478         TDB_DATA dbuf;
479         int res;
480         int lock_ref_count = 0;
481
482         res = posix_pending_close_db->fetch(
483                 posix_pending_close_db, talloc_tos(),
484                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
485
486         SMB_ASSERT(res == 0);
487
488         if (dbuf.dsize != 0) {
489                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
490                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
491                 TALLOC_FREE(dbuf.dptr);
492         }
493
494         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
495                   fsp->fsp_name, lock_ref_count ));
496
497         return lock_ref_count;
498 }
499
500 /****************************************************************************
501  Delete a lock_ref_count entry.
502 ****************************************************************************/
503
504 static void delete_windows_lock_ref_count(files_struct *fsp)
505 {
506         struct lock_ref_count_key tmp;
507         struct db_record *rec;
508
509         rec = posix_pending_close_db->fetch_locked(
510                 posix_pending_close_db, talloc_tos(),
511                 locking_ref_count_key_fsp(fsp, &tmp));
512
513         SMB_ASSERT(rec != NULL);
514
515         /* Not a bug if it doesn't exist - no locks were ever granted. */
516
517         rec->delete_rec(rec);
518         TALLOC_FREE(rec);
519
520         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
521                   fsp->fsp_name));
522 }
523
524 /****************************************************************************
525  Add an fd to the pending close tdb.
526 ****************************************************************************/
527
528 static void add_fd_to_close_entry(files_struct *fsp)
529 {
530         struct db_record *rec;
531         uint8_t *new_data;
532         NTSTATUS status;
533
534         rec = posix_pending_close_db->fetch_locked(
535                 posix_pending_close_db, talloc_tos(),
536                 fd_array_key_fsp(fsp));
537
538         SMB_ASSERT(rec != NULL);
539
540         new_data = TALLOC_ARRAY(
541                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
542
543         SMB_ASSERT(new_data != NULL);
544
545         memcpy(new_data, rec->value.dptr, rec->value.dsize);
546         memcpy(new_data + rec->value.dsize,
547                &fsp->fh->fd, sizeof(fsp->fh->fd));
548
549         status = rec->store(
550                 rec, make_tdb_data(new_data,
551                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
552
553         SMB_ASSERT(NT_STATUS_IS_OK(status));
554
555         TALLOC_FREE(rec);
556
557         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
558                   fsp->fh->fd, fsp->fsp_name ));
559 }
560
561 /****************************************************************************
562  Remove all fd entries for a specific dev/inode pair from the tdb.
563 ****************************************************************************/
564
565 static void delete_close_entries(files_struct *fsp)
566 {
567         struct db_record *rec;
568
569         rec = posix_pending_close_db->fetch_locked(
570                 posix_pending_close_db, talloc_tos(),
571                 fd_array_key_fsp(fsp));
572
573         SMB_ASSERT(rec != NULL);
574         rec->delete_rec(rec);
575         TALLOC_FREE(rec);
576 }
577
578 /****************************************************************************
579  Get the array of POSIX pending close records for an open fsp. Returns number
580  of entries.
581 ****************************************************************************/
582
583 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
584                                               files_struct *fsp, int **entries)
585 {
586         TDB_DATA dbuf;
587         int res;
588
589         res = posix_pending_close_db->fetch(
590                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
591                 &dbuf);
592
593         SMB_ASSERT(res == 0);
594
595         if (dbuf.dsize == 0) {
596                 *entries = NULL;
597                 return 0;
598         }
599
600         *entries = (int *)dbuf.dptr;
601         return (size_t)(dbuf.dsize / sizeof(int));
602 }
603
604 /****************************************************************************
605  Deal with pending closes needed by POSIX locking support.
606  Note that posix_locking_close_file() is expected to have been called
607  to delete all locks on this fsp before this function is called.
608 ****************************************************************************/
609
610 NTSTATUS fd_close_posix(struct files_struct *fsp)
611 {
612         int saved_errno = 0;
613         int ret;
614         int *fd_array = NULL;
615         size_t count, i;
616
617         if (!lp_locking(fsp->conn->params) || !lp_posix_locking(fsp->conn->params)) {
618                 /*
619                  * No locking or POSIX to worry about or we want POSIX semantics
620                  * which will lose all locks on all fd's open on this dev/inode,
621                  * just close.
622                  */
623                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
624                 fsp->fh->fd = -1;
625                 if (ret == -1) {
626                         return map_nt_error_from_unix(errno);
627                 }
628                 return NT_STATUS_OK;
629         }
630
631         if (get_windows_lock_ref_count(fsp)) {
632
633                 /*
634                  * There are outstanding locks on this dev/inode pair on other fds.
635                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
636                  */
637
638                 add_fd_to_close_entry(fsp);
639                 fsp->fh->fd = -1;
640                 return NT_STATUS_OK;
641         }
642
643         /*
644          * No outstanding locks. Get the pending close fd's
645          * from the tdb and close them all.
646          */
647
648         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
649
650         if (count) {
651                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
652
653                 for(i = 0; i < count; i++) {
654                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
655                                 saved_errno = errno;
656                         }
657                 }
658
659                 /*
660                  * Delete all fd's stored in the tdb
661                  * for this dev/inode pair.
662                  */
663
664                 delete_close_entries(fsp);
665         }
666
667         TALLOC_FREE(fd_array);
668
669         /* Don't need a lock ref count on this dev/ino anymore. */
670         delete_windows_lock_ref_count(fsp);
671
672         /*
673          * Finally close the fd associated with this fsp.
674          */
675
676         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
677
678         if (ret == 0 && saved_errno != 0) {
679                 errno = saved_errno;
680                 ret = -1;
681         } 
682
683         fsp->fh->fd = -1;
684
685         if (ret == -1) {
686                 return map_nt_error_from_unix(errno);
687         }
688
689         return NT_STATUS_OK;
690 }
691
692 /****************************************************************************
693  Next - the functions that deal with the mapping CIFS Windows locks onto
694  the underlying system POSIX locks.
695 ****************************************************************************/
696
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 * Each node describes one byte range [start, start + size).
 */

struct lock_list {
	struct lock_list *next;	/* following node in the list */
	struct lock_list *prev;	/* preceding node in the list */
	SMB_OFF_T start;	/* first byte of the range */
	SMB_OFF_T size;		/* length of the range in bytes */
};
708
709 /****************************************************************************
710  Create a list of lock ranges that don't overlap a given range. Used in calculating
711  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
712  understand it :-).
713 ****************************************************************************/
714
715 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
716                                                 struct lock_list *lhead,
717                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
718                                                 files_struct *fsp,
719                                                 const struct lock_struct *plocks,
720                                                 int num_locks)
721 {
722         int i;
723
724         /*
725          * Check the current lock list on this dev/inode pair.
726          * Quit if the list is deleted.
727          */
728
729         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
730                 (double)lhead->start, (double)lhead->size ));
731
732         for (i=0; i<num_locks && lhead; i++) {
733                 const struct lock_struct *lock = &plocks[i];
734                 struct lock_list *l_curr;
735
736                 /* Ignore all but read/write locks. */
737                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
738                         continue;
739                 }
740
741                 /* Ignore locks not owned by this process. */
742                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
743                         continue;
744                 }
745
746                 /*
747                  * Walk the lock list, checking for overlaps. Note that
748                  * the lock list can expand within this loop if the current
749                  * range being examined needs to be split.
750                  */
751
752                 for (l_curr = lhead; l_curr;) {
753
754                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
755                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
756
757                         if ( (l_curr->start >= (lock->start + lock->size)) ||
758                                  (lock->start >= (l_curr->start + l_curr->size))) {
759
760                                 /* No overlap with existing lock - leave this range alone. */
761 /*********************************************
762                                              +---------+
763                                              | l_curr  |
764                                              +---------+
765                                 +-------+
766                                 | lock  |
767                                 +-------+
768 OR....
769              +---------+
770              |  l_curr |
771              +---------+
772 **********************************************/
773
774                                 DEBUG(10,(" no overlap case.\n" ));
775
776                                 l_curr = l_curr->next;
777
778                         } else if ( (l_curr->start >= lock->start) &&
779                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
780
781                                 /*
782                                  * This range is completely overlapped by this existing lock range
783                                  * and thus should have no effect. Delete it from the list.
784                                  */
785 /*********************************************
786                 +---------+
787                 |  l_curr |
788                 +---------+
789         +---------------------------+
790         |       lock                |
791         +---------------------------+
792 **********************************************/
793                                 /* Save the next pointer */
794                                 struct lock_list *ul_next = l_curr->next;
795
796                                 DEBUG(10,(" delete case.\n" ));
797
798                                 DLIST_REMOVE(lhead, l_curr);
799                                 if(lhead == NULL) {
800                                         break; /* No more list... */
801                                 }
802
803                                 l_curr = ul_next;
804                                 
805                         } else if ( (l_curr->start >= lock->start) &&
806                                                 (l_curr->start < lock->start + lock->size) &&
807                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
808
809                                 /*
810                                  * This range overlaps the existing lock range at the high end.
811                                  * Truncate by moving start to existing range end and reducing size.
812                                  */
813 /*********************************************
814                 +---------------+
815                 |  l_curr       |
816                 +---------------+
817         +---------------+
818         |    lock       |
819         +---------------+
820 BECOMES....
821                         +-------+
822                         | l_curr|
823                         +-------+
824 **********************************************/
825
826                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
827                                 l_curr->start = lock->start + lock->size;
828
829                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
830                                                                 (double)l_curr->start, (double)l_curr->size ));
831
832                                 l_curr = l_curr->next;
833
834                         } else if ( (l_curr->start < lock->start) &&
835                                                 (l_curr->start + l_curr->size > lock->start) &&
836                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
837
838                                 /*
839                                  * This range overlaps the existing lock range at the low end.
840                                  * Truncate by reducing size.
841                                  */
842 /*********************************************
843    +---------------+
844    |  l_curr       |
845    +---------------+
846            +---------------+
847            |    lock       |
848            +---------------+
849 BECOMES....
850    +-------+
851    | l_curr|
852    +-------+
853 **********************************************/
854
855                                 l_curr->size = lock->start - l_curr->start;
856
857                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
858                                                                 (double)l_curr->start, (double)l_curr->size ));
859
860                                 l_curr = l_curr->next;
861                 
862                         } else if ( (l_curr->start < lock->start) &&
863                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
864                                 /*
865                                  * Worst case scenario. Range completely overlaps an existing
866                                  * lock range. Split the request into two, push the new (upper) request
867                                  * into the dlink list, and continue with the entry after l_new (as we
868                                  * know that l_new will not overlap with this lock).
869                                  */
870 /*********************************************
871         +---------------------------+
872         |        l_curr             |
873         +---------------------------+
874                 +---------+
875                 | lock    |
876                 +---------+
877 BECOMES.....
878         +-------+         +---------+
879         | l_curr|         | l_new   |
880         +-------+         +---------+
881 **********************************************/
882                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
883
884                                 if(l_new == NULL) {
885                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
886                                         return NULL; /* The talloc_destroy takes care of cleanup. */
887                                 }
888
889                                 ZERO_STRUCTP(l_new);
890                                 l_new->start = lock->start + lock->size;
891                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
892
893                                 /* Truncate the l_curr. */
894                                 l_curr->size = lock->start - l_curr->start;
895
896                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
897 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
898                                                                 (double)l_new->start, (double)l_new->size ));
899
900                                 /*
901                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
902                                  * Note we can't use DLIST_ADD here as this inserts at the head of the given list.
903                                  */
904
905                                 l_new->prev = l_curr;
906                                 l_new->next = l_curr->next;
907                                 l_curr->next = l_new;
908
909                                 /* And move after the link we added. */
910                                 l_curr = l_new->next;
911
912                         } else {
913
914                                 /*
915                                  * This logic case should never happen. Ensure this is the
916                                  * case by forcing an abort.... Remove in production.
917                                  */
918                                 char *msg = NULL;
919
920                                 /* Don't check if alloc succeeds here - we're
921                                  * forcing a core dump anyway. */
922
923                                 asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
924 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
925
926                                 smb_panic(msg);
927                         }
928                 } /* end for ( l_curr = lhead; l_curr;) */
929         } /* end for (i=0; i<num_locks && ul_head; i++) */
930
931         return lhead;
932 }
933
934 /****************************************************************************
935  POSIX function to acquire a lock. Returns True if the
936  lock could be granted, False if not.
937 ****************************************************************************/
938
939 bool set_posix_lock_windows_flavour(files_struct *fsp,
940                         SMB_BIG_UINT u_offset,
941                         SMB_BIG_UINT u_count,
942                         enum brl_type lock_type,
943                         const struct lock_context *lock_ctx,
944                         const struct lock_struct *plocks,
945                         int num_locks,
946                         int *errno_ret)
947 {
948         SMB_OFF_T offset;
949         SMB_OFF_T count;
950         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
951         bool ret = True;
952         size_t lock_count;
953         TALLOC_CTX *l_ctx = NULL;
954         struct lock_list *llist = NULL;
955         struct lock_list *ll = NULL;
956
957         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
958                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
959
960         /*
961          * If the requested lock won't fit in the POSIX range, we will
962          * pretend it was successful.
963          */
964
965         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
966                 increment_windows_lock_ref_count(fsp);
967                 return True;
968         }
969
970         /*
971          * Windows is very strange. It allows read locks to be overlayed
972          * (even over a write lock), but leaves the write lock in force until the first
973          * unlock. It also reference counts the locks. This means the following sequence :
974          *
975          * process1                                      process2
976          * ------------------------------------------------------------------------
977          * WRITE LOCK : start = 2, len = 10
978          *                                            READ LOCK: start =0, len = 10 - FAIL
979          * READ LOCK : start = 0, len = 14 
980          *                                            READ LOCK: start =0, len = 10 - FAIL
981          * UNLOCK : start = 2, len = 10
982          *                                            READ LOCK: start =0, len = 10 - OK
983          *
984          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
985          * would leave a single read lock over the 0-14 region.
986          */
987         
988         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
989                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
990                 return False;
991         }
992
993         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
994                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
995                 talloc_destroy(l_ctx);
996                 return False;
997         }
998
999         /*
1000          * Create the initial list entry containing the
1001          * lock we want to add.
1002          */
1003
1004         ZERO_STRUCTP(ll);
1005         ll->start = offset;
1006         ll->size = count;
1007
1008         DLIST_ADD(llist, ll);
1009
1010         /*
1011          * The following call calculates if there are any
1012          * overlapping locks held by this process on
1013          * fd's open on the same file and splits this list
1014          * into a list of lock ranges that do not overlap with existing
1015          * POSIX locks.
1016          */
1017
1018         llist = posix_lock_list(l_ctx,
1019                                 llist,
1020                                 lock_ctx, /* Lock context llist belongs to. */
1021                                 fsp,
1022                                 plocks,
1023                                 num_locks);
1024
1025         /*
1026          * Add the POSIX locks on the list of ranges returned.
1027          * As the lock is supposed to be added atomically, we need to
1028          * back out all the locks if any one of these calls fail.
1029          */
1030
1031         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1032                 offset = ll->start;
1033                 count = ll->size;
1034
1035                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1036                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1037
1038                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1039                         *errno_ret = errno;
1040                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1041                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1042                         ret = False;
1043                         break;
1044                 }
1045         }
1046
1047         if (!ret) {
1048
1049                 /*
1050                  * Back out all the POSIX locks we have on fail.
1051                  */
1052
1053                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1054                         offset = ll->start;
1055                         count = ll->size;
1056
1057                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1058                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1059
1060                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1061                 }
1062         } else {
1063                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1064                 increment_windows_lock_ref_count(fsp);
1065         }
1066
1067         talloc_destroy(l_ctx);
1068         return ret;
1069 }
1070
1071 /****************************************************************************
1072  POSIX function to release a lock. Returns True if the
1073  lock could be released, False if not.
1074 ****************************************************************************/
1075
1076 bool release_posix_lock_windows_flavour(files_struct *fsp,
1077                                 SMB_BIG_UINT u_offset,
1078                                 SMB_BIG_UINT u_count,
1079                                 enum brl_type deleted_lock_type,
1080                                 const struct lock_context *lock_ctx,
1081                                 const struct lock_struct *plocks,
1082                                 int num_locks)
1083 {
1084         SMB_OFF_T offset;
1085         SMB_OFF_T count;
1086         bool ret = True;
1087         TALLOC_CTX *ul_ctx = NULL;
1088         struct lock_list *ulist = NULL;
1089         struct lock_list *ul = NULL;
1090
1091         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
1092                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1093
1094         /* Remember the number of Windows locks we have on this dev/ino pair. */
1095         decrement_windows_lock_ref_count(fsp);
1096
1097         /*
1098          * If the requested lock won't fit in the POSIX range, we will
1099          * pretend it was successful.
1100          */
1101
1102         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1103                 return True;
1104         }
1105
1106         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1107                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1108                 return False;
1109         }
1110
1111         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1112                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1113                 talloc_destroy(ul_ctx);
1114                 return False;
1115         }
1116
1117         /*
1118          * Create the initial list entry containing the
1119          * lock we want to remove.
1120          */
1121
1122         ZERO_STRUCTP(ul);
1123         ul->start = offset;
1124         ul->size = count;
1125
1126         DLIST_ADD(ulist, ul);
1127
1128         /*
1129          * The following call calculates if there are any
1130          * overlapping locks held by this process on
1131          * fd's open on the same file and creates a
1132          * list of unlock ranges that will allow
1133          * POSIX lock ranges to remain on the file whilst the
1134          * unlocks are performed.
1135          */
1136
1137         ulist = posix_lock_list(ul_ctx,
1138                                 ulist,
1139                                 lock_ctx, /* Lock context ulist belongs to. */
1140                                 fsp,
1141                                 plocks,
1142                                 num_locks);
1143
1144         /*
1145          * If there were any overlapped entries (list is > 1 or size or start have changed),
1146          * and the lock_type we just deleted from
1147          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1148          * the POSIX lock to a read lock. This allows any overlapping read locks
1149          * to be atomically maintained.
1150          */
1151
1152         if (deleted_lock_type == WRITE_LOCK &&
1153                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1154
1155                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1156                         (double)offset, (double)count ));
1157
1158                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1159                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1160                         talloc_destroy(ul_ctx);
1161                         return False;
1162                 }
1163         }
1164
1165         /*
1166          * Release the POSIX locks on the list of ranges returned.
1167          */
1168
1169         for(; ulist; ulist = ulist->next) {
1170                 offset = ulist->start;
1171                 count = ulist->size;
1172
1173                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1174                         (double)offset, (double)count ));
1175
1176                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1177                         ret = False;
1178                 }
1179         }
1180
1181         talloc_destroy(ul_ctx);
1182         return ret;
1183 }
1184
1185 /****************************************************************************
1186  Next - the functions that deal with mapping CIFS POSIX locks onto
1187  the underlying system POSIX locks.
1188 ****************************************************************************/
1189
1190 /****************************************************************************
1191  POSIX function to acquire a lock. Returns True if the
1192  lock could be granted, False if not.
1193  As POSIX locks don't stack or conflict (they just overwrite)
1194  we can map the requested lock directly onto a system one. We
1195  know it doesn't conflict with locks on other contexts as the
1196  upper layer would have refused it.
1197 ****************************************************************************/
1198
1199 bool set_posix_lock_posix_flavour(files_struct *fsp,
1200                         SMB_BIG_UINT u_offset,
1201                         SMB_BIG_UINT u_count,
1202                         enum brl_type lock_type,
1203                         int *errno_ret)
1204 {
1205         SMB_OFF_T offset;
1206         SMB_OFF_T count;
1207         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1208
1209         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
1210                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1211
1212         /*
1213          * If the requested lock won't fit in the POSIX range, we will
1214          * pretend it was successful.
1215          */
1216
1217         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1218                 return True;
1219         }
1220
1221         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1222                 *errno_ret = errno;
1223                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1224                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1225                 return False;
1226         }
1227         return True;
1228 }
1229
1230 /****************************************************************************
1231  POSIX function to release a lock. Returns True if the
1232  lock could be released, False if not.
1233  We are given a complete lock state from the upper layer which is what the lock
1234  state should be after the unlock has already been done, so what
1235  we do is punch out holes in the unlock range where locks owned by this process
1236  have a different lock context.
1237 ****************************************************************************/
1238
1239 bool release_posix_lock_posix_flavour(files_struct *fsp,
1240                                 SMB_BIG_UINT u_offset,
1241                                 SMB_BIG_UINT u_count,
1242                                 const struct lock_context *lock_ctx,
1243                                 const struct lock_struct *plocks,
1244                                 int num_locks)
1245 {
1246         bool ret = True;
1247         SMB_OFF_T offset;
1248         SMB_OFF_T count;
1249         TALLOC_CTX *ul_ctx = NULL;
1250         struct lock_list *ulist = NULL;
1251         struct lock_list *ul = NULL;
1252
1253         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
1254                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1255
1256         /*
1257          * If the requested lock won't fit in the POSIX range, we will
1258          * pretend it was successful.
1259          */
1260
1261         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1262                 return True;
1263         }
1264
1265         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1266                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1267                 return False;
1268         }
1269
1270         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1271                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1272                 talloc_destroy(ul_ctx);
1273                 return False;
1274         }
1275
1276         /*
1277          * Create the initial list entry containing the
1278          * lock we want to remove.
1279          */
1280
1281         ZERO_STRUCTP(ul);
1282         ul->start = offset;
1283         ul->size = count;
1284
1285         DLIST_ADD(ulist, ul);
1286
1287         /*
1288          * Walk the given array creating a linked list
1289          * of unlock requests.
1290          */
1291
1292         ulist = posix_lock_list(ul_ctx,
1293                                 ulist,
1294                                 lock_ctx, /* Lock context ulist belongs to. */
1295                                 fsp,
1296                                 plocks,
1297                                 num_locks);
1298
1299         /*
1300          * Release the POSIX locks on the list of ranges returned.
1301          */
1302
1303         for(; ulist; ulist = ulist->next) {
1304                 offset = ulist->start;
1305                 count = ulist->size;
1306
1307                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1308                         (double)offset, (double)count ));
1309
1310                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1311                         ret = False;
1312                 }
1313         }
1314
1315         talloc_destroy(ul_ctx);
1316         return ret;
1317 }