source3/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2006
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18
  19    Revision History:
  20
  21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  22 */
  23
  24 #include "includes.h"
  25 #include "system/filesys.h"
  26 #include "locking/proto.h"
  27 #include "dbwrap/dbwrap.h"
  28 #include "dbwrap/dbwrap_rbt.h"
  29 #include "util_tdb.h"
  30
  31 #undef DBGC_CLASS
  32 #define DBGC_CLASS DBGC_LOCKING
  33
  34 /*
  35  * The pending close database handle.
  36  */
  37
  38 static struct db_context *posix_pending_close_db;
  39
  40 /****************************************************************************
  41  First - the functions that deal with the underlying system locks - these
  42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
  43  POSIX locks onto POSIX.
  44 ****************************************************************************/
  45
  46 /****************************************************************************
  47  Utility function to map a lock type correctly depending on the open
  48  mode of a file.
  49 ****************************************************************************/
  50
  51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
  52 {
  53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
  54                 /*
  55                  * Many UNIX's cannot get a write lock on a file opened read-only.
  56                  * Win32 locking semantics allow this.
  57                  * Do the best we can and attempt a read-only lock.
  58                  */
  59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
  60                 return F_RDLCK;
  61         }
  62
  63         /*
  64          * This return should be the most normal, as we attempt
  65          * to always open files read/write.
  66          */
  67
  68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
  69 }
  70
  71 /****************************************************************************
  72  Debugging aid :-).
  73 ****************************************************************************/
  74
  75 static const char *posix_lock_type_name(int lock_type)
  76 {
  77         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
  78 }
  79
  80 /****************************************************************************
  81  Check to see if the given unsigned lock range is within the possible POSIX
  82  range. Modifies the given args to be in range if possible, just returns
  83  False if not.
  84 ****************************************************************************/
  85
  86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
  87                                 uint64_t u_offset, uint64_t u_count)
  88 {
  89         off_t offset = (off_t)u_offset;
  90         off_t count = (off_t)u_count;
  91
  92         /*
  93          * For the type of system we are, attempt to
  94          * find the maximum positive lock offset as an off_t.
  95          */
  96
  97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
  98
  99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
 100 #else
 101         /*
 102          * In this case off_t is 64 bits,
 103          * and the underlying system can handle 64 bit signed locks.
 104          */
 105
 106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
 107         off_t mask = (mask2<<1);
 108         off_t max_positive_lock_offset = ~mask;
 109
 110 #endif
 111         /*
 112          * POSIX locks of length zero mean lock to end-of-file.
 113          * Win32 locks of length zero are point probes. Ignore
 114          * any Win32 locks of length zero. JRA.
 115          */
 116
 117         if (count == (off_t)0) {
 118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 119                 return False;
 120         }
 121
 122         /*
 123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 124          * ignore this lock.
 125          */
 126
 127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
 128                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 129                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
 130                 return False;
 131         }
 132
 133         /*
 134          * We must truncate the count to less than max_positive_lock_offset.
 135          */
 136
 137         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
 138                 count = max_positive_lock_offset;
 139         }
 140
 141         /*
 142          * Truncate count to end at max lock offset.
 143          */
 144
 145         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
 146                 count = max_positive_lock_offset - offset;
 147         }
 148
 149         /*
 150          * If we ate all the count, ignore this lock.
 151          */
 152
 153         if (count == 0) {
 154                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 155                                 (double)u_offset, (double)u_count ));
 156                 return False;
 157         }
 158
 159         /*
 160          * The mapping was successful.
 161          */
 162
 163         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 164                         (double)offset, (double)count ));
 165
 166         *offset_out = offset;
 167         *count_out = count;
 168
 169         return True;
 170 }
 171
 172 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
 173                        struct files_struct *fsp, int op, off_t offset,
 174                        off_t count, int type)
 175 {
 176         VFS_FIND(lock);
 177         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
 178 }
 179
 180 /****************************************************************************
 181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 182  broken NFS implementations.
 183 ****************************************************************************/
 184
 185 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
 186 {
 187         bool ret;
 188
 189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
 190
 191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 192
 193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 194
 195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 196                                         (double)offset,(double)count));
 197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
 199
 200                 /*
 201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 202                  * 32 bit NFS mounted filesystems. Just ignore it.
 203                  */
 204
 205                 if (offset & ~((off_t)0x7fffffff)) {
 206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 207                         return True;
 208                 }
 209
 210                 if (count & ~((off_t)0x7fffffff)) {
 211                         /* 32 bit NFS file system, retry with smaller offset */
 212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 213                         errno = 0;
 214                         count &= 0x7fffffff;
 215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
 216                 }
 217         }
 218
 219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 220         return ret;
 221 }
 222
 223 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
 224                           struct files_struct *fsp, off_t *poffset,
 225                           off_t *pcount, int *ptype, pid_t *ppid)
 226 {
 227         VFS_FIND(getlock);
 228         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype,
 229                                        ppid);
 230 }
 231
 232 /****************************************************************************
 233  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
 234  broken NFS implementations.
 235 ****************************************************************************/
 236
 237 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
 238 {
 239         pid_t pid;
 240         bool ret;
 241
 242         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
 243                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
 244
 245         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
 246
 247         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 248
 249                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 250                                         (double)*poffset,(double)*pcount));
 251                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 252                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
 253
 254                 /*
 255                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 256                  * 32 bit NFS mounted filesystems. Just ignore it.
 257                  */
 258
 259                 if (*poffset & ~((off_t)0x7fffffff)) {
 260                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 261                         return True;
 262                 }
 263
 264                 if (*pcount & ~((off_t)0x7fffffff)) {
 265                         /* 32 bit NFS file system, retry with smaller offset */
 266                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 267                         errno = 0;
 268                         *pcount &= 0x7fffffff;
 269                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
 270                 }
 271         }
 272
 273         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
 274         return ret;
 275 }
 276
 277 /****************************************************************************
 278  POSIX function to see if a file region is locked. Returns True if the
 279  region is locked, False otherwise.
 280 ****************************************************************************/
 281
 282 bool is_posix_locked(files_struct *fsp,
 283                         uint64_t *pu_offset,
 284                         uint64_t *pu_count,
 285                         enum brl_type *plock_type,
 286                         enum brl_flavour lock_flav)
 287 {
 288         off_t offset;
 289         off_t count;
 290         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
 291
 292         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
 293                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
 294                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
 295
 296         /*
 297          * If the requested lock won't fit in the POSIX range, we will
 298          * never set it, so presume it is not locked.
 299          */
 300
 301         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
 302                 return False;
 303         }
 304
 305         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
 306                 return False;
 307         }
 308
 309         if (posix_lock_type == F_UNLCK) {
 310                 return False;
 311         }
 312
 313         if (lock_flav == POSIX_LOCK) {
 314                 /* Only POSIX lock queries need to know the details. */
 315                 *pu_offset = (uint64_t)offset;
 316                 *pu_count = (uint64_t)count;
 317                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
 318         }
 319         return True;
 320 }
 321
 322 /****************************************************************************
 323  Next - the functions that deal with in memory database storing representations
 324  of either Windows CIFS locks or POSIX CIFS locks.
 325 ****************************************************************************/
 326
/*
 * The key used in the in-memory POSIX databases for lock reference
 * count records. Filled in by locking_ref_count_key_fsp().
 */

struct lock_ref_count_key {
	/* Copy of the file_id of the fsp the count belongs to. */
	struct file_id id;
	/*
	 * Tag byte, always set to 'r'. Presumably this keeps ref count keys
	 * distinct from the fd array keys in the same database, which
	 * consist of the bare file_id - confirm.
	 */
	char r;
};
 333
 334 /*******************************************************************
 335  Form a static locking key for a dev/inode pair for the lock ref count
 336 ******************************************************************/
 337
 338 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
 339                                           struct lock_ref_count_key *tmp)
 340 {
 341         ZERO_STRUCTP(tmp);
 342         tmp->id = fsp->file_id;
 343         tmp->r = 'r';
 344         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
 345 }
 346
 347 /*******************************************************************
 348  Convenience function to get an fd_array key from an fsp.
 349 ******************************************************************/
 350
 351 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
 352 {
 353         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
 354 }
 355
 356 /*******************************************************************
 357  Create the in-memory POSIX lock databases.
 358 ********************************************************************/
 359
 360 bool posix_locking_init(bool read_only)
 361 {
 362         if (posix_pending_close_db != NULL) {
 363                 return true;
 364         }
 365
 366         posix_pending_close_db = db_open_rbt(NULL);
 367
 368         if (posix_pending_close_db == NULL) {
 369                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
 370                 return false;
 371         }
 372
 373         return true;
 374 }
 375
/*******************************************************************
 Delete the in-memory POSIX lock databases.
 Releases the pending close database handle with TALLOC_FREE and
 always returns true.
********************************************************************/

bool posix_locking_end(void)
{
	/*
	 * Shouldn't we close all fd's here?
	 * (Nothing in this function closes fds recorded in the database.)
	 */
	TALLOC_FREE(posix_pending_close_db);
	return true;
}
 388
 389 /****************************************************************************
 390  Next - the functions that deal with storing fd's that have outstanding
 391  POSIX locks when closed.
 392 ****************************************************************************/
 393
 394 /****************************************************************************
 395  The records in posix_pending_close_db are composed of an array of
 396  ints keyed by dev/ino pair. Those ints are the fd's that were open on
 397  this dev/ino pair that should have been closed, but can't as the lock
 398  ref count is non zero.
 399 ****************************************************************************/
 400
 401 /****************************************************************************
 402  Keep a reference count of the number of Windows locks open on this dev/ino
 403  pair. Creates entry if it doesn't exist.
 404 ****************************************************************************/
 405
 406 static void increment_windows_lock_ref_count(files_struct *fsp)
 407 {
 408         struct lock_ref_count_key tmp;
 409         int32_t lock_ref_count = 0;
 410         NTSTATUS status;
 411
 412         status = dbwrap_change_int32_atomic(
 413                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 414                 &lock_ref_count, 1);
 415
 416         SMB_ASSERT(NT_STATUS_IS_OK(status));
 417         SMB_ASSERT(lock_ref_count < INT32_MAX);
 418
 419         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
 420                   fsp_str_dbg(fsp), (int)lock_ref_count));
 421 }
 422
 423 /****************************************************************************
 424  Bulk delete - subtract as many locks as we've just deleted.
 425 ****************************************************************************/
 426
 427 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
 428 {
 429         struct lock_ref_count_key tmp;
 430         int32_t lock_ref_count = 0;
 431         NTSTATUS status;
 432
 433         status = dbwrap_change_int32_atomic(
 434                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 435                 &lock_ref_count, -dcount);
 436
 437         SMB_ASSERT(NT_STATUS_IS_OK(status));
 438         SMB_ASSERT(lock_ref_count >= 0);
 439
 440         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
 441                   fsp_str_dbg(fsp), (int)lock_ref_count));
 442 }
 443
/* Drop a single lock from the lock reference count of this fsp. */
static void decrement_windows_lock_ref_count(files_struct *fsp)
{
	reduce_windows_lock_ref_count(fsp, 1);
}
 448
 449 /****************************************************************************
 450  Fetch the lock ref count.
 451 ****************************************************************************/
 452
 453 static int32_t get_windows_lock_ref_count(files_struct *fsp)
 454 {
 455         struct lock_ref_count_key tmp;
 456         NTSTATUS status;
 457         int32_t lock_ref_count = 0;
 458
 459         status = dbwrap_fetch_int32(
 460                 posix_pending_close_db, locking_ref_count_key_fsp(fsp, &tmp),
 461                 &lock_ref_count);
 462
 463         if (!NT_STATUS_IS_OK(status) &&
 464             !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 465                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
 466                           "lock ref count for file %s: %s\n",
 467                           fsp_str_dbg(fsp), nt_errstr(status)));
 468         }
 469         return lock_ref_count;
 470 }
 471
 472 /****************************************************************************
 473  Delete a lock_ref_count entry.
 474 ****************************************************************************/
 475
 476 static void delete_windows_lock_ref_count(files_struct *fsp)
 477 {
 478         struct lock_ref_count_key tmp;
 479
 480         /* Not a bug if it doesn't exist - no locks were ever granted. */
 481
 482         dbwrap_delete(posix_pending_close_db,
 483                       locking_ref_count_key_fsp(fsp, &tmp));
 484
 485         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
 486                   fsp_str_dbg(fsp)));
 487 }
 488
 489 /****************************************************************************
 490  Add an fd to the pending close tdb.
 491 ****************************************************************************/
 492
 493 static void add_fd_to_close_entry(files_struct *fsp)
 494 {
 495         struct db_record *rec;
 496         int *fds;
 497         size_t num_fds;
 498         NTSTATUS status;
 499         TDB_DATA value;
 500
 501         rec = dbwrap_fetch_locked(
 502                 posix_pending_close_db, talloc_tos(),
 503                 fd_array_key_fsp(fsp));
 504
 505         SMB_ASSERT(rec != NULL);
 506
 507         value = dbwrap_record_get_value(rec);
 508         SMB_ASSERT((value.dsize % sizeof(int)) == 0);
 509
 510         num_fds = value.dsize / sizeof(int);
 511         fds = talloc_array(rec, int, num_fds+1);
 512
 513         SMB_ASSERT(fds != NULL);
 514
 515         memcpy(fds, value.dptr, value.dsize);
 516         fds[num_fds] = fsp->fh->fd;
 517
 518         status = dbwrap_record_store(
 519                 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
 520
 521         SMB_ASSERT(NT_STATUS_IS_OK(status));
 522
 523         TALLOC_FREE(rec);
 524
 525         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
 526                   fsp->fh->fd, fsp_str_dbg(fsp)));
 527 }
 528
 529 /****************************************************************************
 530  Remove all fd entries for a specific dev/inode pair from the tdb.
 531 ****************************************************************************/
 532
 533 static void delete_close_entries(files_struct *fsp)
 534 {
 535         struct db_record *rec;
 536
 537         rec = dbwrap_fetch_locked(
 538                 posix_pending_close_db, talloc_tos(),
 539                 fd_array_key_fsp(fsp));
 540
 541         SMB_ASSERT(rec != NULL);
 542         dbwrap_record_delete(rec);
 543         TALLOC_FREE(rec);
 544 }
 545
 546 /****************************************************************************
 547  Get the array of POSIX pending close records for an open fsp. Returns number
 548  of entries.
 549 ****************************************************************************/
 550
 551 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
 552                                               files_struct *fsp, int **entries)
 553 {
 554         TDB_DATA dbuf;
 555         NTSTATUS status;
 556
 557         status = dbwrap_fetch(
 558                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
 559                 &dbuf);
 560
 561         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 562                 *entries = NULL;
 563                 return 0;
 564         }
 565
 566         SMB_ASSERT(NT_STATUS_IS_OK(status));
 567
 568         if (dbuf.dsize == 0) {
 569                 *entries = NULL;
 570                 return 0;
 571         }
 572
 573         *entries = (int *)dbuf.dptr;
 574         return (size_t)(dbuf.dsize / sizeof(int));
 575 }
 576
 577 /****************************************************************************
 578  Deal with pending closes needed by POSIX locking support.
 579  Note that posix_locking_close_file() is expected to have been called
 580  to delete all locks on this fsp before this function is called.
 581 ****************************************************************************/
 582
 583 int fd_close_posix(struct files_struct *fsp)
 584 {
 585         int saved_errno = 0;
 586         int ret;
 587         int *fd_array = NULL;
 588         size_t count, i;
 589
 590         if (!lp_locking(fsp->conn->params) ||
 591             !lp_posix_locking(fsp->conn->params))
 592         {
 593                 /*
 594                  * No locking or POSIX to worry about or we want POSIX semantics
 595                  * which will lose all locks on all fd's open on this dev/inode,
 596                  * just close.
 597                  */
 598                 return close(fsp->fh->fd);
 599         }
 600
 601         if (get_windows_lock_ref_count(fsp)) {
 602
 603                 /*
 604                  * There are outstanding locks on this dev/inode pair on
 605                  * other fds. Add our fd to the pending close tdb and set
 606                  * fsp->fh->fd to -1.
 607                  */
 608
 609                 add_fd_to_close_entry(fsp);
 610                 return 0;
 611         }
 612
 613         /*
 614          * No outstanding locks. Get the pending close fd's
 615          * from the tdb and close them all.
 616          */
 617
 618         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
 619
 620         if (count) {
 621                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
 622                           (unsigned int)count));
 623
 624                 for(i = 0; i < count; i++) {
 625                         if (close(fd_array[i]) == -1) {
 626                                 saved_errno = errno;
 627                         }
 628                 }
 629
 630                 /*
 631                  * Delete all fd's stored in the tdb
 632                  * for this dev/inode pair.
 633                  */
 634
 635                 delete_close_entries(fsp);
 636         }
 637
 638         TALLOC_FREE(fd_array);
 639
 640         /* Don't need a lock ref count on this dev/ino anymore. */
 641         delete_windows_lock_ref_count(fsp);
 642
 643         /*
 644          * Finally close the fd associated with this fsp.
 645          */
 646
 647         ret = close(fsp->fh->fd);
 648
 649         if (ret == 0 && saved_errno != 0) {
 650                 errno = saved_errno;
 651                 ret = -1;
 652         }
 653
 654         return ret;
 655 }
 656
 657 /****************************************************************************
 658  Next - the functions that deal with the mapping CIFS Windows locks onto
 659  the underlying system POSIX locks.
 660 ****************************************************************************/
 661
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 * Each node describes one byte range as a start offset plus a size.
 */

struct lock_list {
	/* Neighbouring nodes in the doubly linked list. */
	struct lock_list *next;
	struct lock_list *prev;
	/* First byte offset covered by this range. */
	off_t start;
	/* Number of bytes covered, counted from start. */
	off_t size;
};
 673
 674 /****************************************************************************
 675  Create a list of lock ranges that don't overlap a given range. Used in calculating
 676  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 677  understand it :-).
 678 ****************************************************************************/
 679
 680 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
 681                                                 struct lock_list *lhead,
 682                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
 683                                                 files_struct *fsp,
 684                                                 const struct lock_struct *plocks,
 685                                                 int num_locks)
 686 {
 687         int i;
 688
 689         /*
 690          * Check the current lock list on this dev/inode pair.
 691          * Quit if the list is deleted.
 692          */
 693
 694         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 695                 (double)lhead->start, (double)lhead->size ));
 696
 697         for (i=0; i<num_locks && lhead; i++) {
 698                 const struct lock_struct *lock = &plocks[i];
 699                 struct lock_list *l_curr;
 700
 701                 /* Ignore all but read/write locks. */
 702                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
 703                         continue;
 704                 }
 705
 706                 /* Ignore locks not owned by this process. */
 707                 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
 708                         continue;
 709                 }
 710
 711                 /*
 712                  * Walk the lock list, checking for overlaps. Note that
 713                  * the lock list can expand within this loop if the current
 714                  * range being examined needs to be split.
 715                  */
 716
 717                 for (l_curr = lhead; l_curr;) {
 718
 719                         DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
 720                                 (unsigned long long)lock->fnum,
 721                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 722
 723                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 724                                  (lock->start >= (l_curr->start + l_curr->size))) {
 725
 726                                 /* No overlap with existing lock - leave this range alone. */
 727 /*********************************************
 728                                              +---------+
 729                                              | l_curr  |
 730                                              +---------+
 731                                 +-------+
 732                                 | lock  |
 733                                 +-------+
 734 OR....
 735              +---------+
 736              |  l_curr |
 737              +---------+
 738 **********************************************/
 739
 740                                 DEBUG(10,(" no overlap case.\n" ));
 741
 742                                 l_curr = l_curr->next;
 743
 744                         } else if ( (l_curr->start >= lock->start) &&
 745                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 746
 747                                 /*
 748                                  * This range is completely overlapped by this existing lock range
 749                                  * and thus should have no effect. Delete it from the list.
 750                                  */
 751 /*********************************************
 752                 +---------+
 753                 |  l_curr |
 754                 +---------+
 755         +---------------------------+
 756         |       lock                |
 757         +---------------------------+
 758 **********************************************/
 759                                 /* Save the next pointer */
 760                                 struct lock_list *ul_next = l_curr->next;
 761
 762                                 DEBUG(10,(" delete case.\n" ));
 763
 764                                 DLIST_REMOVE(lhead, l_curr);
 765                                 if(lhead == NULL) {
 766                                         break; /* No more list... */
 767                                 }
 768
 769                                 l_curr = ul_next;
 770
 771                         } else if ( (l_curr->start >= lock->start) &&
 772                                                 (l_curr->start < lock->start + lock->size) &&
 773                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 774
 775                                 /*
 776                                  * This range overlaps the existing lock range at the high end.
 777                                  * Truncate by moving start to existing range end and reducing size.
 778                                  */
 779 /*********************************************
 780                 +---------------+
 781                 |  l_curr       |
 782                 +---------------+
 783         +---------------+
 784         |    lock       |
 785         +---------------+
 786 BECOMES....
 787                         +-------+
 788                         | l_curr|
 789                         +-------+
 790 **********************************************/
 791
 792                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 793                                 l_curr->start = lock->start + lock->size;
 794
 795                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
 796                                                                 (double)l_curr->start, (double)l_curr->size ));
 797
 798                                 l_curr = l_curr->next;
 799
 800                         } else if ( (l_curr->start < lock->start) &&
 801                                                 (l_curr->start + l_curr->size > lock->start) &&
 802                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 803
 804                                 /*
 805                                  * This range overlaps the existing lock range at the low end.
 806                                  * Truncate by reducing size.
 807                                  */
 808 /*********************************************
 809    +---------------+
 810    |  l_curr       |
 811    +---------------+
 812            +---------------+
 813            |    lock       |
 814            +---------------+
 815 BECOMES....
 816    +-------+
 817    | l_curr|
 818    +-------+
 819 **********************************************/
 820
 821                                 l_curr->size = lock->start - l_curr->start;
 822
 823                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
 824                                                                 (double)l_curr->start, (double)l_curr->size ));
 825
 826                                 l_curr = l_curr->next;
 827
 828                         } else if ( (l_curr->start < lock->start) &&
 829                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 830                                 /*
 831                                  * Worst case scenario. Range completely overlaps an existing
 832                                  * lock range. Split the request into two, push the new (upper) request
 833                                  * into the dlink list, and continue with the entry after l_new (as we
 834                                  * know that l_new will not overlap with this lock).
 835                                  */
 836 /*********************************************
 837         +---------------------------+
 838         |        l_curr             |
 839         +---------------------------+
 840                 +---------+
 841                 | lock    |
 842                 +---------+
 843 BECOMES.....
 844         +-------+         +---------+
 845         | l_curr|         | l_new   |
 846         +-------+         +---------+
 847 **********************************************/
 848                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
 849
 850                                 if(l_new == NULL) {
 851                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 852                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 853                                 }
 854
 855                                 ZERO_STRUCTP(l_new);
 856                                 l_new->start = lock->start + lock->size;
 857                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 858
 859                                 /* Truncate the l_curr. */
 860                                 l_curr->size = lock->start - l_curr->start;
 861
 862                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
 863 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 864                                                                 (double)l_new->start, (double)l_new->size ));
 865
 866                                 /*
 867                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 868                                  */
 869                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
 870
 871                                 /* And move after the link we added. */
 872                                 l_curr = l_new->next;
 873
 874                         } else {
 875
 876                                 /*
 877                                  * This logic case should never happen. Ensure this is the
 878                                  * case by forcing an abort.... Remove in production.
 879                                  */
 880                                 char *msg = NULL;
 881
 882                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 883 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
 884                                         smb_panic(msg);
 885                                 } else {
 886                                         smb_panic("posix_lock_list");
 887                                 }
 888                         }
 889                 } /* end for ( l_curr = lhead; l_curr;) */
 890         } /* end for (i=0; i<num_locks && ul_head; i++) */
 891
 892         return lhead;
 893 }
 894
 895 /****************************************************************************
 896  POSIX function to acquire a lock. Returns True if the
 897  lock could be granted, False if not.
 898 ****************************************************************************/
 899
 900 bool set_posix_lock_windows_flavour(files_struct *fsp,
 901                         uint64_t u_offset,
 902                         uint64_t u_count,
 903                         enum brl_type lock_type,
 904                         const struct lock_context *lock_ctx,
 905                         const struct lock_struct *plocks,
 906                         int num_locks,
 907                         int *errno_ret)
 908 {
 909         off_t offset;
 910         off_t count;
 911         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 912         bool ret = True;
 913         size_t lock_count;
 914         TALLOC_CTX *l_ctx = NULL;
 915         struct lock_list *llist = NULL;
 916         struct lock_list *ll = NULL;
 917
 918         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
 919                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
 920                  (double)u_offset, (double)u_count,
 921                  posix_lock_type_name(lock_type)));
 922
 923         /*
 924          * If the requested lock won't fit in the POSIX range, we will
 925          * pretend it was successful.
 926          */
 927
 928         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
 929                 increment_windows_lock_ref_count(fsp);
 930                 return True;
 931         }
 932
 933         /*
 934          * Windows is very strange. It allows read locks to be overlayed
 935          * (even over a write lock), but leaves the write lock in force until the first
 936          * unlock. It also reference counts the locks. This means the following sequence :
 937          *
 938          * process1                                      process2
 939          * ------------------------------------------------------------------------
 940          * WRITE LOCK : start = 2, len = 10
 941          *                                            READ LOCK: start =0, len = 10 - FAIL
 942          * READ LOCK : start = 0, len = 14
 943          *                                            READ LOCK: start =0, len = 10 - FAIL
 944          * UNLOCK : start = 2, len = 10
 945          *                                            READ LOCK: start =0, len = 10 - OK
 946          *
 947          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 948          * would leave a single read lock over the 0-14 region.
 949          */
 950
 951         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
 952                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
 953                 return False;
 954         }
 955
 956         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
 957                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
 958                 talloc_destroy(l_ctx);
 959                 return False;
 960         }
 961
 962         /*
 963          * Create the initial list entry containing the
 964          * lock we want to add.
 965          */
 966
 967         ZERO_STRUCTP(ll);
 968         ll->start = offset;
 969         ll->size = count;
 970
 971         DLIST_ADD(llist, ll);
 972
 973         /*
 974          * The following call calculates if there are any
 975          * overlapping locks held by this process on
 976          * fd's open on the same file and splits this list
 977          * into a list of lock ranges that do not overlap with existing
 978          * POSIX locks.
 979          */
 980
 981         llist = posix_lock_list(l_ctx,
 982                                 llist,
 983                                 lock_ctx, /* Lock context llist belongs to. */
 984                                 fsp,
 985                                 plocks,
 986                                 num_locks);
 987
 988         /*
 989          * Add the POSIX locks on the list of ranges returned.
 990          * As the lock is supposed to be added atomically, we need to
 991          * back out all the locks if any one of these calls fail.
 992          */
 993
 994         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
 995                 offset = ll->start;
 996                 count = ll->size;
 997
 998                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
 999                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1000
1001                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1002                         *errno_ret = errno;
1003                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1004                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1005                         ret = False;
1006                         break;
1007                 }
1008         }
1009
1010         if (!ret) {
1011
1012                 /*
1013                  * Back out all the POSIX locks we have on fail.
1014                  */
1015
1016                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1017                         offset = ll->start;
1018                         count = ll->size;
1019
1020                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1021                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1022
1023                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1024                 }
1025         } else {
1026                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1027                 increment_windows_lock_ref_count(fsp);
1028         }
1029
1030         talloc_destroy(l_ctx);
1031         return ret;
1032 }
1033
1034 /****************************************************************************
1035  POSIX function to release a lock. Returns True if the
1036  lock could be released, False if not.
1037 ****************************************************************************/
1038
1039 bool release_posix_lock_windows_flavour(files_struct *fsp,
1040                                 uint64_t u_offset,
1041                                 uint64_t u_count,
1042                                 enum brl_type deleted_lock_type,
1043                                 const struct lock_context *lock_ctx,
1044                                 const struct lock_struct *plocks,
1045                                 int num_locks)
1046 {
1047         off_t offset;
1048         off_t count;
1049         bool ret = True;
1050         TALLOC_CTX *ul_ctx = NULL;
1051         struct lock_list *ulist = NULL;
1052         struct lock_list *ul = NULL;
1053
1054         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1055                  "count = %.0f\n", fsp_str_dbg(fsp),
1056                  (double)u_offset, (double)u_count));
1057
1058         /* Remember the number of Windows locks we have on this dev/ino pair. */
1059         decrement_windows_lock_ref_count(fsp);
1060
1061         /*
1062          * If the requested lock won't fit in the POSIX range, we will
1063          * pretend it was successful.
1064          */
1065
1066         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1067                 return True;
1068         }
1069
1070         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1071                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1072                 return False;
1073         }
1074
1075         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1076                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1077                 talloc_destroy(ul_ctx);
1078                 return False;
1079         }
1080
1081         /*
1082          * Create the initial list entry containing the
1083          * lock we want to remove.
1084          */
1085
1086         ZERO_STRUCTP(ul);
1087         ul->start = offset;
1088         ul->size = count;
1089
1090         DLIST_ADD(ulist, ul);
1091
1092         /*
1093          * The following call calculates if there are any
1094          * overlapping locks held by this process on
1095          * fd's open on the same file and creates a
1096          * list of unlock ranges that will allow
1097          * POSIX lock ranges to remain on the file whilst the
1098          * unlocks are performed.
1099          */
1100
1101         ulist = posix_lock_list(ul_ctx,
1102                                 ulist,
1103                                 lock_ctx, /* Lock context ulist belongs to. */
1104                                 fsp,
1105                                 plocks,
1106                                 num_locks);
1107
1108         /*
1109          * If there were any overlapped entries (list is > 1 or size or start have changed),
1110          * and the lock_type we just deleted from
1111          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1112          * the POSIX lock to a read lock. This allows any overlapping read locks
1113          * to be atomically maintained.
1114          */
1115
1116         if (deleted_lock_type == WRITE_LOCK &&
1117                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1118
1119                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1120                         (double)offset, (double)count ));
1121
1122                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1123                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1124                         talloc_destroy(ul_ctx);
1125                         return False;
1126                 }
1127         }
1128
1129         /*
1130          * Release the POSIX locks on the list of ranges returned.
1131          */
1132
1133         for(; ulist; ulist = ulist->next) {
1134                 offset = ulist->start;
1135                 count = ulist->size;
1136
1137                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1138                         (double)offset, (double)count ));
1139
1140                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1141                         ret = False;
1142                 }
1143         }
1144
1145         talloc_destroy(ul_ctx);
1146         return ret;
1147 }
1148
1149 /****************************************************************************
1150  Next - the functions that deal with mapping CIFS POSIX locks onto
1151  the underlying system POSIX locks.
1152 ****************************************************************************/
1153
1154 /****************************************************************************
1155  POSIX function to acquire a lock. Returns True if the
1156  lock could be granted, False if not.
1157  As POSIX locks don't stack or conflict (they just overwrite)
1158  we can map the requested lock directly onto a system one. We
1159  know it doesn't conflict with locks on other contexts as the
1160  upper layer would have refused it.
1161 ****************************************************************************/
1162
1163 bool set_posix_lock_posix_flavour(files_struct *fsp,
1164                         uint64_t u_offset,
1165                         uint64_t u_count,
1166                         enum brl_type lock_type,
1167                         int *errno_ret)
1168 {
1169         off_t offset;
1170         off_t count;
1171         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1172
1173         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1174                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1175                  (double)u_offset, (double)u_count,
1176                  posix_lock_type_name(lock_type)));
1177
1178         /*
1179          * If the requested lock won't fit in the POSIX range, we will
1180          * pretend it was successful.
1181          */
1182
1183         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1184                 return True;
1185         }
1186
1187         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1188                 *errno_ret = errno;
1189                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1190                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1191                 return False;
1192         }
1193         return True;
1194 }
1195
1196 /****************************************************************************
1197  POSIX function to release a lock. Returns True if the
1198  lock could be released, False if not.
1199  We are given a complete lock state from the upper layer which is what the lock
1200  state should be after the unlock has already been done, so what
1201  we do is punch out holes in the unlock range where locks owned by this process
1202  have a different lock context.
1203 ****************************************************************************/
1204
1205 bool release_posix_lock_posix_flavour(files_struct *fsp,
1206                                 uint64_t u_offset,
1207                                 uint64_t u_count,
1208                                 const struct lock_context *lock_ctx,
1209                                 const struct lock_struct *plocks,
1210                                 int num_locks)
1211 {
1212         bool ret = True;
1213         off_t offset;
1214         off_t count;
1215         TALLOC_CTX *ul_ctx = NULL;
1216         struct lock_list *ulist = NULL;
1217         struct lock_list *ul = NULL;
1218
1219         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1220                  "count = %.0f\n", fsp_str_dbg(fsp),
1221                  (double)u_offset, (double)u_count));
1222
1223         /*
1224          * If the requested lock won't fit in the POSIX range, we will
1225          * pretend it was successful.
1226          */
1227
1228         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1229                 return True;
1230         }
1231
1232         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1233                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1234                 return False;
1235         }
1236
1237         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1238                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1239                 talloc_destroy(ul_ctx);
1240                 return False;
1241         }
1242
1243         /*
1244          * Create the initial list entry containing the
1245          * lock we want to remove.
1246          */
1247
1248         ZERO_STRUCTP(ul);
1249         ul->start = offset;
1250         ul->size = count;
1251
1252         DLIST_ADD(ulist, ul);
1253
1254         /*
1255          * Walk the given array creating a linked list
1256          * of unlock requests.
1257          */
1258
1259         ulist = posix_lock_list(ul_ctx,
1260                                 ulist,
1261                                 lock_ctx, /* Lock context ulist belongs to. */
1262                                 fsp,
1263                                 plocks,
1264                                 num_locks);
1265
1266         /*
1267          * Release the POSIX locks on the list of ranges returned.
1268          */
1269
1270         for(; ulist; ulist = ulist->next) {
1271                 offset = ulist->start;
1272                 count = ulist->size;
1273
1274                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1275                         (double)offset, (double)count ));
1276
1277                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1278                         ret = False;
1279                 }
1280         }
1281
1282         talloc_destroy(ul_ctx);
1283         return ret;
1284 }