source/locking/posix.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 3.0
   4    Locking functions
   5    Copyright (C) Jeremy Allison 1992-2000
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21    Revision History:
  22
  23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  24 */
  25
  26 #include "includes.h"
  27
  28 /*
  29  * The POSIX locking database handle.
  30  */
  31
  32 static TDB_CONTEXT *posix_lock_tdb;
  33
  34 /*
  35  * The pending close database handle.
  36  */
  37
  38 static TDB_CONTEXT *posix_pending_close_tdb;
  39
  40 /*
  41  * The data in POSIX lock records is an unsorted linear array of these
  42  * records.  It is unnecessary to store the count as tdb provides the
  43  * size of the record.
  44  */
  45
  46 struct posix_lock {
  47         int fd;
  48         SMB_OFF_T start;
  49         SMB_OFF_T size;
  50         int lock_type;
  51 };
  52
  53 /*
  54  * The data in POSIX pending close records is an unsorted linear array of int
  55  * records.  It is unnecessary to store the count as tdb provides the
  56  * size of the record.
  57  */
  58
  59 /* The key used in both the POSIX databases. */
  60
  61 struct posix_lock_key {
  62         SMB_DEV_T device;
  63         SMB_INO_T inode;
  64 };
  65
  66 /*******************************************************************
  67  Form a static locking key for a dev/inode pair.
  68 ******************************************************************/
  69
  70 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  71 {
  72         static struct posix_lock_key key;
  73         TDB_DATA kbuf;
  74
  75         memset(&key, '\0', sizeof(key));
  76         key.device = dev;
  77         key.inode = inode;
  78         kbuf.dptr = (char *)&key;
  79         kbuf.dsize = sizeof(key);
  80         return kbuf;
  81 }
  82
  83 /*******************************************************************
  84  Convenience function to get a key from an fsp.
  85 ******************************************************************/
  86
  87 static TDB_DATA locking_key_fsp(files_struct *fsp)
  88 {
  89         return locking_key(fsp->dev, fsp->inode);
  90 }
  91
  92 /****************************************************************************
  93  Add an fd to the pending close tdb.
  94 ****************************************************************************/
  95
  96 static BOOL add_fd_to_close_entry(files_struct *fsp)
  97 {
  98         TDB_DATA kbuf = locking_key_fsp(fsp);
  99         TDB_DATA dbuf;
 100         char *tp;
 101
 102         dbuf.dptr = NULL;
 103
 104         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 105
 106         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 107         if (!tp) {
 108                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 109                 SAFE_FREE(dbuf.dptr);
 110                 return False;
 111         } else
 112                 dbuf.dptr = tp;
 113
 114         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 115         dbuf.dsize += sizeof(int);
 116
 117         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 118                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 119         }
 120
 121         SAFE_FREE(dbuf.dptr);
 122         return True;
 123 }
 124
 125 /****************************************************************************
 126  Remove all fd entries for a specific dev/inode pair from the tdb.
 127 ****************************************************************************/
 128
 129 static void delete_close_entries(files_struct *fsp)
 130 {
 131         TDB_DATA kbuf = locking_key_fsp(fsp);
 132
 133         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 134                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 135 }
 136
 137 /****************************************************************************
 138  Get the array of POSIX pending close records for an open fsp. Caller must
 139  free. Returns number of entries.
 140 ****************************************************************************/
 141
 142 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 143 {
 144         TDB_DATA kbuf = locking_key_fsp(fsp);
 145         TDB_DATA dbuf;
 146         size_t count = 0;
 147
 148         *entries = NULL;
 149         dbuf.dptr = NULL;
 150
 151         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 152
 153     if (!dbuf.dptr) {
 154                 return 0;
 155         }
 156
 157         *entries = (int *)dbuf.dptr;
 158         count = (size_t)(dbuf.dsize / sizeof(int));
 159
 160         return count;
 161 }
 162
 163 /****************************************************************************
 164  Get the array of POSIX locks for an fsp. Caller must free. Returns
 165  number of entries.
 166 ****************************************************************************/
 167
 168 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 169 {
 170         TDB_DATA kbuf = locking_key_fsp(fsp);
 171         TDB_DATA dbuf;
 172         size_t count = 0;
 173
 174         *entries = NULL;
 175
 176         dbuf.dptr = NULL;
 177
 178         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 179
 180     if (!dbuf.dptr) {
 181                 return 0;
 182         }
 183
 184         *entries = (struct posix_lock *)dbuf.dptr;
 185         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 186
 187         return count;
 188 }
 189
 190 /****************************************************************************
 191  Deal with pending closes needed by POSIX locking support.
 192  Note that posix_locking_close_file() is expected to have been called
 193  to delete all locks on this fsp before this function is called.
 194 ****************************************************************************/
 195
 196 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 197 {
 198         int saved_errno = 0;
 199         int ret;
 200         size_t count, i;
 201         struct posix_lock *entries = NULL;
 202         int *fd_array = NULL;
 203         BOOL locks_on_other_fds = False;
 204
 205         if (!lp_posix_locking(SNUM(conn))) {
 206                 /*
 207                  * No POSIX to worry about, just close.
 208                  */
 209                 ret = conn->vfs_ops.close(fsp,fsp->fd);
 210                 fsp->fd = -1;
 211                 return ret;
 212         }
 213
 214         /*
 215          * Get the number of outstanding POSIX locks on this dev/inode pair.
 216          */
 217
 218         count = get_posix_lock_entries(fsp, &entries);
 219
 220         /*
 221          * Check if there are any outstanding locks belonging to
 222          * other fd's. This should never be the case if posix_locking_close_file()
 223          * has been called first, but it never hurts to be *sure*.
 224          */
 225
 226         for (i = 0; i < count; i++) {
 227                 if (entries[i].fd != fsp->fd) {
 228                         locks_on_other_fds = True;
 229                         break;
 230                 }
 231         }
 232
 233         if (locks_on_other_fds) {
 234
 235                 /*
 236                  * There are outstanding locks on this dev/inode pair on other fds.
 237                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 238                  */
 239
 240                 if (!add_fd_to_close_entry(fsp)) {
 241                         SAFE_FREE(entries);
 242                         return False;
 243                 }
 244
 245                 SAFE_FREE(entries);
 246                 fsp->fd = -1;
 247                 return 0;
 248         }
 249
 250         SAFE_FREE(entries);
 251
 252         /*
 253          * No outstanding POSIX locks. Get the pending close fd's
 254          * from the tdb and close them all.
 255          */
 256
 257         count = get_posix_pending_close_entries(fsp, &fd_array);
 258
 259         if (count) {
 260                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 261
 262                 for(i = 0; i < count; i++) {
 263                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
 264                                 saved_errno = errno;
 265                         }
 266                 }
 267
 268                 /*
 269                  * Delete all fd's stored in the tdb
 270                  * for this dev/inode pair.
 271                  */
 272
 273                 delete_close_entries(fsp);
 274         }
 275
 276         SAFE_FREE(fd_array);
 277
 278         /*
 279          * Finally close the fd associated with this fsp.
 280          */
 281
 282         ret = conn->vfs_ops.close(fsp,fsp->fd);
 283
 284         if (saved_errno != 0) {
 285         errno = saved_errno;
 286                 ret = -1;
 287     }
 288
 289         fsp->fd = -1;
 290
 291         return ret;
 292 }
 293
 294 /****************************************************************************
 295  Debugging aid :-).
 296 ****************************************************************************/
 297
 298 static const char *posix_lock_type_name(int lock_type)
 299 {
 300         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 301 }
 302
 303 /****************************************************************************
 304  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 305  then the POSIX fcntl lock fails.
 306 ****************************************************************************/
 307
 308 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 309 {
 310         TDB_DATA kbuf = locking_key_fsp(fsp);
 311         TDB_DATA dbuf;
 312         struct posix_lock *locks;
 313         size_t count;
 314
 315         dbuf.dptr = NULL;
 316
 317         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 318
 319         if (!dbuf.dptr) {
 320                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 321                 goto fail;
 322         }
 323
 324         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 325         locks = (struct posix_lock *)dbuf.dptr;
 326
 327         if (count == 1) {
 328                 tdb_delete(posix_lock_tdb, kbuf);
 329         } else {
 330                 if (entry < count-1) {
 331                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
 332                 }
 333                 dbuf.dsize -= sizeof(*locks);
 334                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 335         }
 336
 337         SAFE_FREE(dbuf.dptr);
 338
 339         return True;
 340
 341  fail:
 342     SAFE_FREE(dbuf.dptr);
 343     return False;
 344 }
 345
 346 /****************************************************************************
 347  Add an entry into the POSIX locking tdb. We return the index number of the
 348  added lock (used in case we need to delete *exactly* this entry). Returns
 349  False on fail, True on success.
 350 ****************************************************************************/
 351
 352 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 353 {
 354         TDB_DATA kbuf = locking_key_fsp(fsp);
 355         TDB_DATA dbuf;
 356         struct posix_lock pl;
 357         char *tp;
 358
 359         dbuf.dptr = NULL;
 360
 361         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 362
 363         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
 364
 365         /*
 366          * Add new record.
 367          */
 368
 369         pl.fd = fsp->fd;
 370         pl.start = start;
 371         pl.size = size;
 372         pl.lock_type = lock_type;
 373
 374         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 375         if (!tp) {
 376                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 377                 goto fail;
 378         } else
 379                 dbuf.dptr = tp;
 380
 381         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 382         dbuf.dsize += sizeof(pl);
 383
 384         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 385                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 386                 goto fail;
 387         }
 388
 389     SAFE_FREE(dbuf.dptr);
 390
 391         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 392                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 393                         (double)fsp->dev, (double)fsp->inode ));
 394
 395     return True;
 396
 397  fail:
 398     SAFE_FREE(dbuf.dptr);
 399     return False;
 400 }
 401
 402 /****************************************************************************
 403  Calculate if locks have any overlap at all.
 404 ****************************************************************************/
 405
 406 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 407 {
 408         if (start1 >= start2 && start1 <= start2 + size2)
 409                 return True;
 410
 411         if (start1 < start2 && start1 + size1 > start2)
 412                 return True;
 413
 414         return False;
 415 }
 416
 417 /****************************************************************************
 418  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 419  deleted and the number of records that are overlapped by this one, or -1 on error.
 420 ****************************************************************************/
 421
 422 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 423 {
 424         TDB_DATA kbuf = locking_key_fsp(fsp);
 425         TDB_DATA dbuf;
 426         struct posix_lock *locks;
 427         size_t i, count;
 428         BOOL found = False;
 429         int num_overlapping_records = 0;
 430
 431         dbuf.dptr = NULL;
 432
 433         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 434
 435         if (!dbuf.dptr) {
 436                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 437                 goto fail;
 438         }
 439
 440         /* There are existing locks - find a match. */
 441         locks = (struct posix_lock *)dbuf.dptr;
 442         count = (size_t)(dbuf.dsize / sizeof(*locks));
 443
 444         /*
 445          * Search for and delete the first record that matches the
 446          * unlock criteria.
 447          */
 448
 449         for (i=0; i<count; i++) {
 450                 struct posix_lock *entry = &locks[i];
 451
 452                 if (entry->fd == fsp->fd &&
 453                         entry->start == start &&
 454                         entry->size == size) {
 455
 456                         /* Make a copy if requested. */
 457                         if (pl)
 458                                 *pl = *entry;
 459
 460                         /* Found it - delete it. */
 461                         if (count == 1) {
 462                                 tdb_delete(posix_lock_tdb, kbuf);
 463                         } else {
 464                                 if (i < count-1) {
 465                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 466                                 }
 467                                 dbuf.dsize -= sizeof(*locks);
 468                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 469                         }
 470                         count--;
 471                         found = True;
 472                         break;
 473                 }
 474         }
 475
 476         if (!found)
 477                 goto fail;
 478
 479         /*
 480          * Count the number of entries that are
 481          * overlapped by this unlock request.
 482          */
 483
 484         for (i = 0; i < count; i++) {
 485                 struct posix_lock *entry = &locks[i];
 486
 487                 if (fsp->fd == entry->fd &&
 488                         does_lock_overlap( start, size, entry->start, entry->size))
 489                                 num_overlapping_records++;
 490         }
 491
 492         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 493                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 494                                 (unsigned int)num_overlapping_records ));
 495
 496     SAFE_FREE(dbuf.dptr);
 497
 498         return num_overlapping_records;
 499
 500  fail:
 501     SAFE_FREE(dbuf.dptr);
 502     return -1;
 503 }
 504
 505 /****************************************************************************
 506  Utility function to map a lock type correctly depending on the open
 507  mode of a file.
 508 ****************************************************************************/
 509
 510 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 511 {
 512         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 513                 /*
 514                  * Many UNIX's cannot get a write lock on a file opened read-only.
 515                  * Win32 locking semantics allow this.
 516                  * Do the best we can and attempt a read-only lock.
 517                  */
 518                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 519                 return F_RDLCK;
 520         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 521                 /*
 522                  * Ditto for read locks on write only files.
 523                  */
 524                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 525                 return F_WRLCK;
 526         }
 527
 528   /*
 529    * This return should be the most normal, as we attempt
 530    * to always open files read/write.
 531    */
 532
 533   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 534 }
 535
 536 /****************************************************************************
 537  Check to see if the given unsigned lock range is within the possible POSIX
 538  range. Modifies the given args to be in range if possible, just returns
 539  False if not.
 540 ****************************************************************************/
 541
 542 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 543                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 544 {
 545         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 546         SMB_OFF_T count = (SMB_OFF_T)u_count;
 547
 548         /*
 549          * For the type of system we are, attempt to
 550          * find the maximum positive lock offset as an SMB_OFF_T.
 551          */
 552
 553 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 554
 555         /*
 556          * In this case SMB_OFF_T is 64 bits,
 557          * and the underlying system can handle 64 bit signed locks.
 558          */
 559
 560     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 561     SMB_OFF_T mask = (mask2<<1);
 562     SMB_OFF_T max_positive_lock_offset = ~mask;
 563
 564 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 565
 566         /*
 567          * In this case either SMB_OFF_T is 32 bits,
 568          * or the underlying system cannot handle 64 bit signed locks.
 569          * All offsets & counts must be 2^31 or less.
 570          */
 571
 572     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 573
 574 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 575
 576         /*
 577          * POSIX locks of length zero mean lock to end-of-file.
 578          * Win32 locks of length zero are point probes. Ignore
 579          * any Win32 locks of length zero. JRA.
 580          */
 581
 582         if (count == (SMB_OFF_T)0) {
 583                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 584                 return False;
 585         }
 586
 587         /*
 588          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 589          * ignore this lock.
 590          */
 591
 592         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 593                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 594                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 595                 return False;
 596         }
 597
 598         /*
 599          * We must truncate the offset and count to less than max_positive_lock_offset.
 600          */
 601
 602         offset &= max_positive_lock_offset;
 603         count &= max_positive_lock_offset;
 604
 605
 606         /*
 607          * Deal with a very common case of count of all ones.
 608          * (lock entire file).
 609          */
 610
 611         if(count == (SMB_OFF_T)-1)
 612                 count = max_positive_lock_offset;
 613
 614         /*
 615          * Truncate count to end at max lock offset.
 616          */
 617
 618         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 619                 count = max_positive_lock_offset - offset;
 620
 621         /*
 622          * If we ate all the count, ignore this lock.
 623          */
 624
 625         if (count == 0) {
 626                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 627                                 (double)u_offset, (double)u_count ));
 628                 return False;
 629         }
 630
 631         /*
 632          * The mapping was successful.
 633          */
 634
 635         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 636                         (double)offset, (double)count ));
 637
 638         *offset_out = offset;
 639         *count_out = count;
 640
 641         return True;
 642 }
 643
 644 /****************************************************************************
 645  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 646  broken NFS implementations.
 647 ****************************************************************************/
 648
 649 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 650 {
 651         int ret;
 652         struct connection_struct *conn = fsp->conn;
 653
 654         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 655
 656         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 657
 658         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 659
 660                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 661                                         (double)offset,(double)count));
 662                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 663                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 664
 665                 /*
 666                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 667                  * 32 bit NFS mounted filesystems. Just ignore it.
 668                  */
 669
 670                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 671                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 672                         return True;
 673                 }
 674
 675                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 676                         /* 32 bit NFS file system, retry with smaller offset */
 677                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 678                         errno = 0;
 679                         count &= 0x7fffffff;
 680                         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 681                 }
 682         }
 683
 684         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 685
 686         return ret;
 687 }
 688
 689 /****************************************************************************
 690  POSIX function to see if a file region is locked. Returns True if the
 691  region is locked, False otherwise.
 692 ****************************************************************************/
 693
 694 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 695 {
 696         SMB_OFF_T offset;
 697         SMB_OFF_T count;
 698         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 699
 700         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 701                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 702
 703         /*
 704          * If the requested lock won't fit in the POSIX range, we will
 705          * never set it, so presume it is not locked.
 706          */
 707
 708         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 709                 return False;
 710
 711         /*
 712          * Note that most UNIX's can *test* for a write lock on
 713          * a read-only fd, just not *set* a write lock on a read-only
 714          * fd. So we don't need to use map_lock_type here.
 715          */
 716
 717         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 718 }
 719
 720 /*
 721  * Structure used when splitting a lock range
 722  * into a POSIX lock range. Doubly linked list.
 723  */
 724
 725 struct lock_list {
 726     struct lock_list *next;
 727     struct lock_list *prev;
 728     SMB_OFF_T start;
 729     SMB_OFF_T size;
 730 };
 731
 732 /****************************************************************************
 733  Create a list of lock ranges that don't overlap a given range. Used in calculating
 734  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 735  understand it :-).
 736 ****************************************************************************/
 737
 738 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 739 {
 740         TDB_DATA kbuf = locking_key_fsp(fsp);
 741         TDB_DATA dbuf;
 742         struct posix_lock *locks;
 743         size_t num_locks, i;
 744
 745         dbuf.dptr = NULL;
 746
 747         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 748
 749         if (!dbuf.dptr)
 750                 return lhead;
 751
 752         locks = (struct posix_lock *)dbuf.dptr;
 753         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 754
 755         /*
 756          * Check the current lock list on this dev/inode pair.
 757          * Quit if the list is deleted.
 758          */
 759
 760         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 761                 (double)lhead->start, (double)lhead->size ));
 762
 763         for (i=0; i<num_locks && lhead; i++) {
 764
 765                 struct posix_lock *lock = &locks[i];
 766                 struct lock_list *l_curr;
 767
 768                 /*
 769                  * Walk the lock list, checking for overlaps. Note that
 770                  * the lock list can expand within this loop if the current
 771                  * range being examined needs to be split.
 772                  */
 773
 774                 for (l_curr = lhead; l_curr;) {
 775
 776                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 777                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 778
 779                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 780                                  (lock->start >= (l_curr->start + l_curr->size))) {
 781
 782                                 /* No overlap with this lock - leave this range alone. */
 783 /*********************************************
 784                                              +---------+
 785                                              | l_curr  |
 786                                              +---------+
 787                                 +-------+
 788                                 | lock  |
 789                                 +-------+
 790 OR....
 791              +---------+
 792              |  l_curr |
 793              +---------+
 794 **********************************************/
 795
 796                                 DEBUG(10,("no overlap case.\n" ));
 797
 798                                 l_curr = l_curr->next;
 799
 800                         } else if ( (l_curr->start >= lock->start) &&
 801                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 802
 803                                 /*
 804                                  * This unlock is completely overlapped by this existing lock range
 805                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 806                                  */
 807 /*********************************************
 808                 +---------+
 809                 |  l_curr |
 810                 +---------+
 811         +---------------------------+
 812         |       lock                |
 813         +---------------------------+
 814 **********************************************/
 815                                 /* Save the next pointer */
 816                                 struct lock_list *ul_next = l_curr->next;
 817
 818                                 DEBUG(10,("delete case.\n" ));
 819
 820                                 DLIST_REMOVE(lhead, l_curr);
 821                                 if(lhead == NULL)
 822                                         break; /* No more list... */
 823
 824                                 l_curr = ul_next;
 825
 826                         } else if ( (l_curr->start >= lock->start) &&
 827                                                 (l_curr->start < lock->start + lock->size) &&
 828                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 829
 830                                 /*
 831                                  * This unlock overlaps the existing lock range at the high end.
 832                                  * Truncate by moving start to existing range end and reducing size.
 833                                  */
 834 /*********************************************
 835                 +---------------+
 836                 |  l_curr       |
 837                 +---------------+
 838         +---------------+
 839         |    lock       |
 840         +---------------+
 841 BECOMES....
 842                         +-------+
 843                         | l_curr|
 844                         +-------+
 845 **********************************************/
 846
 847                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 848                                 l_curr->start = lock->start + lock->size;
 849
 850                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 851                                                                 (double)l_curr->start, (double)l_curr->size ));
 852
 853                                 l_curr = l_curr->next;
 854
 855                         } else if ( (l_curr->start < lock->start) &&
 856                                                 (l_curr->start + l_curr->size > lock->start) &&
 857                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 858
 859                                 /*
 860                                  * This unlock overlaps the existing lock range at the low end.
 861                                  * Truncate by reducing size.
 862                                  */
 863 /*********************************************
 864    +---------------+
 865    |  l_curr       |
 866    +---------------+
 867            +---------------+
 868            |    lock       |
 869            +---------------+
 870 BECOMES....
 871    +-------+
 872    | l_curr|
 873    +-------+
 874 **********************************************/
 875
 876                                 l_curr->size = lock->start - l_curr->start;
 877
 878                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 879                                                                 (double)l_curr->start, (double)l_curr->size ));
 880
 881                                 l_curr = l_curr->next;
 882
 883                         } else if ( (l_curr->start < lock->start) &&
 884                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 885                                 /*
 886                                  * Worst case scenario. Unlock request completely overlaps an existing
 887                                  * lock range. Split the request into two, push the new (upper) request
 888                                  * into the dlink list, and continue with the entry after ul_new (as we
 889                                  * know that ul_new will not overlap with this lock).
 890                                  */
 891 /*********************************************
 892         +---------------------------+
 893         |        l_curr             |
 894         +---------------------------+
 895                 +---------+
 896                 | lock    |
 897                 +---------+
 898 BECOMES.....
 899         +-------+         +---------+
 900         | l_curr|         | l_new   |
 901         +-------+         +---------+
 902 **********************************************/
 903                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
 904                                                                                                         sizeof(struct lock_list));
 905
 906                                 if(l_new == NULL) {
 907                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 908                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 909                                 }
 910
 911                                 ZERO_STRUCTP(l_new);
 912                                 l_new->start = lock->start + lock->size;
 913                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 914
 915                                 /* Truncate the l_curr. */
 916                                 l_curr->size = lock->start - l_curr->start;
 917
 918                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 919 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 920                                                                 (double)l_new->start, (double)l_new->size ));
 921
 922                                 /*
 923                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 924                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 925                                  */
 926
 927                                 l_new->prev = l_curr;
 928                                 l_new->next = l_curr->next;
 929                                 l_curr->next = l_new;
 930
 931                                 /* And move after the link we added. */
 932                                 l_curr = l_new->next;
 933
 934                         } else {
 935
 936                                 /*
 937                                  * This logic case should never happen. Ensure this is the
 938                                  * case by forcing an abort.... Remove in production.
 939                                  */
 940                                 pstring msg;
 941
 942                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 943 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 944
 945                                 smb_panic(msg);
 946                         }
 947                 } /* end for ( l_curr = lhead; l_curr;) */
 948         } /* end for (i=0; i<num_locks && ul_head; i++) */
 949
 950         SAFE_FREE(dbuf.dptr);
 951
 952         return lhead;
 953 }
 954
 955 /****************************************************************************
 956  POSIX function to acquire a lock. Returns True if the
 957  lock could be granted, False if not.
 958 ****************************************************************************/
 959
 960 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 961 {
 962         SMB_OFF_T offset;
 963         SMB_OFF_T count;
 964         BOOL ret = True;
 965         size_t entry_num = 0;
 966         size_t lock_count;
 967         TALLOC_CTX *l_ctx = NULL;
 968         struct lock_list *llist = NULL;
 969         struct lock_list *ll = NULL;
 970         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 971
 972         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 973                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 974
 975         /*
 976          * If the requested lock won't fit in the POSIX range, we will
 977          * pretend it was successful.
 978          */
 979
 980         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 981                 return True;
 982
 983         /*
 984          * Windows is very strange. It allows read locks to be overlayed
 985          * (even over a write lock), but leaves the write lock in force until the first
 986          * unlock. It also reference counts the locks. This means the following sequence :
 987          *
 988          * process1                                      process2
 989          * ------------------------------------------------------------------------
 990          * WRITE LOCK : start = 2, len = 10
 991          *                                            READ LOCK: start =0, len = 10 - FAIL
 992          * READ LOCK : start = 0, len = 14
 993          *                                            READ LOCK: start =0, len = 10 - FAIL
 994          * UNLOCK : start = 2, len = 10
 995          *                                            READ LOCK: start =0, len = 10 - OK
 996          *
 997          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 998          * would leave a single read lock over the 0-14 region. In order to
 999          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1000          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1001          * semantics that if a write lock is added, then it will be first in the array.
1002          */
1003
1004         if ((l_ctx = talloc_init()) == NULL) {
1005                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1006                 return True; /* Not a fatal error. */
1007         }
1008
1009         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1010                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1011                 talloc_destroy(l_ctx);
1012                 return True; /* Not a fatal error. */
1013         }
1014
1015         /*
1016          * Create the initial list entry containing the
1017          * lock we want to add.
1018          */
1019
1020         ZERO_STRUCTP(ll);
1021         ll->start = offset;
1022         ll->size = count;
1023
1024         DLIST_ADD(llist, ll);
1025
1026         /*
1027          * The following call calculates if there are any
1028          * overlapping locks held by this process on
1029          * fd's open on the same file and splits this list
1030          * into a list of lock ranges that do not overlap with existing
1031          * POSIX locks.
1032          */
1033
1034         llist = posix_lock_list(l_ctx, llist, fsp);
1035
1036         /*
1037          * Now we have the list of ranges to lock it is safe to add the
1038          * entry into the POSIX lock tdb. We take note of the entry we
1039          * added here in case we have to remove it on POSIX lock fail.
1040          */
1041
1042         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1043                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1044                 talloc_destroy(l_ctx);
1045                 return False;
1046         }
1047
1048         /*
1049          * Add the POSIX locks on the list of ranges returned.
1050          * As the lock is supposed to be added atomically, we need to
1051          * back out all the locks if any one of these calls fail.
1052          */
1053
1054         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1055                 offset = ll->start;
1056                 count = ll->size;
1057
1058                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1059                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1060
1061                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1062                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1063                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1064                         ret = False;
1065                         break;
1066                 }
1067         }
1068
1069         if (!ret) {
1070
1071                 /*
1072                  * Back out all the POSIX locks we have on fail.
1073                  */
1074
1075                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1076                         offset = ll->start;
1077                         count = ll->size;
1078
1079                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1080                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1081
1082                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1083                 }
1084
1085                 /*
1086                  * Remove the tdb entry for this lock.
1087                  */
1088
1089                 delete_posix_lock_entry_by_index(fsp,entry_num);
1090         }
1091
1092         talloc_destroy(l_ctx);
1093         return ret;
1094 }
1095
1096 /****************************************************************************
1097  POSIX function to release a lock. Returns True if the
1098  lock could be released, False if not.
1099 ****************************************************************************/
1100
1101 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1102 {
1103         SMB_OFF_T offset;
1104         SMB_OFF_T count;
1105         BOOL ret = True;
1106         TALLOC_CTX *ul_ctx = NULL;
1107         struct lock_list *ulist = NULL;
1108         struct lock_list *ul = NULL;
1109         struct posix_lock deleted_lock;
1110         int num_overlapped_entries;
1111
1112         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1113                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1114
1115         /*
1116          * If the requested lock won't fit in the POSIX range, we will
1117          * pretend it was successful.
1118          */
1119
1120         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1121                 return True;
1122
1123         /*
1124          * We treat this as one unlock request for POSIX accounting purposes even
1125          * if it may later be split into multiple smaller POSIX unlock ranges.
1126          * num_overlapped_entries is the number of existing locks that have any
1127          * overlap with this unlock request.
1128          */
1129
1130         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1131
1132         if (num_overlapped_entries == -1) {
1133         smb_panic("release_posix_lock: unable find entry to delete !\n");
1134         }
1135
1136         /*
1137          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1138          * a POSIX write lock, then before doing the unlock we need to downgrade
1139          * the POSIX lock to a read lock. This allows any overlapping read locks
1140          * to be atomically maintained.
1141          */
1142
1143         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1144                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1145                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1146                         return False;
1147                 }
1148         }
1149
1150         if ((ul_ctx = talloc_init()) == NULL) {
1151                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1152                 return True; /* Not a fatal error. */
1153         }
1154
1155         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1156                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1157                 talloc_destroy(ul_ctx);
1158                 return True; /* Not a fatal error. */
1159         }
1160
1161         /*
1162          * Create the initial list entry containing the
1163          * lock we want to remove.
1164          */
1165
1166         ZERO_STRUCTP(ul);
1167         ul->start = offset;
1168         ul->size = count;
1169
1170         DLIST_ADD(ulist, ul);
1171
1172         /*
1173          * The following call calculates if there are any
1174          * overlapping locks held by this process on
1175          * fd's open on the same file and creates a
1176          * list of unlock ranges that will allow
1177          * POSIX lock ranges to remain on the file whilst the
1178          * unlocks are performed.
1179          */
1180
1181         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1182
1183         /*
1184          * Release the POSIX locks on the list of ranges returned.
1185          */
1186
1187         for(; ulist; ulist = ulist->next) {
1188                 offset = ulist->start;
1189                 count = ulist->size;
1190
1191                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1192                         (double)offset, (double)count ));
1193
1194                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1195                         ret = False;
1196         }
1197
1198         talloc_destroy(ul_ctx);
1199
1200         return ret;
1201 }
1202
1203 /****************************************************************************
1204  Remove all lock entries for a specific dev/inode pair from the tdb.
1205 ****************************************************************************/
1206
1207 static void delete_posix_lock_entries(files_struct *fsp)
1208 {
1209         TDB_DATA kbuf = locking_key_fsp(fsp);
1210
1211         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1212                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1213 }
1214
1215 /****************************************************************************
1216  Debug function.
1217 ****************************************************************************/
1218
1219 static void dump_entry(struct posix_lock *pl)
1220 {
1221         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1222                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1223 }
1224
1225 /****************************************************************************
1226  Remove any locks on this fd. Called from file_close().
1227 ****************************************************************************/
1228
1229 void posix_locking_close_file(files_struct *fsp)
1230 {
1231         struct posix_lock *entries = NULL;
1232         size_t count, i;
1233
1234         /*
1235          * Optimization for the common case where we are the only
1236          * opener of a file. If all fd entries are our own, we don't
1237          * need to explicitly release all the locks via the POSIX functions,
1238          * we can just remove all the entries in the tdb and allow the
1239          * close to remove the real locks.
1240          */
1241
1242         count = get_posix_lock_entries(fsp, &entries);
1243
1244         if (count == 0) {
1245                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1246                 return;
1247         }
1248
1249         for (i = 0; i < count; i++) {
1250                 if (entries[i].fd != fsp->fd )
1251                         break;
1252
1253                 dump_entry(&entries[i]);
1254         }
1255
1256         if (i == count) {
1257                 /* All locks are ours. */
1258                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1259                         fsp->fsp_name, (unsigned int)count ));
1260                 SAFE_FREE(entries);
1261                 delete_posix_lock_entries(fsp);
1262                 return;
1263         }
1264
1265         /*
1266          * Difficult case. We need to delete all our locks, whilst leaving
1267          * all other POSIX locks in place.
1268          */
1269
1270         for (i = 0; i < count; i++) {
1271                 struct posix_lock *pl = &entries[i];
1272                 if (pl->fd == fsp->fd)
1273                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1274         }
1275         SAFE_FREE(entries);
1276 }
1277
1278 /*******************************************************************
1279  Create the in-memory POSIX lock databases.
1280 ********************************************************************/
1281
1282 BOOL posix_locking_init(int read_only)
1283 {
1284         if (posix_lock_tdb && posix_pending_close_tdb)
1285                 return True;
1286
1287         if (!posix_lock_tdb)
1288                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1289                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1290         if (!posix_lock_tdb) {
1291                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1292                 return False;
1293         }
1294         if (!posix_pending_close_tdb)
1295                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1296                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1297         if (!posix_pending_close_tdb) {
1298                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1299                 return False;
1300         }
1301
1302         return True;
1303 }
1304
1305 /*******************************************************************
1306  Delete the in-memory POSIX lock databases.
1307 ********************************************************************/
1308
1309 BOOL posix_locking_end(void)
1310 {
1311     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1312                 return False;
1313     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1314                 return False;
1315         return True;
1316 }