bool remove_oplock(files_struct *fsp);
bool downgrade_oplock(files_struct *fsp);
void reply_to_oplock_break_requests(files_struct *fsp);
-void release_level_2_oplocks_on_change(files_struct *fsp);
+void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
+ void *private_data,
+ uint32_t msg_type,
+ struct server_id src,
+ DATA_BLOB *data);
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type);
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type);
void share_mode_entry_to_message(char *msg, const struct share_mode_entry *e);
void message_to_share_mode_entry(struct share_mode_entry *e, char *msg);
bool init_oplocks(struct messaging_context *msg_ctx);
void *private_data;
};
+/*
+ * Kinds of file operation that contend with level2 (read) oplocks held
+ * by other clients, passed to contend_level2_oplocks_begin/end so the
+ * backend can decide how to break the oplocks.
+ */
+enum level2_contention_type {
+ LEVEL2_CONTEND_ALLOC_SHRINK,
+ LEVEL2_CONTEND_ALLOC_GROW,
+ LEVEL2_CONTEND_SET_FILE_LEN,
+ LEVEL2_CONTEND_FILL_SPARSE,
+ LEVEL2_CONTEND_WRITE,
+ LEVEL2_CONTEND_WINDOWS_BRL,
+ LEVEL2_CONTEND_POSIX_BRL
+};
+
/* if a kernel does support oplocks then a structure of the following
typee is used to describe how to interact with the kernel */
struct kernel_oplocks_ops {
files_struct *fsp, int oplock_type);
void (*release_oplock)(struct kernel_oplocks *ctx,
files_struct *fsp, int oplock_type);
+ void (*contend_level2_oplocks_begin)(files_struct *fsp,
+ enum level2_contention_type type);
+ void (*contend_level2_oplocks_end)(files_struct *fsp,
+ enum level2_contention_type type);
};
#include "smb_macros.h"
{
return NULL;
}
+
+/**
+ * The following two functions need to be called from inside the low-level BRL
+ * code for oplocks correctness in smbd. Since other utility binaries also
+ * link in some of the brl code directly, these dummy functions are necessary
+ * to avoid needing to link in the oplocks code and its dependencies to all of
+ * the utility binaries.
+ */
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ /* Deliberate no-op: utility binaries do not manage oplocks. */
+ return;
+}
+
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ /* Deliberate no-op: utility binaries do not manage oplocks. */
+ return;
+}
unsigned int i;
files_struct *fsp = br_lck->fsp;
struct lock_struct *locks = br_lck->lock_data;
+ NTSTATUS status;
for (i=0; i < br_lck->num_locks; i++) {
/* Do any Windows or POSIX locks conflict ? */
#endif
}
+ if (!IS_PENDING_LOCK(plock->lock_type)) {
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+ }
+
/* We can get the Windows lock, now see if it needs to
be mapped into a lower level POSIX one, and if so can
we get it ? */
plock->context.smbpid = 0xFFFFFFFF;
if (errno_ret == EACCES || errno_ret == EAGAIN) {
- return NT_STATUS_FILE_LOCK_CONFLICT;
+ status = NT_STATUS_FILE_LOCK_CONFLICT;
+ goto fail;
} else {
- return map_nt_error_from_unix(errno);
+ status = map_nt_error_from_unix(errno);
+ goto fail;
}
}
}
/* no conflicts - add it to the list of locks */
locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
if (!locks) {
- return NT_STATUS_NO_MEMORY;
+ status = NT_STATUS_NO_MEMORY;
+ goto fail;
}
memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
br_lck->modified = True;
return NT_STATUS_OK;
+ fail:
+ if (!IS_PENDING_LOCK(plock->lock_type)) {
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+ }
+ return status;
}
/****************************************************************************
struct byte_range_lock *br_lck,
struct lock_struct *plock)
{
- unsigned int i, count;
+ unsigned int i, count, posix_count;
struct lock_struct *locks = br_lck->lock_data;
struct lock_struct *tp;
bool lock_was_added = False;
bool signal_pending_read = False;
+ bool break_oplocks = false;
+ NTSTATUS status;
/* No zero-zero locks for POSIX. */
if (plock->start == 0 && plock->size == 0) {
return NT_STATUS_NO_MEMORY;
}
- count = 0;
+ count = posix_count = 0;
for (i=0; i < br_lck->num_locks; i++) {
struct lock_struct *curr_lock = &locks[i];
memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
count++;
} else {
+ unsigned int tmp_count;
+
/* POSIX conflict semantics are different. */
if (brl_conflict_posix(curr_lock, plock)) {
/* Can't block ourselves with POSIX locks. */
}
/* Work out overlaps. */
+ /* Must be plain assignment: tmp_count is uninitialized here, so
+ * "+=" would read an indeterminate value (undefined behavior)
+ * and corrupt both posix_count and count. */
+ tmp_count = brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
+ posix_count += tmp_count;
+ count += tmp_count;
}
}
+ /*
+ * Break oplocks while we hold a brl. Since lock() and unlock() calls
+ * are not symmetric with POSIX semantics, we cannot guarantee our
+ * contend_level2_oplocks_begin/end calls will be acquired and
+ * released one-for-one as with Windows semantics. Therefore we only
+ * call contend_level2_oplocks_begin if this is the first POSIX brl on
+ * the file.
+ */
+ break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
+ posix_count == 0);
+ if (break_oplocks) {
+ contend_level2_oplocks_begin(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+
if (!lock_was_added) {
memcpy(&tp[count], plock, sizeof(struct lock_struct));
count++;
if (errno_ret == EACCES || errno_ret == EAGAIN) {
SAFE_FREE(tp);
- return NT_STATUS_FILE_LOCK_CONFLICT;
+ status = NT_STATUS_FILE_LOCK_CONFLICT;
+ goto fail;
} else {
SAFE_FREE(tp);
- return map_nt_error_from_unix(errno);
+ status = map_nt_error_from_unix(errno);
+ goto fail;
}
}
}
/* Realloc so we don't leak entries per lock call. */
tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
if (!tp) {
- return NT_STATUS_NO_MEMORY;
+ status = NT_STATUS_NO_MEMORY;
+ goto fail;
}
br_lck->num_locks = count;
SAFE_FREE(br_lck->lock_data);
}
return NT_STATUS_OK;
+ fail:
+ if (break_oplocks) {
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+ return status;
}
/****************************************************************************
}
}
+ contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
return True;
}
struct byte_range_lock *br_lck,
const struct lock_struct *plock)
{
- unsigned int i, j, count;
+ unsigned int i, j, count, posix_count;
struct lock_struct *tp;
struct lock_struct *locks = br_lck->lock_data;
bool overlap_found = False;
return False;
}
- count = 0;
+ count = posix_count = 0;
for (i = 0; i < br_lck->num_locks; i++) {
struct lock_struct *lock = &locks[i];
struct lock_struct tmp_lock[3];
/* No change in this lock. */
memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
count++;
+ posix_count++;
} else {
SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
overlap_found = True;
}
}
count++;
+ posix_count++;
continue;
} else {
/* tmp_count == 3 - (we split a lock range in two). */
memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
count++;
+ posix_count++;
memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
count++;
+ posix_count++;
overlap_found = True;
/* Optimisation... */
/* We know we're finished here as we can't overlap any
tp = NULL;
}
+ if (posix_count == 0) {
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+
br_lck->num_locks = count;
SAFE_FREE(br_lck->lock_data);
locks = tp;
struct lock_struct *locks = br_lck->lock_data;
struct server_id pid = procid_self();
bool unlock_individually = False;
+ bool posix_level2_contention_ended = false;
if(lp_posix_locking(fsp->conn->params)) {
if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
del_this_lock = True;
num_deleted_windows_locks++;
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_WINDOWS_BRL);
} else if (lock->lock_flav == POSIX_LOCK) {
del_this_lock = True;
+
+ /* Only end level2 contention once for posix */
+ if (!posix_level2_contention_ended) {
+ posix_level2_contention_ended = true;
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
}
}
aio_ex->req = talloc_move(aio_ex, &req);
- release_level_2_oplocks_on_change(fsp);
+ /* This should actually be improved to span the write. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
if (!write_through && !lp_syncalways(SNUM(fsp->conn))
&& fsp->aio_write_behind) {
* the shared memory area whilst doing this.
*/
- release_level_2_oplocks_on_change(fsp);
+ /* This should actually be improved to span the write. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
#ifdef WITH_PROFILE
if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
the client for LEVEL2.
*******************************************************************/
-static void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
+void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
void *private_data,
uint32_t msg_type,
struct server_id src,
none.
****************************************************************************/
-void release_level_2_oplocks_on_change(files_struct *fsp)
+static void contend_level2_oplocks_begin_default(files_struct *fsp,
+ enum level2_contention_type type)
{
int i;
struct share_mode_lock *lck;
TALLOC_FREE(lck);
}
+/*
+ * Begin an operation that contends with level2 oplocks on fsp.
+ * If the kernel-oplock backend implements its own begin hook, delegate
+ * to it; otherwise fall back to the in-smbd default implementation.
+ */
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ if (koplocks && koplocks->ops->contend_level2_oplocks_begin) {
+ koplocks->ops->contend_level2_oplocks_begin(fsp, type);
+ return;
+ }
+
+ contend_level2_oplocks_begin_default(fsp, type);
+}
+
+/*
+ * End an operation that contends with level2 oplocks on fsp.
+ * Note there is deliberately no default fallback here (unlike the
+ * begin path): only kernel-oplock backends need an end hook.
+ */
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ /* Only kernel oplocks implement this so far */
+ if (koplocks && koplocks->ops->contend_level2_oplocks_end) {
+ koplocks->ops->contend_level2_oplocks_end(fsp, type);
+ }
+}
+
/****************************************************************************
Linearize a share mode entry struct to an internal oplock break message.
****************************************************************************/
****************************************************************************/
static const struct kernel_oplocks_ops irix_koplocks = {
- .set_oplock = irix_set_kernel_oplock,
- .release_oplock = irix_release_kernel_oplock,
+ .set_oplock = irix_set_kernel_oplock,
+ .release_oplock = irix_release_kernel_oplock,
+ /* NULL => dispatcher falls back to smbd's default level2 handling */
+ .contend_level2_oplocks_begin = NULL,
+ .contend_level2_oplocks_end = NULL,
};
struct kernel_oplocks *irix_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
****************************************************************************/
static const struct kernel_oplocks_ops linux_koplocks = {
- .set_oplock = linux_set_kernel_oplock,
- .release_oplock = linux_release_kernel_oplock,
+ .set_oplock = linux_set_kernel_oplock,
+ .release_oplock = linux_release_kernel_oplock,
+ /* NULL => dispatcher falls back to smbd's default level2 handling */
+ .contend_level2_oplocks_begin = NULL,
+ .contend_level2_oplocks_end = NULL,
};
struct kernel_oplocks *linux_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
return;
}
- release_level_2_oplocks_on_change(fsp);
-
numtoread = SVAL(req->vwv+1, 0);
startpos = IVAL_TO_SMB_OFF_T(req->vwv+2, 0);
return;
}
- release_level_2_oplocks_on_change(fsp);
-
count = (uint64_t)IVAL(req->vwv+1, 0);
offset = (uint64_t)IVAL(req->vwv+3, 0);
}
}
- /*
- * We do this check *after* we have checked this is not a oplock break
- * response message. JRA.
- */
-
- release_level_2_oplocks_on_change(fsp);
-
if (req->buflen <
(num_ulocks + num_locks) * (large_file_format ? 20 : 10)) {
reply_nterror(req, NT_STATUS_INVALID_PARAMETER);
uint64_t space_avail;
uint64_t bsize,dfree,dsize;
- release_level_2_oplocks_on_change(fsp);
-
/*
* Actually try and commit the space on disk....
*/
DEBUG(10,("vfs_allocate_file_space: file %s, shrink. Current size %.0f\n",
fsp->fsp_name, (double)st.st_size ));
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_ALLOC_SHRINK);
+
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if ((ret = SMB_VFS_FTRUNCATE(fsp, (SMB_OFF_T)len)) != -1) {
set_filelen_write_cache(fsp, len);
}
+
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_ALLOC_SHRINK);
+
return ret;
}
/* Grow - we need to test if we have enough space. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_ALLOC_GROW);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_ALLOC_GROW);
+
if (!lp_strict_allocate(SNUM(fsp->conn)))
return 0;
{
int ret;
- release_level_2_oplocks_on_change(fsp);
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_SET_FILE_LEN);
+
DEBUG(10,("vfs_set_filelen: ftruncate %s to len %.0f\n", fsp->fsp_name, (double)len));
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if ((ret = SMB_VFS_FTRUNCATE(fsp, len)) != -1) {
fsp->fsp_name);
}
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_SET_FILE_LEN);
+
return ret;
}
size_t num_to_write;
ssize_t pwrite_ret;
- release_level_2_oplocks_on_change(fsp);
ret = SMB_VFS_FSTAT(fsp, &st);
if (ret == -1) {
return ret;
DEBUG(10,("vfs_fill_sparse: write zeros in file %s from len %.0f to len %.0f (%.0f bytes)\n",
fsp->fsp_name, (double)st.st_size, (double)len, (double)(len - st.st_size)));
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_FILL_SPARSE);
+
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if (!sparse_buf) {
sparse_buf = SMB_CALLOC_ARRAY(char, SPARSE_BUF_WRITE_SIZE);
if (!sparse_buf) {
errno = ENOMEM;
- return -1;
+ ret = -1;
+ goto out;
}
}
if (pwrite_ret == -1) {
DEBUG(10,("vfs_fill_sparse: SMB_VFS_PWRITE for file %s failed with error %s\n",
fsp->fsp_name, strerror(errno) ));
- return -1;
+ ret = -1;
+ goto out;
}
if (pwrite_ret == 0) {
- return 0;
+ ret = 0;
+ goto out;
}
total += pwrite_ret;
}
set_filelen_write_cache(fsp, len);
- return 0;
+
+ ret = 0;
+ out:
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_FILL_SPARSE);
+ return ret;
}
/****************************************************************************