s3 OneFS: Add kernel oplocks implementation
authorTim Prouty <tprouty@samba.org>
Wed, 28 Jan 2009 00:13:35 +0000 (16:13 -0800)
committerTim Prouty <tprouty@samba.org>
Tue, 10 Feb 2009 07:47:45 +0000 (23:47 -0800)
A few functions in oplocks_onefs.c need to be accessed from the onefs
vfs module.  It would be ideal if oplocks were implemented at the vfs
layer, but since they aren't yet, a new header is added to
source3/include to make these functions available to the onefs vfs
module.  oplocks_onefs.o doesn't need to be linked into the onefs vfs
module explicitly, since it is already linked into smbd by default.

source3/Makefile.in
source3/configure.in
source3/include/oplock_onefs.h [new file with mode: 0644]
source3/include/proto.h
source3/modules/onefs.h
source3/modules/onefs_open.c
source3/modules/onefs_system.c
source3/smbd/oplock.c
source3/smbd/oplock_onefs.c [new file with mode: 0644]

index 942c5b33ae07d24bcb37daa772de79f9b994c55b..204995328487af8a9f4579a9872e9e26e47556d3 100644 (file)
@@ -620,7 +620,7 @@ PROFILES_OBJ = utils/profiles.o \
                $(LIB_OBJ) $(LIB_DUMMY_OBJ) \
                $(POPT_LIB_OBJ)
 
-OPLOCK_OBJ = smbd/oplock.o smbd/oplock_irix.o smbd/oplock_linux.o
+OPLOCK_OBJ = smbd/oplock.o smbd/oplock_irix.o smbd/oplock_linux.o smbd/oplock_onefs.o
 
 NOTIFY_OBJ = smbd/notify.o smbd/notify_inotify.o smbd/notify_internal.o
 
index b81e768073e6f313ed9b6d799699f8844fc2cb7e..10ce6f6e5e126d430232a9a68dbebf39d283ab04 100644 (file)
@@ -1099,6 +1099,8 @@ if test x"$samba_cv_HAVE_ONEFS" = x"yes"; then
     AC_DEFINE(HAVE_ONEFS,1,[Whether building on Isilon OneFS])
     default_shared_modules="$default_shared_modules vfs_onefs perfcount_onefs"
     ONEFS_LIBS="-lisi_acl"
+    # Need to also add general libs for oplocks support
+    save_LIBS="$save_LIBS -lisi_ecs -lisi_event -lisi_util -ldevstat"
 fi
 AC_SUBST(ONEFS_LIBS)
 LIBS="$save_LIBS"
diff --git a/source3/include/oplock_onefs.h b/source3/include/oplock_onefs.h
new file mode 100644 (file)
index 0000000..a20becd
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Support for OneFS kernel oplocks
+ *
+ * Copyright (C) Tim Prouty, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OPLOCK_ONEFS_H
+#define _OPLOCK_ONEFS_H
+
+#if HAVE_ONEFS
+
+#include <sys/isi_oplock.h>
+
+struct deferred_open_record {
+       bool delayed_for_oplocks;
+       bool failed; /* added for onefs_oplocks */
+       struct file_id id;
+};
+
+/*
+ * OneFS oplock utility functions
+ */
+const char *onefs_oplock_str(enum oplock_type onefs_oplock_type);
+int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock);
+enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type);
+
+/*
+ * OneFS oplock callback tracking (defined in smbd/oplock_onefs.c)
+ */
+void destroy_onefs_callback_record(uint64_t id);
+uint64_t onefs_oplock_wait_record(uint16_t mid);
+void onefs_set_oplock_callback(uint64_t id, files_struct *fsp);
+
+#endif /* HAVE_ONEFS */
+
+#endif /* _OPLOCK_ONEFS_H */
index f553f7625ece77a7d9cebbeaa05e43373989648e..1566a01dc5cd6ae424bfe47edc737ccad076e6bd 100644 (file)
@@ -6924,6 +6924,10 @@ int linux_set_lease_sighandler(int fd);
 int linux_setlease(int fd, int leasetype);
 struct kernel_oplocks *linux_init_kernel_oplocks(TALLOC_CTX *mem_ctx) ;
 
+/* The following definitions come from smbd/oplock_onefs.c  */
+
+struct kernel_oplocks *onefs_init_kernel_oplocks(TALLOC_CTX *mem_ctx);
+
 /* The following definitions come from smbd/password.c  */
 
 user_struct *get_valid_user_struct(uint16 vuid);
index 2044ebec48c4a965a81a7e747887062a3a2da113..c8f19f4b31bafcb8683db715d9a29f2d006aae77 100644 (file)
@@ -22,7 +22,7 @@
 #define _ONEFS_H
 
 #include "includes.h"
-
+#include "oplock_onefs.h"
 #include <sys/isi_acl.h>
 
 /* OneFS Module smb.conf parameters and defaults */
index c8415de521f4711e03dcb828fe1815c632d1a12c..b2b11ebaaca799fa2c5f31806a83c6f7a6e87c79 100644 (file)
  */
 
 #include "onefs.h"
+#include "smbd/globals.h"
 
 extern const struct generic_mapping file_generic_mapping;
-extern bool global_client_failed_oplock_break;
 
-struct deferred_open_record {
-       bool delayed_for_oplocks;
-       bool failed; /* added for onefs_oplocks */
-       struct file_id id;
+struct onefs_fsp_data {
+       uint64_t oplock_callback_id;
 };
 
 static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
@@ -55,9 +53,9 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
                              uint64_t allocation_size,
                              struct security_descriptor *sd,
                              struct ea_list *ea_list,
-
                              files_struct **result,
                              int *pinfo,
+                             struct onefs_fsp_data *fsp_data,
                              SMB_STRUCT_STAT *psbuf);
 
 /****************************************************************************
@@ -189,11 +187,6 @@ static NTSTATUS onefs_open_file(files_struct *fsp,
                flags |= O_NOFOLLOW;
        }
 #endif
-       /* Don't request an oplock if oplocks are turned off for the
-        * share. */
-       if (!lp_oplocks(SNUM(conn)))
-               oplock_request = 0;
-
        /* Stream handling */
        if (is_ntfs_stream_name(path)) {
                status = onefs_split_ntfs_stream_name(talloc_tos(), path,
@@ -203,6 +196,22 @@ static NTSTATUS onefs_open_file(files_struct *fsp,
        if (stream != NULL) {
                SMB_ASSERT(fsp->base_fsp);
 
+               /*
+                * We have never seen an oplock taken on a stream, and our
+                * current implementation doesn't support it.  If a request is
+                * seen, log a loud error message and ignore the requested
+                * oplock.
+                */
+               if ((oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK) !=
+                    NO_OPLOCK) {
+                       DEBUG(0,("Oplock(%d) being requested on a stream! "
+                               "Ignoring oplock request: base=%s, stream=%s\n",
+                               oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK,
+                               base, stream));
+                       /* Keep only the private bits, i.e. NO_OPLOCK. */
+                       oplock_request &= SAMBA_PRIVATE_OPLOCK_MASK;
+               }
+
                DEBUG(10,("Opening a stream: base=%s(%d), stream=%s",
                          base, fsp->base_fsp->fh->fd, stream));
 
@@ -242,8 +251,8 @@ static NTSTATUS onefs_open_file(files_struct *fsp,
 
                status = map_nt_error_from_unix(errno);
                DEBUG(3,("Error opening file %s (%s) (local_flags=%d) "
-                         "(flags=%d)\n",
-                         path,nt_errstr(status),local_flags,flags));
+                       "(flags=%d)\n",
+                       path, strerror(errno), local_flags, flags));
                return status;
        }
 
@@ -407,7 +416,11 @@ static void schedule_defer_open(struct share_mode_lock *lck,
         * measure here in case the other smbd is stuck
         * somewhere else. */
 
-       timeout = timeval_set(OPLOCK_BREAK_TIMEOUT*2, 0);
+       /*
+        * On OneFS, the kernel will always send an oplock_revoked message
+        * before this timeout is hit.
+        */
+       timeout = timeval_set(OPLOCK_BREAK_TIMEOUT*10, 0);
 
        /* Nothing actually uses state.delayed_for_oplocks
           but it's handy to differentiate in debug messages
@@ -415,7 +428,7 @@ static void schedule_defer_open(struct share_mode_lock *lck,
           a 1 second delay for share mode conflicts. */
 
        state.delayed_for_oplocks = True;
-       state.failed = False;
+       state.failed = false;
        state.id = lck->id;
 
        if (!request_timed_out(request_time, timeout)) {
@@ -438,6 +451,7 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                                  struct security_descriptor *sd,
                                  files_struct *fsp,
                                  int *pinfo,
+                                 struct onefs_fsp_data *fsp_data,
                                  SMB_STRUCT_STAT *psbuf)
 {
        int flags=0;
@@ -461,7 +475,7 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
        char *parent_dir;
        const char *newname;
        int granted_oplock;
-       uint64 oplock_waiter;
+       uint64_t oplock_callback_id = 0;
        uint32 createfile_attributes = 0;
 
        ZERO_STRUCT(id);
@@ -505,6 +519,30 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                  create_disposition, create_options, unx_mode,
                  oplock_request));
 
+       /*
+        * Any non-stat-only open has the potential to contend oplocks, which
+        * means to avoid blocking in the kernel (which is unacceptable), the
+        * open must be deferred.  In order to defer opens, req must not be
+        * NULL.  The known cases of calling with a NULL req:
+        *
+        *   1. Open the base file of a stream: Always done stat-only
+        *
+        *   2. Open the stream: Oplocks are disallowed on streams, so an
+        *      oplock will never be contended.
+        *
+        *   3. open_file_fchmod(), which is called from 3 places:
+        *      A. try_chown: Posix acls only. Never called on onefs.
+        *      B. set_ea_dos_attributes: Can't be called from onefs, because
+        *         SMB_VFS_SETXATTR return ENOSYS.
+        *      C. file_set_dos_mode: This would only happen if the "dos
+        *         filemode" smb.conf parameter is set to yes.  We ship with
+        *         it off, but if a customer were to turn it on it would be
+        *         bad.
+        */
+       if (req == NULL && !is_stat_open(access_mask) && !is_ntfs_stream_name(fname)) {
+               smb_panic("NULL req on a non-stat-open!");
+       }
+
        if ((req == NULL) && ((oplock_request & INTERNAL_OPEN_ONLY) == 0)) {
                DEBUG(0, ("No smb request but not an internal only open!\n"));
                return NT_STATUS_INTERNAL_ERROR;
@@ -839,10 +877,22 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                 (unsigned int)unx_mode, (unsigned int)access_mask,
                 (unsigned int)open_access_mask));
 
-       oplock_waiter = 1; //ifs_oplock_wait_record(mid);
-
-       if (oplock_waiter == 0) {
-               return NT_STATUS_NO_MEMORY;
+       /*
+        * Since the open is guaranteed to be stat only if req == NULL, a
+        * callback record is only needed if req != NULL.
+        */
+       if (req) {
+               SMB_ASSERT(fsp_data);
+               oplock_callback_id = onefs_oplock_wait_record(req->mid);
+               if (oplock_callback_id == 0) {
+                       return NT_STATUS_NO_MEMORY;
+               }
+       } else {
+               /*
+                * It is also already asserted it's either a stream or a
+                * stat-only open at this point.
+                */
+               SMB_ASSERT(fsp->oplock_type == NO_OPLOCK);
        }
 
        /* Do the open. */
@@ -858,7 +908,7 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                                 access_mask,
                                 open_access_mask,
                                 fsp->oplock_type,
-                                oplock_waiter,
+                                oplock_callback_id,
                                 share_access,
                                 create_options,
                                 createfile_attributes,
@@ -910,6 +960,9 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                                goto cleanup_destroy;
                        }
                        /* Waiting for an oplock */
+                       DEBUG(5,("Async createfile because a client has an "
+                                "oplock on %s\n", fname));
+
                        SMB_ASSERT(req);
                        schedule_defer_open(lck, request_time, req);
                        goto cleanup;
@@ -1044,7 +1097,9 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                 * Normal error, for example EACCES
                 */
         cleanup_destroy:
-               //destroy_ifs_callback_record(oplock_waiter);
+               if (oplock_callback_id != 0) {
+                       destroy_onefs_callback_record(oplock_callback_id);
+               }
         cleanup:
                TALLOC_FREE(lck);
                return status;
@@ -1052,9 +1107,12 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
 
        fsp->oplock_type = granted_oplock;
 
-       /* XXX uncomment for oplocks */
-       //ifs_set_oplock_callback(oplock_waiter, fsp);
-       //fsp->oplock_callback_id = oplock_waiter;
+       if (oplock_callback_id != 0) {
+               onefs_set_oplock_callback(oplock_callback_id, fsp);
+               fsp_data->oplock_callback_id = oplock_callback_id;
+       } else {
+               SMB_ASSERT(fsp->oplock_type == NO_OPLOCK);
+       }
 
        if (!file_existed) {
                struct timespec old_write_time = get_mtimespec(psbuf);
@@ -1195,6 +1253,16 @@ NTSTATUS onefs_open_file_ntcreate(connection_struct *conn,
                }
        }
 
+       if (fsp->oplock_type == LEVEL_II_OPLOCK &&
+           (!lp_level2_oplocks(SNUM(conn)) ||
+               !(global_client_caps & CAP_LEVEL_II_OPLOCKS))) {
+
+               DEBUG(5, ("Downgrading level2 oplock on open "
+                         "because level2 oplocks = off\n"));
+
+               release_file_oplock(fsp);
+       }
+
        if (info == FILE_WAS_OVERWRITTEN || info == FILE_WAS_CREATED ||
            info == FILE_WAS_SUPERSEDED) {
                new_file_created = True;
@@ -1654,6 +1722,7 @@ static NTSTATUS open_streams_for_delete(connection_struct *conn,
                         NULL,                  /* ea_list */
                         &streams[i],           /* result */
                         NULL,                  /* pinfo */
+                        NULL,                  /* fsp_data */
                         NULL);                 /* psbuf */
 
                TALLOC_FREE(streamname);
@@ -1701,6 +1770,7 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
                                           struct ea_list *ea_list,
                                           files_struct **result,
                                           int *pinfo,
+                                          struct onefs_fsp_data *fsp_data,
                                           SMB_STRUCT_STAT *psbuf)
 {
        SMB_STRUCT_STAT sbuf;
@@ -1733,6 +1803,8 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
        }
 
        if (req == NULL) {
+               SMB_ASSERT((oplock_request & ~SAMBA_PRIVATE_OPLOCK_MASK) ==
+                           NO_OPLOCK);
                oplock_request |= INTERNAL_OPEN_ONLY;
        }
 
@@ -1793,7 +1865,7 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
                        conn,                           /* conn */
                        NULL,                           /* req */
                        base,                           /* fname */
-                       0,                              /* access_mask */
+                       SYNCHRONIZE_ACCESS,             /* access_mask */
                        (FILE_SHARE_READ |
                            FILE_SHARE_WRITE |
                            FILE_SHARE_DELETE),         /* share_access */
@@ -1806,6 +1878,7 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
                        NULL,                           /* ea_list */
                        &base_fsp,                      /* result */
                        NULL,                           /* pinfo */
+                       NULL,                           /* fsp_data */
                        NULL);                          /* psbuf */
 
                if (!NT_STATUS_IS_OK(status)) {
@@ -1890,6 +1963,7 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
                        sd,                             /* sd */
                        fsp,                            /* result */
                        &info,                          /* pinfo */
+                       fsp_data,                       /* fsp_data */
                        &sbuf);                         /* psbuf */
 
                if(!NT_STATUS_IS_OK(status)) {
@@ -2013,6 +2087,13 @@ static NTSTATUS onefs_create_file_unixpath(connection_struct *conn,
        return status;
 }
 
+/* fsp-extension destructor: drop the oplock callback record of this open. */
+static void destroy_onefs_fsp_data(void *p_data)
+{
+       const struct onefs_fsp_data *ext = p_data;
+       destroy_onefs_callback_record(ext->oplock_callback_id);
+}
+
 /**
  * SMB_VFS_CREATE_FILE interface to onefs.
  */
@@ -2036,6 +2117,7 @@ NTSTATUS onefs_create_file(vfs_handle_struct *handle,
 {
        connection_struct *conn = handle->conn;
        struct case_semantics_state *case_state = NULL;
+       struct onefs_fsp_data fsp_data = {};
        SMB_STRUCT_STAT sbuf;
        int info = FILE_WAS_OPENED;
        files_struct *fsp = NULL;
@@ -2139,6 +2221,7 @@ NTSTATUS onefs_create_file(vfs_handle_struct *handle,
                ea_list,                                /* ea_list */
                &fsp,                                   /* result */
                &info,                                  /* pinfo */
+               &fsp_data,                              /* fsp_data */
                &sbuf);                                 /* psbuf */
 
        if (!NT_STATUS_IS_OK(status)) {
@@ -2147,6 +2230,26 @@ NTSTATUS onefs_create_file(vfs_handle_struct *handle,
 
        DEBUG(10, ("onefs_create_file: info=%d\n", info));
 
+       /*
+        * Setup private onefs_fsp_data.  Currently the private data struct is
+        * only used to store the oplock_callback_id so that when the file is
+        * closed, the onefs_callback_record can be properly cleaned up in the
+        * oplock_onefs sub-system.
+        */
+       if (fsp) {
+               struct onefs_fsp_data *fsp_data_tmp = NULL;
+               fsp_data_tmp = (struct onefs_fsp_data *)
+                   VFS_ADD_FSP_EXTENSION(handle, fsp, struct onefs_fsp_data,
+                       &destroy_onefs_fsp_data);
+
+               if (fsp_data_tmp == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
+                       goto fail;
+               }
+
+               *fsp_data_tmp = fsp_data;
+       }
+
        *result = fsp;
        if (pinfo != NULL) {
                *pinfo = info;
index 4ebdf12a50342ac96ae968bd5baf65ce948aee61..acc38fba30710340357882fdc689140a79bb1f33 100644 (file)
@@ -68,58 +68,6 @@ static void smlock_dump(int debuglevel, const struct sm_lock *sml)
               (int)sml->sm_timeout.tv_usec));
 }
 
-/*
- * Return string value of onefs oplock types.
- */
-static const char *onefs_oplock_str(enum oplock_type onefs_oplock_type)
-{
-       switch (onefs_oplock_type) {
-       case OPLOCK_NONE:
-               return "OPLOCK_NONE";
-       case OPLOCK_EXCLUSIVE:
-               return "OPLOCK_EXCLUSIVE";
-       case OPLOCK_BATCH:
-               return "OPLOCK_BATCH";
-       case OPLOCK_SHARED:
-               return "OPLOCK_SHARED";
-       default:
-               break;
-       }
-       return "UNKNOWN";
-}
-
-/*
- * Convert from onefs to samba oplock.
- */
-static int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock)
-{
-       switch (onefs_oplock) {
-       case OPLOCK_NONE:
-               return NO_OPLOCK;
-       case OPLOCK_EXCLUSIVE:
-               return EXCLUSIVE_OPLOCK;
-       case OPLOCK_BATCH:
-               return BATCH_OPLOCK;
-       case OPLOCK_SHARED:
-               return LEVEL_II_OPLOCK;
-       default:
-               DEBUG(0, ("unknown oplock type %d found\n", onefs_oplock));
-               break;
-       }
-       return NO_OPLOCK;
-}
-
-/*
- * Convert from samba to onefs oplock.
- */
-static enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type)
-{
-       if (BATCH_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_BATCH;
-       if (EXCLUSIVE_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_EXCLUSIVE;
-       if (LEVEL_II_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_SHARED;
-       return OPLOCK_NONE;
-}
-
 /**
  * External interface to ifs_createfile
  */
@@ -164,10 +112,12 @@ int onefs_sys_create_file(connection_struct *conn,
                pifs_sd = &ifs_sd;
        }
 
+       /* Stripping off private bits will be done for us. */
        onefs_oplock = onefs_samba_oplock_to_oplock(oplock_request);
 
-       /* Temporary until oplock work is added to vfs_onefs */
-       onefs_oplock = OPLOCK_NONE;
+       if (!lp_oplocks(SNUM(conn))) {
+               SMB_ASSERT(onefs_oplock == OPLOCK_NONE);
+       }
 
        /* Convert samba dos flags to UF_DOS_* attributes. */
        onefs_dos_attributes = dos_attributes_to_stat_dos_flags(dos_flags);
index a6ec9cfa2dfc514407c1fc016a7a5238ee67be34..b39e5bf6349d852bca47a837ccf9365c81c2878d 100644 (file)
@@ -122,6 +122,11 @@ void release_file_oplock(files_struct *fsp)
 
 static void downgrade_file_oplock(files_struct *fsp)
 {
+       if (!EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
+               DEBUG(0, ("trying to downgrade an already-downgraded oplock!\n"));
+               return;
+       }
+
        if (koplocks) {
                koplocks->ops->release_oplock(koplocks, fsp, LEVEL_II_OPLOCK);
        }
@@ -916,6 +921,8 @@ bool init_oplocks(struct messaging_context *msg_ctx)
                koplocks = irix_init_kernel_oplocks(talloc_autofree_context());
 #elif HAVE_KERNEL_OPLOCKS_LINUX
                koplocks = linux_init_kernel_oplocks(talloc_autofree_context());
+#elif HAVE_ONEFS
+               koplocks = onefs_init_kernel_oplocks(talloc_autofree_context());
 #endif
        }
 
diff --git a/source3/smbd/oplock_onefs.c b/source3/smbd/oplock_onefs.c
new file mode 100644 (file)
index 0000000..0908ce4
--- /dev/null
@@ -0,0 +1,798 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Support for OneFS kernel oplocks
+ *
+ * Copyright (C) Volker Lendecke 2007
+ * Copyright (C) Tim Prouty, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define DBGC_CLASS DBGC_LOCKING
+
+#include "includes.h"
+
+#if HAVE_ONEFS
+#include "oplock_onefs.h"
+#include "smbd/globals.h"
+
+#include <ifs/ifs_syscalls.h>
+#include <isi_ecs/isi_ecs_oplocks.h>
+#include <sys/proc.h>
+
+struct onefs_oplocks_context {
+       struct kernel_oplocks *ctx;
+       const struct oplocks_event_ops *onefs_ops;
+       int onefs_event_fd;
+       struct fd_event *read_fde;
+};
+
+enum onefs_callback_state {
+       ONEFS_OPEN_FILE,                /* set by onefs_set_oplock_callback */
+       ONEFS_WAITING_FOR_OPLOCK        /* initial state of every new record */
+};
+
+struct onefs_callback_record {
+       struct onefs_callback_record *prev, *next; /* callback_recs links */
+       uint64_t id;            /* nonzero, from onefs_oplock_wait_record */
+       enum onefs_callback_state state; /* selects the valid data member */
+       union {
+               files_struct *fsp;      /* ONEFS_OPEN_FILE */
+               uint16_t mid;           /* ONEFS_WAITING_FOR_OPLOCK */
+       } data;
+};
+
+/**
+ * Internal list of files (along with additional state) that have outstanding
+ * oplocks or requests for oplocks.
+ */
+struct onefs_callback_record *callback_recs;
+
+/**
+ * Describe a callback record for logging; returns a reused static buffer.
+ */
+static char *onefs_callback_record_str_static(const struct onefs_callback_record *r)
+{
+       static fstring result;
+
+       if (r == NULL) {
+               fstrcpy(result, "NULL callback record");
+               return result;
+       }
+       /* Cast ids: uint64_t need not be unsigned long long for %llu. */
+       switch (r->state) {
+       case ONEFS_OPEN_FILE:
+               fstr_sprintf(result, "cb record %llu for file %s",
+                            (unsigned long long)r->id, r->data.fsp->fsp_name);
+               break;
+       case ONEFS_WAITING_FOR_OPLOCK:
+               fstr_sprintf(result, "cb record %llu for pending mid %d",
+                            (unsigned long long)r->id, (int)r->data.mid);
+               break;
+       default:
+               fstr_sprintf(result, "cb record %llu unknown state %d",
+                            (unsigned long long)r->id, (int)r->state);
+               break;
+       }
+
+       return result;
+}
+
+/**
+ * Dump every entry of callback_recs at debug level 10, tagged with fn.
+ */
+static void debug_cb_records(const char *fn)
+{
+       struct onefs_callback_record *cur = callback_recs;
+
+       if (DEBUGLEVEL < 10) {
+               return;
+       }
+       DEBUG(10, ("cb records (%s):\n", fn));
+       while (cur != NULL) {
+               DEBUGADD(10, ("%s\n", onefs_callback_record_str_static(cur)));
+               cur = cur->next;
+       }
+}
+
+/**
+ * Find a callback record in the list of outstanding oplock operations.
+ *
+ * Once ifs_createfile requests an oplock on a file, the kernel communicates
+ * with samba via the oplock event channel by sending events that reference an
+ * id.  This function maps that id to the onefs_callback_record that was
+ * created for it during the initial setup on open (onefs_oplock_wait_record).
+ * When a matching id is found in the onefs_callback_record list, its state
+ * is checked against expected_state; a mismatch is logged and asserted.
+ * Returns NULL if no record has this id.
+ */
+static struct onefs_callback_record *onefs_find_cb(uint64_t id,
+    enum onefs_callback_state expected_state)
+{
+       struct onefs_callback_record *rec;
+
+       debug_cb_records("onefs_find_cb");
+
+       for (rec = callback_recs; rec; rec = rec->next) {
+               if (rec->id == id) {
+                       DEBUG(10, ("found %s\n",
+                                  onefs_callback_record_str_static(rec)));
+                       break;
+               }
+       }
+
+       if (rec == NULL) {
+               DEBUG(5, ("Could not find callback record for id %llu\n",
+                         (unsigned long long)id));
+               return NULL;
+       }
+       if (rec->state != expected_state) {
+               DEBUG(0, ("Expected cb type %d, got %s\n", expected_state,
+                         onefs_callback_record_str_static(rec)));
+               SMB_ASSERT(0);
+               return NULL;
+       }
+
+       return rec;
+}
+
+/**
+ * Unlink and free the record for id; 0 is a no-op, an unknown id asserts.
+ */
+void destroy_onefs_callback_record(uint64_t id)
+{
+       struct onefs_callback_record *rec;
+
+       debug_cb_records("destroy_onefs_callback_record");
+       if (id == 0) {
+               DEBUG(10, ("destroy_onefs_callback_record: Nothing to "
+                          "destroy\n"));
+               return;
+       }
+
+       for (rec = callback_recs; rec; rec = rec->next) {
+               if (rec->id == id) {
+                       DLIST_REMOVE(callback_recs, rec);
+                       SAFE_FREE(rec);
+                       DEBUG(10, ("removed cb rec %llu\n",
+                                  (unsigned long long)id));
+                       return;
+               }
+       }
+       DEBUG(0, ("Could not find cb rec %llu to delete\n",
+                 (unsigned long long)id));
+       SMB_ASSERT(0);
+}
+
+/**
+ * Initialize a callback record and add it to the list of outstanding callback
+ * records.  Returns the new record's nonzero id, or 0 if allocation fails.
+ *
+ * This is called in the open path before ifs_createfile so an id can be
+ * passed in.  Each callback record can be in one of two states:
+ *
+ *   1. WAITING_FOR_OPLOCK: This is the initial state for all callback
+ *   records.  If ifs_createfile can be completed synchronously without needing
+ *   to break any level I oplocks, the state is transitioned to OPEN_FILE.
+ *   Otherwise ifs_createfile will finish asynchronously and the open is
+ *   deferred.  When the necessary level I oplocks have been broken, and the
+ *   open can be done, an event is sent by the kernel on the oplock event
+ *   channel, which is handled by semlock_available_handler.  At this point
+ *   the deferred open is retried.  Unless a level I oplock was acquired by
+ *   another client, ifs_createfile will now complete synchronously.
+ *
+ *   2. OPEN_FILE: Once ifs_createfile completes, the callback record is
+ *   transitioned to this state via onefs_set_oplock_callback.
+ */
+uint64_t onefs_oplock_wait_record(uint16_t mid)
+{
+       struct onefs_callback_record *result;
+       static uint64_t id_generator = 0;
+
+       if (!(result = SMB_MALLOC_P(struct onefs_callback_record))) {
+               DEBUG(0, ("malloc failed\n"));
+               return 0;
+       }
+
+       /* Zero the whole record, not just sizeof(pointer) bytes of it. */
+       memset(result, '\0', sizeof(*result));
+       id_generator += 1;
+       if (id_generator == 0) {
+               /* 0 means "no record" to callers, so skip it on wrap. */
+               id_generator += 1;
+       }
+
+       result->id = id_generator;
+       result->state = ONEFS_WAITING_FOR_OPLOCK;
+       result->data.mid = mid;
+       DLIST_ADD(callback_recs, result);
+
+       DEBUG(10, ("New cb rec %llu created\n",
+                  (unsigned long long)result->id));
+
+       return result->id;
+}
+
+/**
+ * Transition the callback record state to OPEN_FILE; panics on a bad id.
+ *
+ * This is called after the file is opened and an fsp struct has been
+ * allocated.  The mid is dropped in favor of storing the fsp.
+ */
+void onefs_set_oplock_callback(uint64_t id, files_struct *fsp)
+{
+       struct onefs_callback_record *cb;
+       char *msg;
+
+       DEBUG(10, ("onefs_set_oplock_callback called for cb rec %llu\n",
+                  (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
+               if (asprintf(&msg, "Got invalid callback %llu\n",
+                            (unsigned long long)id) != -1) {
+                       smb_panic(msg);
+               }
+               smb_panic("Got invalid callback id\n");
+       }
+
+       /* Paranoia check: panic if this mid is still a deferred open. */
+       if (open_was_deferred(cb->data.mid)) {
+               if (asprintf(&msg, "Trying to upgrade callback for deferred "
+                            "open mid=%d\n", cb->data.mid) != -1) {
+                       smb_panic(msg);
+               }
+               smb_panic("Trying to upgrade callback for deferred open "
+                         "mid\n");
+       }
+
+       cb->state = ONEFS_OPEN_FILE;
+       cb->data.fsp = fsp;
+}
+
+/**
+ * Fill sme from an OPEN_FILE callback record (pid, op_type, file id, gen id)
+ * so it can be serialized with share_mode_entry_to_message.
+ */
+static void init_share_mode_entry(struct share_mode_entry *sme,
+                                 struct onefs_callback_record *cb,
+                                 int op_type)
+{
+       files_struct *open_file = cb->data.fsp;
+       ZERO_STRUCT(*sme);
+       sme->id = open_file->file_id;
+       sme->share_file_id = open_file->fh->gen_id;
+       sme->op_type = op_type;
+       sme->pid = procid_self();
+}
+
+/**
+ * Callback when a break-to-none event is received from the kernel.
+ *
+ * On OneFS level 1 oplocks are always broken to level 2 first, therefore an
+ * async level 2 break message is always sent when breaking to none.  The
+ * downside of this is that OneFS currently has no way to express breaking
+ * directly from level 1 to none.
+ */
+static void oplock_break_to_none_handler(uint64_t id)
+{
+       struct onefs_callback_record *cb;
+       struct share_mode_entry sme;
+       char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+
+       DEBUG(10, ("oplock_break_to_none_handler called for id %llu\n",
+                  (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
+               DEBUG(3, ("oplock_break_to_none_handler: could not find "
+                         "callback id %llu\n", (unsigned long long)id));
+               return;
+       }
+
+       DEBUG(10, ("oplock_break_to_none_handler called for file %s\n",
+                  cb->data.fsp->fsp_name));
+
+       init_share_mode_entry(&sme, cb, FORCE_OPLOCK_BREAK_TO_NONE);
+       share_mode_entry_to_message(msg, &sme);
+       messaging_send_buf(smbd_messaging_context(), sme.pid,
+                          MSG_SMB_ASYNC_LEVEL2_BREAK,
+                          (uint8_t *)msg,
+                          MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+
+       /*
+        * We could still receive an OPLOCK_REVOKED message, so keep the
+        * oplock_callback_id around.
+        */
+}
+
+/**
+ * Callback when a break-to-level2 event is received from the kernel.
+ *
+ * Breaks from level 1 to level 2.
+ *
+ * The id must name a callback record in the ONEFS_OPEN_FILE state; if no
+ * such record exists the event is logged and ignored.
+ */
+static void oplock_break_to_level_two_handler(uint64_t id)
+{
+       struct onefs_callback_record *cb;
+       struct share_mode_entry sme;
+       char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+       NTSTATUS status;
+
+       /* Cast id for %llu: uint64_t need not be unsigned long long. */
+       DEBUG(10, ("oplock_break_to_level_two_handler called for id %llu\n",
+                  (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
+               DEBUG(3, ("oplock_break_to_level_two_handler: could not find "
+                         "callback id %llu\n", (unsigned long long)id));
+               return;
+       }
+
+       DEBUG(10, ("oplock_break_to_level_two_handler called for file %s\n",
+                  cb->data.fsp->fsp_name));
+
+       /*
+        * Serialise a share mode entry for the fsp and post it to ourselves
+        * (sme.pid is procid_self()) as a regular break request.
+        */
+       init_share_mode_entry(&sme, cb, LEVEL_II_OPLOCK);
+       share_mode_entry_to_message(msg, &sme);
+       status = messaging_send_buf(smbd_messaging_context(),
+                                   sme.pid,
+                                   MSG_SMB_BREAK_REQUEST,
+                                   (uint8_t *)msg,
+                                   MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+       if (!NT_STATUS_IS_OK(status)) {
+               /* Don't lose the break silently; at least leave a trace. */
+               DEBUG(0, ("oplock_break_to_level_two_handler: failed to "
+                         "send break message for file %s: %s\n",
+                         cb->data.fsp->fsp_name, nt_errstr(status)));
+       }
+
+       /*
+        * We could still receive an OPLOCK_REVOKED or OPLOCK_BREAK_TO_NONE
+        * message, so keep the oplock_callback_id around.
+        */
+}
+
+/**
+ * Revoke an oplock from an unresponsive client.
+ *
+ * The kernel will send this message when it times out waiting for a level 1
+ * oplock break to be acknowledged by the client.  The oplock is then
+ * immediately removed.
+ *
+ * The id must name a callback record in the ONEFS_OPEN_FILE state; if no
+ * such record exists the event is logged and ignored.
+ */
+static void oplock_revoked_handler(uint64_t id)
+{
+       struct onefs_callback_record *cb;
+       files_struct *fsp = NULL;
+
+       /* Cast id for %llu: uint64_t need not be unsigned long long. */
+       DEBUG(10, ("oplock_revoked_handler called for id %llu\n",
+                  (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
+               DEBUG(3, ("oplock_revoked_handler: could not find "
+                         "callback id %llu\n", (unsigned long long)id));
+               return;
+       }
+
+       fsp = cb->data.fsp;
+
+       /* No oplock timeout event may be set on the fsp at this point. */
+       SMB_ASSERT(fsp->oplock_timeout == NULL);
+
+       DEBUG(0,("Level 1 oplock break failed for file %s. Forcefully "
+                "revoking oplock\n", fsp->fsp_name));
+
+       /* Record the failed break globally, then drop the oplock. */
+       global_client_failed_oplock_break = True;
+       remove_oplock(fsp);
+
+       /*
+        * cb record is cleaned up in fsp ext data destructor on close, so
+        * leave it in the list.
+        */
+}
+
+/**
+ * Asynchronous ifs_createfile callback
+ *
+ * If ifs_createfile had to asynchronously break any oplocks, this function is
+ * called when the kernel sends an event that the open can be retried.
+ *
+ * The id must name a callback record in the ONEFS_WAITING_FOR_OPLOCK state;
+ * if no such record exists the event is logged and ignored.  On success the
+ * deferred open is rescheduled and the callback record is destroyed.
+ */
+static void semlock_available_handler(uint64_t id)
+{
+       struct onefs_callback_record *cb;
+
+       /* Cast id for %llu: uint64_t need not be unsigned long long. */
+       DEBUG(10, ("semlock_available_handler called: %llu\n",
+                  (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
+               DEBUG(5, ("semlock_available_handler: Did not find callback "
+                         "%llu\n", (unsigned long long)id));
+               return;
+       }
+
+       DEBUG(10, ("Got semlock available for mid %d\n", cb->data.mid));
+
+       /* Paranoia check: the mid must belong to a deferred open. */
+       if (!(open_was_deferred(cb->data.mid))) {
+               char *msg;
+               if (asprintf(&msg, "Semlock available on an open that wasn't "
+                            "deferred: %s\n",
+                             onefs_callback_record_str_static(cb)) != -1) {
+                       smb_panic(msg);
+               }
+               /* Generic message for when asprintf() failed. */
+               smb_panic("Semlock available on an open that wasn't "
+                         "deferred\n");
+       }
+
+       schedule_deferred_open_smb_message(cb->data.mid);
+
+       /* Cleanup the callback record since the open will be retried. */
+       destroy_onefs_callback_record(id);
+}
+
+/**
+ * Asynchronous ifs_createfile failure callback
+ *
+ * If ifs_createfile had to asynchronously break any oplocks, but an error was
+ * encountered in the kernel, the open will be retried with state->failed
+ * set to true.  This will prompt the open path to send an INTERNAL_ERROR
+ * error message to the client.
+ *
+ * The id must name a callback record in the ONEFS_WAITING_FOR_OPLOCK state;
+ * if no such record exists the event is logged and ignored.  The callback
+ * record is destroyed once a matching record has been handled.
+ */
+static void semlock_async_failure_handler(uint64_t id)
+{
+       struct onefs_callback_record *cb;
+       struct pending_message_list *pml;
+       struct deferred_open_record *state;
+
+       /* Cast id for %llu: uint64_t need not be unsigned long long. */
+       DEBUG(1, ("semlock_async_failure_handler called: %llu\n",
+                 (unsigned long long)id));
+
+       if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
+               DEBUG(5, ("semlock_async_failure_handler: Did not find callback "
+                         "%llu\n", (unsigned long long)id));
+               return;
+       }
+
+       DEBUG(1, ("Got semlock_async_failure message for mid %d\n", cb->data.mid));
+
+       /* Paranoia check: the mid must belong to a deferred open. */
+       if (!(open_was_deferred(cb->data.mid))) {
+               char *msg;
+               if (asprintf(&msg, "Semlock failure on an open that wasn't "
+                            "deferred: %s\n",
+                             onefs_callback_record_str_static(cb)) != -1) {
+                       smb_panic(msg);
+               }
+               /* Generic message for when asprintf() failed. */
+               smb_panic("Semlock failure on an open that wasn't deferred\n");
+       }
+
+       /* Find the actual deferred open record. */
+       if (!(pml = get_open_deferred_message(cb->data.mid))) {
+               DEBUG(0, ("Could not find deferred request for "
+                         "mid %d\n", cb->data.mid));
+               destroy_onefs_callback_record(id);
+               return;
+       }
+       state = (struct deferred_open_record *)pml->private_data.data;
+
+       /* Update to failed so the client can be notified on retried open. */
+       state->failed = true;
+
+       /* Schedule deferred open for immediate retry. */
+       schedule_deferred_open_smb_message(cb->data.mid);
+
+       /* Cleanup the callback record here since the open will be retried. */
+       destroy_onefs_callback_record(id);
+}
+
+/**
+ * Kernel oplock "set" hook.
+ *
+ * Nothing to do: OneFS acquires all oplocks via ifs_createfile, so success
+ * is reported unconditionally and none of the arguments are examined.
+ */
+static bool onefs_set_kernel_oplock(struct kernel_oplocks *_ctx,
+                                   files_struct *fsp, int oplock_type)
+{
+       return true;
+}
+
+/**
+ * Kernel oplock "release" hook.
+ *
+ * Translates the samba oplock type into its OneFS counterpart and asks the
+ * kernel to downgrade the oplock held on the fsp's descriptor to it.  A
+ * missing descriptor or a failed downgrade is only logged.
+ */
+static void onefs_release_kernel_oplock(struct kernel_oplocks *_ctx,
+                                       files_struct *fsp, int oplock_type)
+{
+       enum oplock_type target = onefs_samba_oplock_to_oplock(oplock_type);
+
+       DEBUG(10, ("onefs_release_kernel_oplock: Releasing %s to type %s\n",
+                  fsp->fsp_name, onefs_oplock_str(target)));
+
+       if (fsp->fh->fd == -1) {
+               /* Nothing to downgrade without an open descriptor. */
+               DEBUG(1, ("no fd\n"));
+       } else if (ifs_oplock_downgrade(fsp->fh->fd, target)) {
+               /* The target is either SHARED or NONE. */
+               DEBUG(1,("ifs_oplock_downgrade failed: %s\n",
+                        strerror(errno)));
+       }
+}
+
+/**
+ * Wrap ifs_semlock_write so it is only called on operations that aren't
+ * already contended in the kernel.
+ *
+ * Only the ALLOC_GROW, WINDOWS_BRL and POSIX_BRL contention types result in
+ * a call to ifs_semlock_write(fd, semlock_op); every other type is skipped.
+ * A failing call is logged but not reported to the caller.
+ */
+static void onefs_semlock_write(int fd, enum level2_contention_type type,
+                               enum semlock_operation semlock_op)
+{
+       int ret;
+
+       switch (type) {
+       case LEVEL2_CONTEND_ALLOC_GROW:
+       case LEVEL2_CONTEND_WINDOWS_BRL:
+       case LEVEL2_CONTEND_POSIX_BRL:
+               DEBUG(10, ("Taking %d write semlock for cmd %d on fd: %d\n",
+                          semlock_op, type, fd));
+               ret = ifs_semlock_write(fd, semlock_op);
+               if (ret) {
+                       /*
+                        * Terminate the message with a newline so it does
+                        * not run into the next log entry.
+                        */
+                       DEBUG(0,("ifs_semlock_write failed taking %d write "
+                                "semlock for cmd %d on fd: %d: %s\n",
+                                semlock_op, type, fd, strerror(errno)));
+               }
+               break;
+       default:
+               DEBUG(10, ("Skipping write semlock for cmd %d on fd: %d\n",
+                          type, fd));
+               break;
+       }
+}
+
+/**
+ * Contend level 2 oplocks in the kernel and smbd.
+ *
+ * Taking a write semlock will contend all level 2 oplocks in all smbds across
+ * the cluster except the fsp's own level 2 oplock.  This lack of
+ * self-contention is a limitation of the current OneFS kernel oplocks
+ * implementation.  Luckily it is easy to contend our own level 2 oplock by
+ * iterating the share mode entries and only breaking the oplock if the pid
+ * matches ours.
+ */
+static void onefs_contend_level2_oplocks_begin(files_struct *fsp,
+                                              enum level2_contention_type type)
+{
+       int i;
+       struct share_mode_lock *lck;
+
+       /* Take care of level 2 kernel contention. */
+       onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_LOCK);
+
+       /*
+        * If this file is level II oplocked then we need
+        * to grab the shared memory lock and inform all
+        * other files with a level II lock that they need
+        * to flush their read caches. We keep the lock over
+        * the shared memory area whilst doing this.
+        */
+
+       if (!LEVEL_II_OPLOCK_TYPE(fsp->oplock_type)) {
+               return;
+       }
+
+       lck = get_share_mode_lock(talloc_tos(), fsp->file_id, NULL, NULL,
+                                 NULL);
+       if (lck == NULL) {
+               DEBUG(0,("onefs_contend_level2_oplocks_begin: failed to lock "
+                        "share mode entry for file %s.\n", fsp->fsp_name ));
+               return;
+       }
+
+       DEBUG(10,("onefs_contend_level2_oplocks_begin: num_share_modes = %d\n",
+                 lck->num_share_modes ));
+
+       for (i = 0; i < lck->num_share_modes; i++) {
+               struct share_mode_entry *share_entry = &lck->share_modes[i];
+               char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+               DATA_BLOB blob;
+
+               if (!is_valid_share_mode_entry(share_entry)) {
+                       continue;
+               }
+
+               DEBUG(10,("onefs_contend_level2_oplocks_begin: "
+                         "share_entry[%i]->op_type == %d\n",
+                         i, share_entry->op_type ));
+
+               if (share_entry->op_type == NO_OPLOCK) {
+                       continue;
+               }
+
+               /*
+                * Paranoia: no exclusive oplock should be present on the
+                * file while we hold a level 2 oplock on it.
+                */
+               if (EXCLUSIVE_OPLOCK_TYPE(share_entry->op_type)) {
+                       DEBUG(0,("onefs_contend_level2_oplocks_begin: PANIC. "
+                                "share mode entry %d is an exlusive "
+                                "oplock !\n", i ));
+                       TALLOC_FREE(lck);
+                       abort();
+               }
+
+               /*
+                * Only contend our own level 2 oplock.  The other processes
+                * will get break events from the kernel.
+                */
+               if (!procid_is_me(&share_entry->pid)) {
+                       continue;
+               }
+
+               /*
+                * Serialise the entry only now that we know it is going to
+                * be delivered, and hand it straight to the local handler.
+                */
+               share_mode_entry_to_message(msg, share_entry);
+               blob = data_blob_const(msg, MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+               process_oplock_async_level2_break_message(
+                       smbd_messaging_context(),
+                       NULL,
+                       MSG_SMB_ASYNC_LEVEL2_BREAK,
+                       share_entry->pid,
+                       &blob);
+       }
+
+       /* We let the message receivers handle removing the oplock state
+          in the share mode lock db. */
+
+       TALLOC_FREE(lck);
+}
+
+/**
+ * Counterpart of onefs_contend_level2_oplocks_begin: once the level 2
+ * contending operation has finished, drop the write semlock again.
+ */
+static void onefs_contend_level2_oplocks_end(files_struct *fsp,
+                                            enum level2_contention_type type)
+{
+       int fd = fsp->fh->fd;
+
+       /* Release the kernel-side level 2 contention. */
+       onefs_semlock_write(fd, type, SEMLOCK_UNLOCK);
+}
+
+/**
+ * Map a onefs oplock type to a constant, printable name.
+ *
+ * Types other than NONE, EXCLUSIVE, BATCH and SHARED yield "UNKNOWN".
+ */
+const char *onefs_oplock_str(enum oplock_type onefs_oplock_type)
+{
+       const char *name = "UNKNOWN";
+
+       switch (onefs_oplock_type) {
+       case OPLOCK_NONE:
+               name = "OPLOCK_NONE";
+               break;
+       case OPLOCK_EXCLUSIVE:
+               name = "OPLOCK_EXCLUSIVE";
+               break;
+       case OPLOCK_BATCH:
+               name = "OPLOCK_BATCH";
+               break;
+       case OPLOCK_SHARED:
+               name = "OPLOCK_SHARED";
+               break;
+       default:
+               break;
+       }
+
+       return name;
+}
+
+/**
+ * Translate a onefs oplock type into the matching samba oplock value.
+ *
+ * An unrecognised type is logged at level 0 and mapped to NO_OPLOCK.
+ */
+int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock)
+{
+       int samba_oplock = NO_OPLOCK;
+
+       switch (onefs_oplock) {
+       case OPLOCK_NONE:
+               samba_oplock = NO_OPLOCK;
+               break;
+       case OPLOCK_EXCLUSIVE:
+               samba_oplock = EXCLUSIVE_OPLOCK;
+               break;
+       case OPLOCK_BATCH:
+               samba_oplock = BATCH_OPLOCK;
+               break;
+       case OPLOCK_SHARED:
+               samba_oplock = LEVEL_II_OPLOCK;
+               break;
+       default:
+               DEBUG(0, ("unknown oplock type %d found\n", onefs_oplock));
+               break;
+       }
+
+       return samba_oplock;
+}
+
+/**
+ * Translate a samba oplock type into the matching onefs oplock type.
+ *
+ * The checks run in the order batch, exclusive, level II; a value that
+ * matches none of them maps to OPLOCK_NONE.
+ */
+enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type)
+{
+       enum oplock_type onefs_oplock;
+
+       if (BATCH_OPLOCK_TYPE(samba_oplock_type)) {
+               onefs_oplock = OPLOCK_BATCH;
+       } else if (EXCLUSIVE_OPLOCK_TYPE(samba_oplock_type)) {
+               onefs_oplock = OPLOCK_EXCLUSIVE;
+       } else if (LEVEL_II_OPLOCK_TYPE(samba_oplock_type)) {
+               onefs_oplock = OPLOCK_SHARED;
+       } else {
+               onefs_oplock = OPLOCK_NONE;
+       }
+
+       return onefs_oplock;
+}
+
+/**
+ * Read handler for the oplock event fd.
+ *
+ * Recovers the onefs oplocks context from private_data and lets the event
+ * system dispatcher handle each event through the context's onefs_ops
+ * table.  A dispatcher failure is logged together with errno.
+ */
+static void onefs_oplocks_read_fde_handler(struct event_context *ev,
+                                          struct fd_event *fde,
+                                          uint16_t flags,
+                                          void *private_data)
+{
+       struct onefs_oplocks_context *oplock_ctx;
+
+       oplock_ctx = talloc_get_type(private_data,
+                                    struct onefs_oplocks_context);
+
+       if (!oplocks_event_dispatcher(oplock_ctx->onefs_ops)) {
+               /* Dispatch succeeded, nothing to report. */
+               return;
+       }
+
+       DEBUG(0, ("oplocks_event_dispatcher failed: %s\n",
+                 strerror(errno)));
+}
+
+/**
+ * Setup kernel oplocks
+ *
+ * Kernel oplock operations table for OneFS.  onefs_init_kernel_oplocks
+ * stores a pointer to this table in the ops member of the kernel_oplocks
+ * context it returns.
+ */
+static const struct kernel_oplocks_ops onefs_koplocks_ops = {
+       .set_oplock                     = onefs_set_kernel_oplock,
+       .release_oplock                 = onefs_release_kernel_oplock,
+       .contend_level2_oplocks_begin   = onefs_contend_level2_oplocks_begin,
+       .contend_level2_oplocks_end     = onefs_contend_level2_oplocks_end,
+};
+
+/*
+ * Handlers for the individual kernel oplock events.  The table is stored in
+ * the onefs oplocks context by onefs_init_kernel_oplocks and handed to
+ * oplocks_event_dispatcher by onefs_oplocks_read_fde_handler.
+ */
+static const struct oplocks_event_ops onefs_dispatch_ops = {
+       .oplock_break_to_none = oplock_break_to_none_handler,
+       .oplock_break_to_level_two = oplock_break_to_level_two_handler,
+       .oplock_revoked = oplock_revoked_handler,
+       .semlock_available = semlock_available_handler,
+       .semlock_async_failure = semlock_async_failure_handler,
+};
+
+/**
+ * Initialize OneFS kernel oplock support.
+ *
+ * Turns on the non-blocking semlock process option, allocates the generic
+ * kernel_oplocks context (on mem_ctx) together with its OneFS private
+ * context, registers a kernel oplock event channel and hooks the resulting
+ * fd into samba's event system.
+ *
+ * Returns the new kernel_oplocks context, or NULL if any step failed.
+ */
+struct kernel_oplocks *onefs_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
+{
+       struct kernel_oplocks *_ctx = NULL;
+       struct onefs_oplocks_context *ctx = NULL;
+       struct procoptions po = PROCOPTIONS_INIT;
+
+       DEBUG(10, ("onefs_init_kernel_oplocks called\n"));
+
+       /* Set the non-blocking proc flag */
+       po.po_flags_on |= P_NON_BLOCKING_SEMLOCK;
+       if (setprocoptions(&po) != 0) {
+               DEBUG(0, ("setprocoptions failed: %s.\n", strerror(errno)));
+               goto err_out;
+       }
+
+       /* Setup the oplock contexts */
+       _ctx = talloc_zero(mem_ctx, struct kernel_oplocks);
+       if (!_ctx) {
+               goto err_out;
+       }
+
+       /* The private context is a talloc child of the generic one. */
+       ctx = talloc_zero(_ctx, struct onefs_oplocks_context);
+       if (!ctx) {
+               goto err_out;
+       }
+
+       _ctx->ops = &onefs_koplocks_ops;
+       _ctx->flags = (KOPLOCKS_LEVEL2_SUPPORTED |
+                      KOPLOCKS_DEFERRED_OPEN_NOTIFICATION |
+                      KOPLOCKS_TIMEOUT_NOTIFICATION |
+                      KOPLOCKS_OPLOCK_BROKEN_NOTIFICATION);
+       _ctx->private_data = ctx;
+       ctx->ctx = _ctx;
+       ctx->onefs_ops = &onefs_dispatch_ops;
+
+       /* Register a kernel event channel for oplocks */
+       ctx->onefs_event_fd = oplocks_event_register();
+       if (ctx->onefs_event_fd == -1) {
+               DEBUG(0, ("oplocks_event_register failed: %s\n",
+                          strerror(errno)));
+               goto err_out;
+       }
+
+       DEBUG(10, ("oplock event_fd = %d\n", ctx->onefs_event_fd));
+
+       /* Register the oplock event_fd with samba's event system */
+       ctx->read_fde = event_add_fd(smbd_event_context(),
+                                    ctx,
+                                    ctx->onefs_event_fd,
+                                    EVENT_FD_READ,
+                                    onefs_oplocks_read_fde_handler,
+                                    ctx);
+       if (ctx->read_fde == NULL) {
+               /*
+                * Without the fd event no kernel oplock event would ever be
+                * dispatched, so treat this as an initialization failure.
+                *
+                * NOTE(review): the channel obtained from
+                * oplocks_event_register() is not released on this path; no
+                * matching unregister call is visible here -- confirm
+                * whether one exists.
+                */
+               DEBUG(0, ("event_add_fd failed for oplock event_fd %d\n",
+                         ctx->onefs_event_fd));
+               goto err_out;
+       }
+
+       return _ctx;
+
+ err_out:
+       /*
+        * ctx is allocated as a talloc child of _ctx, so freeing _ctx
+        * releases both.  Freeing ctx separately afterwards would operate
+        * on memory that has already been freed.
+        */
+       talloc_free(_ctx);
+       return NULL;
+}
+
+#else
+ /*
+  * Empty placeholder built in the #else branch, i.e. when HAVE_ONEFS is not
+  * defined.  Presumably it exists so that this file still defines a symbol
+  * on non-OneFS builds -- confirm.
+  */
+ void oplock_onefs_dummy(void);
+ void oplock_onefs_dummy(void) {}
+#endif /* HAVE_ONEFS */