smbd: Simplify open_file_ntcreate
[mat/samba.git] / source3 / smbd / open.c
index bea4d99285de5aae2b49174107abb0f6af876d89..fa52fccc088595ef56beb612db3d0452a2acd9df 100644 (file)
 #include "../libcli/security/security.h"
 #include "../librpc/gen_ndr/ndr_security.h"
 #include "../librpc/gen_ndr/open_files.h"
+#include "../librpc/gen_ndr/idmap.h"
+#include "passdb/lookup_sid.h"
 #include "auth.h"
 #include "serverid.h"
 #include "messages.h"
+#include "source3/lib/dbwrap/dbwrap_watch.h"
 
 extern const struct generic_mapping file_generic_mapping;
 
@@ -73,6 +76,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        struct security_descriptor *sd = NULL;
        uint32_t rejected_share_access;
        uint32_t rejected_mask = access_mask;
+       uint32_t do_not_check_mask = 0;
 
        rejected_share_access = access_mask & ~(conn->share_access);
 
@@ -115,7 +119,7 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
                        (SECINFO_OWNER |
                        SECINFO_GROUP |
-                       SECINFO_DACL),&sd);
+                        SECINFO_DACL), talloc_tos(), &sd);
 
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(10, ("smbd_check_access_rights: Could not get acl "
@@ -131,13 +135,32 @@ NTSTATUS smbd_check_access_rights(struct connection_struct *conn,
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_file_access_check() also takes care of
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check() also takes care of
         * owner WRITE_DAC and READ_CONTROL.
         */
+       do_not_check_mask = FILE_READ_ATTRIBUTES;
+
+       /*
+        * Samba 3.6 and earlier granted execute access even
+        * if the ACL did not contain execute rights.
+        * Samba 4.0 is more correct and checks it.
+        * The compatibilty mode allows to skip this check
+        * to smoothen upgrades.
+        */
+       if (lp_acl_allow_execute_always(SNUM(conn))) {
+               do_not_check_mask |= FILE_EXECUTE;
+       }
+
        status = se_file_access_check(sd,
                                get_current_nttok(conn),
                                use_privs,
-                               (access_mask & ~FILE_READ_ATTRIBUTES),
+                               (access_mask & ~do_not_check_mask),
                                &rejected_mask);
 
        DEBUG(10,("smbd_check_access_rights: file %s requesting "
@@ -237,6 +260,7 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        status = SMB_VFS_GET_NT_ACL(conn,
                                parent_dir,
                                SECINFO_DACL,
+                                   talloc_tos(),
                                &parent_sd);
 
        if (!NT_STATUS_IS_OK(status)) {
@@ -248,7 +272,13 @@ static NTSTATUS check_parent_access(struct connection_struct *conn,
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_file_access_check() also takes care of
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check() also takes care of
         * owner WRITE_DAC and READ_CONTROL.
         */
        status = se_file_access_check(parent_sd,
@@ -372,13 +402,12 @@ void change_file_owner_to_parent(connection_struct *conn,
                                        const char *inherit_from_dir,
                                        files_struct *fsp)
 {
-       struct smb_filename *smb_fname_parent = NULL;
-       NTSTATUS status;
+       struct smb_filename *smb_fname_parent;
        int ret;
 
-       status = create_synthetic_smb_fname(talloc_tos(), inherit_from_dir,
-                                           NULL, NULL, &smb_fname_parent);
-       if (!NT_STATUS_IS_OK(status)) {
+       smb_fname_parent = synthetic_smb_fname(talloc_tos(), inherit_from_dir,
+                                              NULL, NULL);
+       if (smb_fname_parent == NULL) {
                return;
        }
 
@@ -427,17 +456,17 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
                                       const char *fname,
                                       SMB_STRUCT_STAT *psbuf)
 {
-       struct smb_filename *smb_fname_parent = NULL;
+       struct smb_filename *smb_fname_parent;
        struct smb_filename *smb_fname_cwd = NULL;
        char *saved_dir = NULL;
        TALLOC_CTX *ctx = talloc_tos();
        NTSTATUS status = NT_STATUS_OK;
        int ret;
 
-       status = create_synthetic_smb_fname(ctx, inherit_from_dir, NULL, NULL,
-                                           &smb_fname_parent);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       smb_fname_parent = synthetic_smb_fname(ctx, inherit_from_dir,
+                                              NULL, NULL);
+       if (smb_fname_parent == NULL) {
+               return NT_STATUS_NO_MEMORY;
        }
 
        ret = SMB_VFS_STAT(conn, smb_fname_parent);
@@ -475,10 +504,10 @@ NTSTATUS change_dir_owner_to_parent(connection_struct *conn,
                goto chdir;
        }
 
-       status = create_synthetic_smb_fname(ctx, ".", NULL, NULL,
-                                           &smb_fname_cwd);
-       if (!NT_STATUS_IS_OK(status)) {
-               return status;
+       smb_fname_cwd = synthetic_smb_fname(ctx, ".", NULL, NULL);
+       if (smb_fname_cwd == NULL) {
+               status = NT_STATUS_NO_MEMORY;
+               goto chdir;
        }
 
        ret = SMB_VFS_STAT(conn, smb_fname_cwd);
@@ -1010,15 +1039,6 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                return;
        }
 
-       if (is_deferred_open_entry(share_entry) &&
-           !open_was_deferred(sconn, share_entry->op_mid)) {
-               char *str = talloc_asprintf(talloc_tos(),
-                       "Got a deferred entry without a request: "
-                       "PANIC: %s\n",
-                       share_mode_str(talloc_tos(), num, share_entry));
-               smb_panic(str);
-       }
-
        if (!is_valid_share_mode_entry(share_entry)) {
                return;
        }
@@ -1032,17 +1052,6 @@ static void validate_my_share_entries(struct smbd_server_connection *sconn,
                          "share entry with an open file\n");
        }
 
-       if (is_deferred_open_entry(share_entry)) {
-               goto panic;
-       }
-
-       if ((share_entry->op_type == NO_OPLOCK) &&
-           (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK)) {
-               /* Someone has already written to it, but I haven't yet
-                * noticed */
-               return;
-       }
-
        if (((uint16)fsp->oplock_type) != share_entry->op_type) {
                goto panic;
        }
@@ -1076,6 +1085,26 @@ bool is_stat_open(uint32 access_mask)
                ((access_mask & ~stat_open_bits) == 0));
 }
 
+static bool has_delete_on_close(struct share_mode_lock *lck,
+                               uint32_t name_hash)
+{
+       struct share_mode_data *d = lck->data;
+       uint32_t i;
+
+       if (d->num_share_modes == 0) {
+               return false;
+       }
+       if (!is_delete_on_close_set(lck, name_hash)) {
+               return false;
+       }
+       for (i=0; i<d->num_share_modes; i++) {
+               if (!share_mode_stale_pid(d, i)) {
+                       return true;
+               }
+       }
+       return false;
+}
+
 /****************************************************************************
  Deal with share modes
  Invarient: Share mode must be locked on entry and exit.
@@ -1084,11 +1113,8 @@ bool is_stat_open(uint32 access_mask)
 
 static NTSTATUS open_mode_check(connection_struct *conn,
                                struct share_mode_lock *lck,
-                               uint32_t name_hash,
                                uint32 access_mask,
-                               uint32 share_access,
-                               uint32 create_options,
-                               bool *file_existed)
+                               uint32 share_access)
 {
        int i;
 
@@ -1096,25 +1122,6 @@ static NTSTATUS open_mode_check(connection_struct *conn,
                return NT_STATUS_OK;
        }
 
-       /* A delete on close prohibits everything */
-
-       if (is_delete_on_close_set(lck, name_hash)) {
-               /*
-                * Check the delete on close token
-                * is valid. It could have been left
-                * after a server crash.
-                */
-               for(i = 0; i < lck->data->num_share_modes; i++) {
-                       if (!share_mode_stale_pid(lck->data, i)) {
-
-                               *file_existed = true;
-
-                               return NT_STATUS_DELETE_PENDING;
-                       }
-               }
-               return NT_STATUS_OK;
-       }
-
        if (is_stat_open(access_mask)) {
                /* Stat open that doesn't trigger oplock breaks or share mode
                 * checks... ! JRA. */
@@ -1148,16 +1155,10 @@ static NTSTATUS open_mode_check(connection_struct *conn,
                                continue;
                        }
 
-                       *file_existed = true;
-
                        return NT_STATUS_SHARING_VIOLATION;
                }
        }
 
-       if (lck->data->num_share_modes != 0) {
-               *file_existed = true;
-       }
-
        return NT_STATUS_OK;
 }
 
@@ -1181,18 +1182,9 @@ static NTSTATUS send_break_message(files_struct *fsp,
        /* Create the message. */
        share_mode_entry_to_message(msg, exclusive);
 
-       /* Add in the FORCE_OPLOCK_BREAK_TO_NONE bit in the message if set. We
-          don't want this set in the share mode struct pointed to by lck. */
-
-       if (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE) {
-               SSVAL(msg,OP_BREAK_MSG_OP_TYPE_OFFSET,
-                       exclusive->op_type | FORCE_OPLOCK_BREAK_TO_NONE);
-       }
-
        status = messaging_send_buf(fsp->conn->sconn->msg_ctx, exclusive->pid,
                                    MSG_SMB_BREAK_REQUEST,
-                                   (uint8 *)msg,
-                                   MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+                                   (uint8 *)msg, sizeof(msg));
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(3, ("Could not send oplock break message: %s\n",
                          nt_errstr(status)));
@@ -1210,105 +1202,129 @@ static NTSTATUS send_break_message(files_struct *fsp,
  * Do internal consistency checks on the share mode for a file.
  */
 
-static void find_oplock_types(files_struct *fsp,
-                               int oplock_request,
-                               const struct share_mode_lock *lck,
-                               struct share_mode_entry **pp_batch,
-                               struct share_mode_entry **pp_ex_or_batch,
-                               bool *got_level2,
-                               bool *got_no_oplock)
+static bool validate_oplock_types(files_struct *fsp,
+                                 int oplock_request,
+                                 struct share_mode_lock *lck)
 {
-       int i;
-
-       *pp_batch = NULL;
-       *pp_ex_or_batch = NULL;
-       *got_level2 = false;
-       *got_no_oplock = false;
+       struct share_mode_data *d = lck->data;
+       bool batch = false;
+       bool ex_or_batch = false;
+       bool level2 = false;
+       bool no_oplock = false;
+       uint32_t i;
 
        /* Ignore stat or internal opens, as is done in
                delay_for_batch_oplocks() and
                delay_for_exclusive_oplocks().
         */
        if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
-               return;
+               return true;
        }
 
-       for (i=0; i<lck->data->num_share_modes; i++) {
-               if (!is_valid_share_mode_entry(&lck->data->share_modes[i])) {
+       for (i=0; i<d->num_share_modes; i++) {
+               struct share_mode_entry *e = &d->share_modes[i];
+
+               if (!is_valid_share_mode_entry(e)) {
                        continue;
                }
 
-               if (lck->data->share_modes[i].op_type == NO_OPLOCK &&
-                               is_stat_open(lck->data->share_modes[i].access_mask)) {
+               if (e->op_type == NO_OPLOCK && is_stat_open(e->access_mask)) {
                        /* We ignore stat opens in the table - they
                           always have NO_OPLOCK and never get or
                           cause breaks. JRA. */
                        continue;
                }
 
-               if (BATCH_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
+               if (BATCH_OPLOCK_TYPE(e->op_type)) {
                        /* batch - can only be one. */
-                       if (share_mode_stale_pid(lck->data, i)) {
+                       if (share_mode_stale_pid(d, i)) {
                                DEBUG(10, ("Found stale batch oplock\n"));
                                continue;
                        }
-                       if (*pp_ex_or_batch || *pp_batch || *got_level2 || *got_no_oplock) {
-                               smb_panic("Bad batch oplock entry.");
+                       if (ex_or_batch || batch || level2 || no_oplock) {
+                               DEBUG(0, ("Bad batch oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *pp_batch = &lck->data->share_modes[i];
+                       batch = true;
                }
 
-               if (EXCLUSIVE_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
-                       if (share_mode_stale_pid(lck->data, i)) {
+               if (EXCLUSIVE_OPLOCK_TYPE(e->op_type)) {
+                       if (share_mode_stale_pid(d, i)) {
                                DEBUG(10, ("Found stale duplicate oplock\n"));
                                continue;
                        }
                        /* Exclusive or batch - can only be one. */
-                       if (*pp_ex_or_batch || *got_level2 || *got_no_oplock) {
-                               smb_panic("Bad exclusive or batch oplock entry.");
+                       if (ex_or_batch || level2 || no_oplock) {
+                               DEBUG(0, ("Bad exclusive or batch oplock "
+                                         "entry %u.", (unsigned)i));
+                               return false;
                        }
-                       *pp_ex_or_batch = &lck->data->share_modes[i];
+                       ex_or_batch = true;
                }
 
-               if (LEVEL_II_OPLOCK_TYPE(lck->data->share_modes[i].op_type)) {
-                       if (*pp_batch || *pp_ex_or_batch) {
-                               if (share_mode_stale_pid(lck->data, i)) {
+               if (LEVEL_II_OPLOCK_TYPE(e->op_type)) {
+                       if (batch || ex_or_batch) {
+                               if (share_mode_stale_pid(d, i)) {
                                        DEBUG(10, ("Found stale LevelII "
                                                   "oplock\n"));
                                        continue;
                                }
-                               smb_panic("Bad levelII oplock entry.");
+                               DEBUG(0, ("Bad levelII oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *got_level2 = true;
+                       level2 = true;
                }
 
-               if (lck->data->share_modes[i].op_type == NO_OPLOCK) {
-                       if (*pp_batch || *pp_ex_or_batch) {
-                               if (share_mode_stale_pid(lck->data, i)) {
+               if (e->op_type == NO_OPLOCK) {
+                       if (batch || ex_or_batch) {
+                               if (share_mode_stale_pid(d, i)) {
                                        DEBUG(10, ("Found stale NO_OPLOCK "
                                                   "entry\n"));
                                        continue;
                                }
-                               smb_panic("Bad no oplock entry.");
+                               DEBUG(0, ("Bad no oplock entry %u.",
+                                         (unsigned)i));
+                               return false;
                        }
-                       *got_no_oplock = true;
+                       no_oplock = true;
                }
        }
+
+       remove_stale_share_mode_entries(d);
+
+       if ((batch || ex_or_batch) && (d->num_share_modes != 1)) {
+               DEBUG(1, ("got batch (%d) or ex (%d) non-exclusively (%d)\n",
+                         (int)batch, (int)ex_or_batch,
+                         (int)d->num_share_modes));
+               return false;
+       }
+
+       return true;
 }
 
-static bool delay_for_batch_oplocks(files_struct *fsp,
-                                       uint64_t mid,
-                                       int oplock_request,
-                                       struct share_mode_entry *batch_entry)
+static bool delay_for_oplock(files_struct *fsp,
+                            uint64_t mid,
+                            int oplock_request,
+                            struct share_mode_lock *lck,
+                            bool have_sharing_violation)
 {
+       struct share_mode_data *d = lck->data;
+       struct share_mode_entry *entry;
+
        if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
                return false;
        }
-       if (batch_entry == NULL) {
+       if (lck->data->num_share_modes != 1) {
+               /*
+                * More than one. There can't be any exclusive or batch left.
+                */
                return false;
        }
+       entry = &d->share_modes[0];
 
-       if (server_id_is_disconnected(&batch_entry->pid)) {
+       if (server_id_is_disconnected(&entry->pid)) {
                /*
                 * TODO: clean up.
                 * This could be achieved by sending a break message
@@ -1321,33 +1337,31 @@ static bool delay_for_batch_oplocks(files_struct *fsp,
                return false;
        }
 
-       /* Found a batch oplock */
-       send_break_message(fsp, batch_entry, mid, oplock_request);
-       return true;
-}
-
-static bool delay_for_exclusive_oplocks(files_struct *fsp,
-                                       uint64_t mid,
-                                       int oplock_request,
-                                       struct share_mode_entry *ex_entry)
-{
-       if ((oplock_request & INTERNAL_OPEN_ONLY) || is_stat_open(fsp->access_mask)) {
-               return false;
+       if (have_sharing_violation && (entry->op_type & BATCH_OPLOCK)) {
+               if (share_mode_stale_pid(d, 0)) {
+                       return false;
+               }
+               send_break_message(fsp, entry, mid, oplock_request);
+               return true;
        }
-       if (ex_entry == NULL) {
+       if (have_sharing_violation) {
+               /*
+                * Non-batch exclusive is not broken if we have a sharing
+                * violation
+                */
                return false;
        }
-
-       if (server_id_is_disconnected(&ex_entry->pid)) {
+       if (!EXCLUSIVE_OPLOCK_TYPE(entry->op_type)) {
                /*
-                * since only durable handles can get disconnected,
-                * and we can only get durable handles with batch oplocks,
-                * this should actually never be reached...
+                * No break for NO_OPLOCK or LEVEL2_OPLOCK oplocks
                 */
                return false;
        }
+       if (share_mode_stale_pid(d, 0)) {
+               return false;
+       }
 
-       send_break_message(fsp, ex_entry, mid, oplock_request);
+       send_break_message(fsp, entry, mid, oplock_request);
        return true;
 }
 
@@ -1359,16 +1373,17 @@ static bool file_has_brlocks(files_struct *fsp)
        if (!br_lck)
                return false;
 
-       return br_lck->num_locks > 0 ? true : false;
+       return (brl_num_locks(br_lck) > 0);
 }
 
 static void grant_fsp_oplock_type(files_struct *fsp,
-                               int oplock_request,
-                               bool got_level2_oplock,
-                               bool got_a_none_oplock)
+                                 struct share_mode_lock *lck,
+                                 int oplock_request)
 {
        bool allow_level2 = (global_client_caps & CAP_LEVEL_II_OPLOCKS) &&
                            lp_level2_oplocks(SNUM(fsp->conn));
+       bool got_level2_oplock, got_a_none_oplock;
+       uint32_t i;
 
        /* Start by granting what the client asked for,
           but ensure no SAMBA_PRIVATE bits can be set. */
@@ -1395,29 +1410,29 @@ static void grant_fsp_oplock_type(files_struct *fsp,
                return;
        }
 
+       got_level2_oplock = false;
+       got_a_none_oplock = false;
+
+       for (i=0; i<lck->data->num_share_modes; i++) {
+               int op_type = lck->data->share_modes[i].op_type;
+
+               if (LEVEL_II_OPLOCK_TYPE(op_type)) {
+                       got_level2_oplock = true;
+               }
+               if (op_type == NO_OPLOCK) {
+                       got_a_none_oplock = true;
+               }
+       }
+
        /*
         * Match what was requested (fsp->oplock_type) with
         * what was found in the existing share modes.
         */
 
-       if (got_a_none_oplock) {
-               fsp->oplock_type = NO_OPLOCK;
-       } else if (got_level2_oplock) {
-               if (fsp->oplock_type == NO_OPLOCK ||
-                               fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
-                       /* Store a level2 oplock, but don't tell the client */
-                       fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-               } else {
+       if (got_level2_oplock || got_a_none_oplock) {
+               if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
                        fsp->oplock_type = LEVEL_II_OPLOCK;
                }
-       } else {
-               /* All share_mode_entries are placeholders or deferred.
-                * Silently upgrade to fake levelII if the client didn't
-                * ask for an oplock. */
-               if (fsp->oplock_type == NO_OPLOCK) {
-                       /* Store a level2 oplock, but don't tell the client */
-                       fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
-               }
        }
 
        /*
@@ -1425,7 +1440,20 @@ static void grant_fsp_oplock_type(files_struct *fsp,
         * or if we've turned them off.
         */
        if (fsp->oplock_type == LEVEL_II_OPLOCK && !allow_level2) {
-               fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
+               fsp->oplock_type = NO_OPLOCK;
+       }
+
+       if (fsp->oplock_type == LEVEL_II_OPLOCK && !got_level2_oplock) {
+               /*
+                * We're the first level2 oplock. Indicate that in brlock.tdb.
+                */
+               struct byte_range_lock *brl;
+
+               brl = brl_get_locks(talloc_tos(), fsp);
+               if (brl != NULL) {
+                       brl_set_have_read_oplocks(brl, true);
+                       TALLOC_FREE(brl);
+               }
        }
 
        DEBUG(10,("grant_fsp_oplock_type: oplock type 0x%x on file %s\n",
@@ -1441,6 +1469,13 @@ static bool request_timed_out(struct timeval request_time,
        return (timeval_compare(&end_time, &now) < 0);
 }
 
+struct defer_open_state {
+       struct smbd_server_connection *sconn;
+       uint64_t mid;
+};
+
+static void defer_open_done(struct tevent_req *req);
+
 /****************************************************************************
  Handle the 1 second delay in returning a SHARING_VIOLATION error.
 ****************************************************************************/
@@ -1451,30 +1486,6 @@ static void defer_open(struct share_mode_lock *lck,
                       struct smb_request *req,
                       struct deferred_open_record *state)
 {
-       struct server_id self = messaging_server_id(req->sconn->msg_ctx);
-
-       /* Paranoia check */
-
-       if (lck) {
-               int i;
-
-               for (i=0; i<lck->data->num_share_modes; i++) {
-                       struct share_mode_entry *e = &lck->data->share_modes[i];
-
-                       if (is_deferred_open_entry(e) &&
-                           serverid_equal(&self, &e->pid) &&
-                           (e->op_mid == req->mid)) {
-                               DEBUG(0, ("Trying to defer an already deferred "
-                                       "request: mid=%llu, exiting\n",
-                                       (unsigned long long)req->mid));
-                               TALLOC_FREE(lck);
-                               exit_server("attempt to defer a deferred request");
-                       }
-               }
-       }
-
-       /* End paranoia check */
-
        DEBUG(10,("defer_open_sharing_error: time [%u.%06u] adding deferred "
                  "open entry for mid %llu\n",
                  (unsigned int)request_time.tv_sec,
@@ -1487,10 +1498,61 @@ static void defer_open(struct share_mode_lock *lck,
                exit_server("push_deferred_open_message_smb failed");
        }
        if (lck) {
-               add_deferred_open(lck, req->mid, request_time, self, state->id);
+               struct defer_open_state *watch_state;
+               struct tevent_req *watch_req;
+               bool ret;
+
+               watch_state = talloc(req->sconn, struct defer_open_state);
+               if (watch_state == NULL) {
+                       exit_server("talloc failed");
+               }
+               watch_state->sconn = req->sconn;
+               watch_state->mid = req->mid;
+
+               DEBUG(10, ("defering mid %llu\n",
+                          (unsigned long long)req->mid));
+
+               watch_req = dbwrap_record_watch_send(
+                       watch_state, req->sconn->ev_ctx, lck->data->record,
+                       req->sconn->msg_ctx);
+               if (watch_req == NULL) {
+                       exit_server("Could not watch share mode record");
+               }
+               tevent_req_set_callback(watch_req, defer_open_done,
+                                       watch_state);
+
+               ret = tevent_req_set_endtime(
+                       watch_req, req->sconn->ev_ctx,
+                       timeval_sum(&request_time, &timeout));
+               SMB_ASSERT(ret);
        }
 }
 
+static void defer_open_done(struct tevent_req *req)
+{
+       struct defer_open_state *state = tevent_req_callback_data(
+               req, struct defer_open_state);
+       NTSTATUS status;
+       bool ret;
+
+       status = dbwrap_record_watch_recv(req, talloc_tos(), NULL);
+       TALLOC_FREE(req);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(5, ("dbwrap_record_watch_recv returned %s\n",
+                         nt_errstr(status)));
+               /*
+                * Even if it failed, retry anyway. TODO: We need a way to
+                * tell a re-scheduled open about that error.
+                */
+       }
+
+       DEBUG(10, ("scheduling mid %llu\n", (unsigned long long)state->mid));
+
+       ret = schedule_deferred_open_message_smb(state->sconn, state->mid);
+       SMB_ASSERT(ret);
+       TALLOC_FREE(state);
+}
+
 
 /****************************************************************************
  On overwrite open ensure that the attributes match.
@@ -1683,7 +1745,8 @@ static NTSTATUS smbd_calculate_maximum_allowed_access(
        status = SMB_VFS_GET_NT_ACL(conn, smb_fname->base_name,
                                    (SECINFO_OWNER |
                                     SECINFO_GROUP |
-                                    SECINFO_DACL),&sd);
+                                    SECINFO_DACL),
+                                   talloc_tos(), &sd);
 
        if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
                /*
@@ -1700,7 +1763,13 @@ static NTSTATUS smbd_calculate_maximum_allowed_access(
        }
 
        /*
-        * Never test FILE_READ_ATTRIBUTES. se_file_access_check()
+        * If we can access the path to this file, by
+        * default we have FILE_READ_ATTRIBUTES from the
+        * containing directory. See the section:
+        * "Algorithm to Check Access to an Existing File"
+        * in MS-FSA.pdf.
+        *
+        * se_file_access_check()
         * also takes care of owner WRITE_DAC and READ_CONTROL.
         */
        status = se_file_access_check(sd,
@@ -1778,19 +1847,6 @@ NTSTATUS smbd_calculate_access_mask(connection_struct *conn,
  Remove the deferred open entry under lock.
 ****************************************************************************/
 
-void remove_deferred_open_entry(struct file_id id, uint64_t mid,
-                               struct server_id pid)
-{
-       struct share_mode_lock *lck = get_existing_share_mode_lock(
-               talloc_tos(), id);
-       if (lck == NULL) {
-               DEBUG(0, ("could not get share mode lock\n"));
-               return;
-       }
-       del_deferred_open_entry(lck, mid, pid);
-       TALLOC_FREE(lck);
-}
-
 /****************************************************************************
  Return true if this is a state pointer to an asynchronous create.
 ****************************************************************************/
@@ -1882,10 +1938,7 @@ static int calculate_open_access_flags(uint32_t access_mask,
         * mean the same thing under DOS and Unix.
         */
 
-       need_write =
-               ((access_mask & (FILE_WRITE_DATA | FILE_APPEND_DATA)) ||
-                (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE));
-
+       need_write = (access_mask & (FILE_WRITE_DATA | FILE_APPEND_DATA));
        if (!need_write) {
                return O_RDONLY;
        }
@@ -1941,10 +1994,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        NTSTATUS status;
        char *parent_dir;
        SMB_STRUCT_STAT saved_stat = smb_fname->st;
-       struct share_mode_entry *batch_entry = NULL;
-       struct share_mode_entry *exclusive_entry = NULL;
-       bool got_level2_oplock = false;
-       bool got_a_none_oplock = false;
        struct timespec old_write_time;
        struct file_id id;
 
@@ -2026,12 +2075,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                        if (is_deferred_open_async(ptr)) {
                                SET_STAT_INVALID(smb_fname->st);
                                file_existed = false;
-                       } else {
-                               struct deferred_open_record *state = (struct deferred_open_record *)ptr;
-                               /* Remove the deferred open entry under lock. */
-                               remove_deferred_open_entry(
-                                       state->id, req->mid,
-                                       messaging_server_id(req->sconn->msg_ctx));
                        }
 
                        /* Ensure we don't reprocess this message. */
@@ -2155,7 +2198,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
        open_access_mask = access_mask;
 
-       if ((flags2 & O_TRUNC) || (oplock_request & FORCE_OPLOCK_BREAK_TO_NONE)) {
+       if (flags2 & O_TRUNC) {
                open_access_mask |= FILE_WRITE_DATA; /* This will cause oplock breaks. */
        }
 
@@ -2291,12 +2334,11 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                        return NT_STATUS_SHARING_VIOLATION;
                }
 
-               find_oplock_types(fsp, 0, lck, &batch_entry, &exclusive_entry,
-                                 &got_level2_oplock, &got_a_none_oplock);
+               if (!validate_oplock_types(fsp, 0, lck)) {
+                       smb_panic("validate_oplock_types failed");
+               }
 
-               if (delay_for_batch_oplocks(fsp, req->mid, 0, batch_entry) ||
-                   delay_for_exclusive_oplocks(fsp, req->mid, 0,
-                                               exclusive_entry)) {
+               if (delay_for_oplock(fsp, req->mid, 0, lck, false)) {
                        schedule_defer_open(lck, request_time, req);
                        TALLOC_FREE(lck);
                        DEBUG(10, ("Sent oplock break request to kernel "
@@ -2322,7 +2364,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_RETRY)) {
                        schedule_async_open(request_time, req);
                }
-               TALLOC_FREE(lck);
                return fsp_open;
        }
 
@@ -2336,7 +2377,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                 * just fail the open to prevent creating any problems
                 * in the open file db having the wrong dev/ino key.
                 */
-               TALLOC_FREE(lck);
                fd_close(fsp);
                DEBUG(1,("open_file_ntcreate: file %s - dev/ino mismatch. "
                        "Old (dev=0x%llu, ino =0x%llu). "
@@ -2382,53 +2422,39 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        }
 
        /* Get the types we need to examine. */
-       find_oplock_types(fsp,
-                         oplock_request,
-                         lck,
-                         &batch_entry,
-                         &exclusive_entry,
-                         &got_level2_oplock,
-                         &got_a_none_oplock);
-
-       /* First pass - send break only on batch oplocks. */
-       if ((req != NULL) &&
-           delay_for_batch_oplocks(fsp,
-                                   req->mid,
-                                   oplock_request,
-                                   batch_entry)) {
-               schedule_defer_open(lck, request_time, req);
+       if (!validate_oplock_types(fsp, oplock_request, lck)) {
+               smb_panic("validate_oplock_types failed");
+       }
+
+       if (has_delete_on_close(lck, fsp->name_hash)) {
                TALLOC_FREE(lck);
                fd_close(fsp);
-               return NT_STATUS_SHARING_VIOLATION;
+               return NT_STATUS_DELETE_PENDING;
        }
 
-       status = open_mode_check(conn, lck, fsp->name_hash,
-                                access_mask, share_access,
-                                create_options, &file_existed);
+       status = open_mode_check(conn, lck,
+                                access_mask, share_access);
 
-       if (NT_STATUS_IS_OK(status)) {
-               /* We might be going to allow this open. Check oplock
-                * status again. */
-               /* Second pass - send break for both batch or
-                * exclusive oplocks. */
-               if ((req != NULL) &&
-                   delay_for_exclusive_oplocks(
-                           fsp,
-                           req->mid,
-                           oplock_request,
-                           exclusive_entry)) {
-                       schedule_defer_open(lck, request_time, req);
-                       TALLOC_FREE(lck);
-                       fd_close(fsp);
-                       return NT_STATUS_SHARING_VIOLATION;
-               }
+       if (NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION) ||
+           (lck->data->num_share_modes > 0)) {
+               /*
+                * This comes from ancient times out of open_mode_check. I
+                * have no clue whether this is still necessary. I can't think
+                * of a case where this would actually matter further down in
+                * this function. I leave it here for further investigation
+                * :-)
+                */
+               file_existed = true;
        }
 
-       if (NT_STATUS_EQUAL(status, NT_STATUS_DELETE_PENDING)) {
-               /* DELETE_PENDING is not deferred for a second */
+       if ((req != NULL) &&
+           delay_for_oplock(
+                   fsp, req->mid, oplock_request, lck,
+                   NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION))) {
+               schedule_defer_open(lck, request_time, req);
                TALLOC_FREE(lck);
                fd_close(fsp);
-               return status;
+               return NT_STATUS_SHARING_VIOLATION;
        }
 
        if (!NT_STATUS_IS_OK(status)) {
@@ -2501,10 +2527,11 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
 
                /*
                 * If we're returning a share violation, ensure we
-                * cope with the braindead 1 second delay.
+                * cope with the braindead 1 second delay (SMB1 only).
                 */
 
                if (!(oplock_request & INTERNAL_OPEN_ONLY) &&
+                   !conn->sconn->using_smb2 &&
                    lp_defer_sharing_violations()) {
                        struct timeval timeout;
                        struct deferred_open_record state;
@@ -2557,10 +2584,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                return status;
        }
 
-       grant_fsp_oplock_type(fsp,
-                             oplock_request,
-                             got_level2_oplock,
-                             got_a_none_oplock);
+       grant_fsp_oplock_type(fsp, lck, oplock_request);
 
        /*
         * We have the share entry *locked*.....
@@ -2649,15 +2673,18 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
        status = set_file_oplock(fsp, fsp->oplock_type);
        if (!NT_STATUS_IS_OK(status)) {
                /*
-                * Could not get the kernel oplock or there are byte-range
-                * locks on the file.
+                * Could not get the kernel oplock
                 */
                fsp->oplock_type = NO_OPLOCK;
        }
 
-       set_share_mode(lck, fsp, get_current_uid(conn),
-                       req ? req->mid : 0,
-                      fsp->oplock_type);
+       if (!set_share_mode(lck, fsp, get_current_uid(conn),
+                           req ? req->mid : 0,
+                           fsp->oplock_type)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               return NT_STATUS_NO_MEMORY;
+       }
 
        /* Handle strange delete on close create semantics. */
        if (create_options & FILE_DELETE_ON_CLOSE) {
@@ -2744,12 +2771,6 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn,
                                  (unsigned int)new_unx_mode));
        }
 
-       /* If this is a successful open, we must remove any deferred open
-        * records. */
-       if (req != NULL) {
-               del_deferred_open_entry(lck, req->mid,
-                                       messaging_server_id(req->sconn->msg_ctx));
-       }
        TALLOC_FREE(lck);
 
        return NT_STATUS_OK;
@@ -3142,9 +3163,15 @@ static NTSTATUS open_directory(connection_struct *conn,
                return NT_STATUS_SHARING_VIOLATION;
        }
 
-       status = open_mode_check(conn, lck, fsp->name_hash,
-                               access_mask, share_access,
-                                create_options, &dir_existed);
+       if (has_delete_on_close(lck, fsp->name_hash)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               file_free(req, fsp);
+               return NT_STATUS_DELETE_PENDING;
+       }
+
+       status = open_mode_check(conn, lck,
+                                access_mask, share_access);
 
        if (!NT_STATUS_IS_OK(status)) {
                TALLOC_FREE(lck);
@@ -3153,14 +3180,20 @@ static NTSTATUS open_directory(connection_struct *conn,
                return status;
        }
 
-       set_share_mode(lck, fsp, get_current_uid(conn),
-                       req ? req->mid : 0, NO_OPLOCK);
+       if (!set_share_mode(lck, fsp, get_current_uid(conn),
+                           req ? req->mid : 0, NO_OPLOCK)) {
+               TALLOC_FREE(lck);
+               fd_close(fsp);
+               file_free(req, fsp);
+               return NT_STATUS_NO_MEMORY;
+       }
 
        /* For directories the delete on close bit at open time seems
           always to be honored on close... See test 19 in Samba4 BASE-DELETE. */
        if (create_options & FILE_DELETE_ON_CLOSE) {
                status = can_set_delete_on_close(fsp, 0);
                if (!NT_STATUS_IS_OK(status) && !NT_STATUS_EQUAL(status, NT_STATUS_DIRECTORY_NOT_EMPTY)) {
+                       del_share_mode(lck, fsp);
                        TALLOC_FREE(lck);
                        fd_close(fsp);
                        file_free(req, fsp);
@@ -3259,9 +3292,9 @@ void msg_file_was_renamed(struct messaging_context *msg,
                stream_name = NULL;
        }
 
-       status = create_synthetic_smb_fname(talloc_tos(), base_name,
-                                           stream_name, NULL, &smb_fname);
-       if (!NT_STATUS_IS_OK(status)) {
+       smb_fname = synthetic_smb_fname(talloc_tos(), base_name,
+                                       stream_name, NULL);
+       if (smb_fname == NULL) {
                return;
        }
 
@@ -3348,17 +3381,17 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
        }
 
        for (i=0; i<num_streams; i++) {
-               struct smb_filename *smb_fname = NULL;
+               struct smb_filename *smb_fname;
 
                if (strequal(stream_info[i].name, "::$DATA")) {
                        streams[i] = NULL;
                        continue;
                }
 
-               status = create_synthetic_smb_fname(talloc_tos(), fname,
-                                                   stream_info[i].name,
-                                                   NULL, &smb_fname);
-               if (!NT_STATUS_IS_OK(status)) {
+               smb_fname = synthetic_smb_fname(
+                       talloc_tos(), fname, stream_info[i].name, NULL);
+               if (smb_fname == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
 
@@ -3425,30 +3458,37 @@ NTSTATUS open_streams_for_delete(connection_struct *conn,
 
 static NTSTATUS inherit_new_acl(files_struct *fsp)
 {
-       TALLOC_CTX *ctx = talloc_tos();
+       TALLOC_CTX *frame = talloc_stackframe();
        char *parent_name = NULL;
        struct security_descriptor *parent_desc = NULL;
        NTSTATUS status = NT_STATUS_OK;
        struct security_descriptor *psd = NULL;
-       struct dom_sid *owner_sid = NULL;
-       struct dom_sid *group_sid = NULL;
+       const struct dom_sid *owner_sid = NULL;
+       const struct dom_sid *group_sid = NULL;
        uint32_t security_info_sent = (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL);
+       struct security_token *token = fsp->conn->session_info->security_token;
        bool inherit_owner = lp_inherit_owner(SNUM(fsp->conn));
        bool inheritable_components = false;
+       bool try_builtin_administrators = false;
+       const struct dom_sid *BA_U_sid = NULL;
+       const struct dom_sid *BA_G_sid = NULL;
+       bool try_system = false;
+       const struct dom_sid *SY_U_sid = NULL;
+       const struct dom_sid *SY_G_sid = NULL;
        size_t size = 0;
-       int orig_security_mask = 0;
-       int orig_directory_security_mask = 0;
-       int snum = SNUM(fsp->conn);
 
-       if (!parent_dirname(ctx, fsp->fsp_name->base_name, &parent_name, NULL)) {
+       if (!parent_dirname(frame, fsp->fsp_name->base_name, &parent_name, NULL)) {
+               TALLOC_FREE(frame);
                return NT_STATUS_NO_MEMORY;
        }
 
        status = SMB_VFS_GET_NT_ACL(fsp->conn,
-                               parent_name,
-                               (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL),
-                               &parent_desc);
+                                   parent_name,
+                                   (SECINFO_OWNER | SECINFO_GROUP | SECINFO_DACL),
+                                   frame,
+                                   &parent_desc);
        if (!NT_STATUS_IS_OK(status)) {
+               TALLOC_FREE(frame);
                return status;
        }
 
@@ -3456,6 +3496,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
                                        fsp->is_directory);
 
        if (!inheritable_components && !inherit_owner) {
+               TALLOC_FREE(frame);
                /* Nothing to inherit and not setting owner. */
                return NT_STATUS_OK;
        }
@@ -3475,13 +3516,99 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        }
 
        if (owner_sid == NULL) {
-               owner_sid = &fsp->conn->session_info->security_token->sids[PRIMARY_USER_SID_INDEX];
+               if (security_token_has_builtin_administrators(token)) {
+                       try_builtin_administrators = true;
+               } else if (security_token_is_system(token)) {
+                       try_builtin_administrators = true;
+                       try_system = true;
+               }
+       }
+
+       if (group_sid == NULL &&
+           token->num_sids == PRIMARY_GROUP_SID_INDEX)
+       {
+               if (security_token_is_system(token)) {
+                       try_builtin_administrators = true;
+                       try_system = true;
+               }
+       }
+
+       if (try_builtin_administrators) {
+               struct unixid ids;
+               bool ok;
+
+               ZERO_STRUCT(ids);
+               ok = sids_to_unixids(&global_sid_Builtin_Administrators, 1, &ids);
+               if (ok) {
+                       switch (ids.type) {
+                       case ID_TYPE_BOTH:
+                               BA_U_sid = &global_sid_Builtin_Administrators;
+                               BA_G_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       case ID_TYPE_UID:
+                               BA_U_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       case ID_TYPE_GID:
+                               BA_G_sid = &global_sid_Builtin_Administrators;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
+       if (try_system) {
+               struct unixid ids;
+               bool ok;
+
+               ZERO_STRUCT(ids);
+               ok = sids_to_unixids(&global_sid_System, 1, &ids);
+               if (ok) {
+                       switch (ids.type) {
+                       case ID_TYPE_BOTH:
+                               SY_U_sid = &global_sid_System;
+                               SY_G_sid = &global_sid_System;
+                               break;
+                       case ID_TYPE_UID:
+                               SY_U_sid = &global_sid_System;
+                               break;
+                       case ID_TYPE_GID:
+                               SY_G_sid = &global_sid_System;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = BA_U_sid;
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = SY_U_sid;
+       }
+
+       if (group_sid == NULL) {
+               group_sid = SY_G_sid;
+       }
+
+       if (try_system && group_sid == NULL) {
+               group_sid = BA_G_sid;
+       }
+
+       if (owner_sid == NULL) {
+               owner_sid = &token->sids[PRIMARY_USER_SID_INDEX];
        }
        if (group_sid == NULL) {
-               group_sid = &fsp->conn->session_info->security_token->sids[PRIMARY_GROUP_SID_INDEX];
+               if (token->num_sids == PRIMARY_GROUP_SID_INDEX) {
+                       group_sid = &token->sids[PRIMARY_USER_SID_INDEX];
+               } else {
+                       group_sid = &token->sids[PRIMARY_GROUP_SID_INDEX];
+               }
        }
 
-       status = se_create_child_secdesc(ctx,
+       status = se_create_child_secdesc(frame,
                        &psd,
                        &size,
                        parent_desc,
@@ -3489,6 +3616,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
                        group_sid,
                        fsp->is_directory);
        if (!NT_STATUS_IS_OK(status)) {
+               TALLOC_FREE(frame);
                return status;
        }
 
@@ -3509,14 +3637,6 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
                NDR_PRINT_DEBUG(security_descriptor, psd);
        }
 
-       /* Temporarily replace the security masks with the create masks,
-          as we're actually doing a create here - we only call this
-          when we've created a file or directory - but there's no
-          way for FSET_NT_ACL to know the difference. */
-
-       orig_security_mask = lp_set_security_mask(snum, lp_create_mask(snum));
-       orig_directory_security_mask = lp_set_directory_security_mask(snum, lp_dir_mask(snum));
-
        if (inherit_owner) {
                /* We need to be root to force this. */
                become_root();
@@ -3527,10 +3647,7 @@ static NTSTATUS inherit_new_acl(files_struct *fsp)
        if (inherit_owner) {
                unbecome_root();
        }
-
-       (void)lp_set_security_mask(snum, orig_security_mask);
-       (void)lp_set_directory_security_mask(snum, orig_directory_security_mask);
-
+       TALLOC_FREE(frame);
        return status;
 }
 
@@ -3634,11 +3751,11 @@ static NTSTATUS create_file_unixpath(connection_struct *conn,
                }
 
                /* Create an smb_filename with stream_name == NULL. */
-               status = create_synthetic_smb_fname(talloc_tos(),
-                                                   smb_fname->base_name,
-                                                   NULL, NULL,
-                                                   &smb_fname_base);
-               if (!NT_STATUS_IS_OK(status)) {
+               smb_fname_base = synthetic_smb_fname(talloc_tos(),
+                                                    smb_fname->base_name,
+                                                    NULL, NULL);
+               if (smb_fname_base == NULL) {
+                       status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }