drs: Fail replication transaction instead of dropping links
authorTim Beale <timbeale@catalyst.net.nz>
Tue, 13 Jun 2017 23:35:36 +0000 (11:35 +1200)
committerAndrew Bartlett <abartlet@samba.org>
Fri, 18 Aug 2017 04:07:11 +0000 (06:07 +0200)
If the DRS client received a linked attribute that it couldn't resolve
the target for, then it would just ignore that link and keep going. That
link would then be lost forever (although a full-sync would resolve
this). Instead of silently ignoring the link, fail the transaction.

This *can* happen on Samba, but it is unusual. The target object and
linked-attribute would need to be added while a replication is still in
progress. It can also happen fairly easily when talking to a Windows DC.

There are two import exceptions to this:

1). Linked attributes that span partitions. We can never guarantee that
we will have received the target object, because it may be in a partition
we haven't replicated yet. Samba doesn't have a great way of handling
this currently, but we shouldn't fail the replication (because that breaks
basic join tests). Just skip that linked attribute and hope that a
subsequent full-sync will fix it.
(I queried Microsoft and they said resolving cross-partition linked
attributes is a implementation-specific problem to solve. GET_TGT won't
resolve it)

2). When the replication involves a subset of objects, e.g.
critical-only. In these cases, we don't increase the highwater-mark, so
it is probably not such a dire problem if we don't add the link. In the
case of critical-only, we will do a subsequent full sync which will then
add the links.

Signed-off-by: Tim Beale <timbeale@catalyst.net.nz>
Reviewed-by: Garming Sam <garming@samba.org>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
BUG: https://bugzilla.samba.org/show_bug.cgi?id=12972

source4/dsdb/repl/drepl_out_helpers.c
source4/dsdb/samdb/ldb_modules/repl_meta_data.c
source4/dsdb/samdb/samdb.h
source4/libnet/libnet_vampire.c

index 079edc8ba46bb1dea83483011926d336d47901e1..ad4801adec0bc135a1e68d4ea8511cb21651bd0e 100644 (file)
@@ -795,6 +795,10 @@ static void dreplsrv_op_pull_source_apply_changes_trigger(struct tevent_req *req
        if (state->op->options & DRSUAPI_DRS_SPECIAL_SECRET_PROCESSING) {
                dsdb_repl_flags |= DSDB_REPL_FLAG_EXPECT_NO_SECRETS;
        }
+       if (state->op->options & DRSUAPI_DRS_CRITICAL_ONLY ||
+           state->op->extended_op != DRSUAPI_EXOP_NONE) {
+               dsdb_repl_flags |= DSDB_REPL_FLAG_OBJECT_SUBSET;
+       }
 
        if (state->op->extended_op != DRSUAPI_EXOP_NONE) {
                ret = dsdb_find_nc_root(service->samdb, partition,
index 53e2c12c841101e7f0dfc683e4d03e4f15854cbf..dc1667405c20ff36f90418a1c4150f9d20571f72 100644 (file)
@@ -74,6 +74,7 @@ struct replmd_private {
 struct la_entry {
        struct la_entry *next, *prev;
        struct drsuapi_DsReplicaLinkedAttribute *la;
+       bool incomplete_replica;
 };
 
 struct replmd_replicated_request {
@@ -6535,6 +6536,7 @@ static int replmd_extended_replicated_objects(struct ldb_module *module, struct
        struct ldb_control **ctrls;
        int ret;
        uint32_t i;
+       bool incomplete_subset;
        struct replmd_private *replmd_private =
                talloc_get_type(ldb_module_get_private(module), struct replmd_private);
 
@@ -6592,6 +6594,7 @@ static int replmd_extended_replicated_objects(struct ldb_module *module, struct
 
        ar->controls = req->controls;
        req->controls = ctrls;
+       incomplete_subset = (ar->objs->dsdb_repl_flags & DSDB_REPL_FLAG_OBJECT_SUBSET);
 
        DEBUG(4,("linked_attributes_count=%u\n", objs->linked_attributes_count));
 
@@ -6616,6 +6619,13 @@ static int replmd_extended_replicated_objects(struct ldb_module *module, struct
                }
                *la_entry->la = ar->objs->linked_attributes[i];
 
+               /*
+                * we may not be able to resolve link targets properly when
+                * dealing with subsets of objects, e.g. the source is a
+                * critical object and the target isn't
+                */
+               la_entry->incomplete_replica = incomplete_subset;
+
                /* we need to steal the non-scalars so they stay
                   around until the end of the transaction */
                talloc_steal(la_entry->la, la_entry->la->identifier);
@@ -6627,6 +6637,46 @@ static int replmd_extended_replicated_objects(struct ldb_module *module, struct
        return replmd_replicated_apply_next(ar);
 }
 
+/**
+ * Returns True if the source and target DNs both have the same naming context,
+ * i.e. they're both in the same partition.
+ */
+static bool replmd_objects_have_same_nc(struct ldb_context *ldb,
+                                       TALLOC_CTX *mem_ctx,
+                                       struct ldb_dn *source_dn,
+                                       struct ldb_dn *target_dn)
+{
+       TALLOC_CTX *tmp_ctx;
+       struct ldb_dn *source_nc;
+       struct ldb_dn *target_nc;
+       int ret;
+       bool same_nc = true;
+
+       tmp_ctx = talloc_new(mem_ctx);
+
+       ret = dsdb_find_nc_root(ldb, tmp_ctx, source_dn, &source_nc);
+       if (ret != LDB_SUCCESS) {
+               DBG_ERR("Failed to find base DN for source %s\n",
+                       ldb_dn_get_linearized(source_dn));
+               talloc_free(tmp_ctx);
+               return true;
+       }
+
+       ret = dsdb_find_nc_root(ldb, tmp_ctx, target_dn, &target_nc);
+       if (ret != LDB_SUCCESS) {
+               DBG_ERR("Failed to find base DN for target %s\n",
+                       ldb_dn_get_linearized(target_dn));
+               talloc_free(tmp_ctx);
+               return true;
+       }
+
+       same_nc = (ldb_dn_compare(source_nc, target_nc) == 0);
+
+       talloc_free(tmp_ctx);
+
+       return same_nc;
+}
+
 /**
  * Checks that the target object for a linked attribute exists.
  * If it exists, its GUID and deletion-state are returned
@@ -6705,9 +6755,45 @@ static int replmd_check_target_exists(struct ldb_module *module,
        }
 
        if (target_res->count == 0) {
-               DEBUG(2,(__location__ ": WARNING: Failed to re-resolve GUID %s - using %s\n",
-                        GUID_string(tmp_ctx, guid),
-                        ldb_dn_get_linearized(dsdb_dn->dn)));
+
+               /*
+                * TODO:
+                * When we implement Trusted Domains we need to consider
+                * whether they get treated as an incomplete replica here or not
+                */
+               if (la_entry->incomplete_replica) {
+
+                       /*
+                        * If we're only replicating a subset of objects (e.g.
+                        * critical-only, single-object), then an unknown target
+                        * is probably not a critical problem. We don't increase
+                        * the highwater-mark so subsequent replications should
+                        * resolve any missing links
+                        */
+                       DEBUG(2,(__location__
+                                ": Failed to find target %s linked from %s\n",
+                                ldb_dn_get_linearized(dsdb_dn->dn),
+                                ldb_dn_get_linearized(source_dn)));
+
+               } else if (replmd_objects_have_same_nc(ldb, tmp_ctx, source_dn,
+                                                      dsdb_dn->dn)) {
+                       ldb_asprintf_errstring(ldb, "Unknown target %s GUID %s linked from %s\n",
+                                              ldb_dn_get_linearized(dsdb_dn->dn),
+                                              GUID_string(tmp_ctx, guid),
+                                              ldb_dn_get_linearized(source_dn));
+                       ret = LDB_ERR_NO_SUCH_OBJECT;
+               } else {
+
+                       /*
+                        * TODO:
+                        * We don't handle cross-partition links well here (we
+                        * could potentially lose them), but don't fail the
+                        * replication.
+                        */
+                       DEBUG(2,("Failed to resolve cross-partition link between %s and %s\n",
+                                ldb_dn_get_linearized(source_dn),
+                                ldb_dn_get_linearized(dsdb_dn->dn)));
+               }
        } else if (target_res->count != 1) {
                ldb_asprintf_errstring(ldb, "More than one object found matching objectGUID %s\n",
                                       GUID_string(tmp_ctx, guid));
index c8658dc42fa39a5e32767e2662665cdf7791390c..2abaf4a8f1bab1e5fe48bdbd4fc2a0085de50a1f 100644 (file)
@@ -64,7 +64,7 @@ struct dsdb_control_current_partition {
 #define DSDB_REPL_FLAG_PARTIAL_REPLICA     2
 #define DSDB_REPL_FLAG_ADD_NCNAME         4
 #define DSDB_REPL_FLAG_EXPECT_NO_SECRETS   8
-
+#define DSDB_REPL_FLAG_OBJECT_SUBSET       16
 
 #define DSDB_CONTROL_REPLICATED_UPDATE_OID "1.3.6.1.4.1.7165.4.3.3"
 
index 7f25a3aa07835531ddadef6d84554f6ddfc8119c..d89256b02a80d3ac7fe0d0c769f16b8312a95c13 100644 (file)
@@ -660,6 +660,7 @@ WERROR libnet_vampire_cb_store_chunk(void *private_data,
                 */
                ZERO_STRUCT(s_dsa->highwatermark);
                uptodateness_vector = NULL;
+               dsdb_repl_flags |= DSDB_REPL_FLAG_OBJECT_SUBSET;
        }
 
        /* TODO: avoid hardcoded flags */