ldb: relatively efficient functions for finding duplicate values
authorDouglas Bagnall <douglas.bagnall@catalyst.net.nz>
Wed, 14 Jun 2017 23:30:33 +0000 (11:30 +1200)
committerAndrew Bartlett <abartlet@samba.org>
Thu, 15 Jun 2017 15:33:10 +0000 (17:33 +0200)
ldb backends need to make sure they are not adding duplicate values to
multi-valued attributes in ADD and MODIFY operations. Until now they
have done this inefficiently using nested loops. Here we add common
functions that deal with large numbers of values in O(n log n) time,
but continue to use the simple methods for small numbers of values.

These functions take a struct ldb_context pointer and an options flag
arguments, although the ldb is not used, and only one bit of the
options has meaning. This is to allow further patches to switch on
schema-aware comparisons.

This entails an ABI jump to add the two new functions.

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
lib/ldb/common/ldb_msg.c
lib/ldb/include/ldb_private.h
lib/ldb/ldb_sqlite3/ldb_sqlite3.c
lib/ldb/ldb_tdb/ldb_tdb.c
lib/ldb/tests/ldb_msg.c [new file with mode: 0644]
lib/ldb/wscript

index 6a379b1f43d9cee9187f71585be86ff53156a2ca..abad5a8320551c09e64539b993b8c5408ccdd32a 100644 (file)
@@ -89,6 +89,224 @@ struct ldb_val *ldb_msg_find_val(const struct ldb_message_element *el,
        return NULL;
 }
 
+
+static int ldb_val_cmp(const struct ldb_val *v1, const struct ldb_val *v2)
+{
+       if (v1->length != v2->length) {
+               return v1->length - v2->length;
+       }
+       return memcmp(v1->data, v2->data, v1->length);
+}
+
+
+/*
+  ldb_msg_find_duplicate_val() will set the **duplicate pointer to the first
+  duplicate value it finds. It does a case sensitive comparison (memcmp).
+
+  LDB_ERR_OPERATIONS_ERROR indicates an allocation failure or an unknown
+  options flag, otherwise LDB_SUCCESS.
+*/
+#define LDB_DUP_QUADRATIC_THRESHOLD 10
+
+int ldb_msg_find_duplicate_val(struct ldb_context *ldb,
+                              TALLOC_CTX *mem_ctx,
+                              const struct ldb_message_element *el,
+                              struct ldb_val **duplicate,
+                              uint32_t options)
+{
+       unsigned int i, j;
+       struct ldb_val *val;
+
+       if (options != 0) {
+               return LDB_ERR_OPERATIONS_ERROR;
+       }
+
+       *duplicate = NULL;
+
+       /*
+          If there are not many values, it is best to avoid the talloc
+          overhead and just do a brute force search.
+        */
+       if (el->num_values < LDB_DUP_QUADRATIC_THRESHOLD) {
+               for (j = 0; j < el->num_values; j++) {
+                       val = &el->values[j];
+                       for ( i = j + 1; i < el->num_values; i++) {
+                               if (ldb_val_equal_exact(val, &el->values[i])) {
+                                       *duplicate = val;
+                                       return LDB_SUCCESS;
+                               }
+                       }
+               }
+       } else {
+               struct ldb_val *values;
+               values = talloc_array(mem_ctx, struct ldb_val, el->num_values);
+               if (values == NULL) {
+                       return LDB_ERR_OPERATIONS_ERROR;
+               }
+
+               memcpy(values, el->values,
+                      el->num_values * sizeof(struct ldb_val));
+               TYPESAFE_QSORT(values, el->num_values, ldb_val_cmp);
+               for (i = 1; i < el->num_values; i++) {
+                       if (ldb_val_equal_exact(&values[i],
+                                               &values[i - 1])) {
+                               /* find the original location */
+                               for (j = 0; j < el->num_values; j++) {
+                                       if (ldb_val_equal_exact(&values[i],
+                                                               &el->values[j])
+                                               ) {
+                                               *duplicate = &el->values[j];
+                                               break;
+                                       }
+                               }
+                               talloc_free(values);
+                               if (*duplicate == NULL) {
+                                       /* how we got here, I don't know */
+                                       return LDB_ERR_OPERATIONS_ERROR;
+                               }
+                               return LDB_SUCCESS;
+                       }
+               }
+               talloc_free(values);
+       }
+       return LDB_SUCCESS;
+}
+
+
+/*
+  Determine whether the values in an element are also in another element.
+
+  Without any flags, return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS if the elements
+  share values, or LDB_SUCCESS if they don't. In this case, the function
+  simply determines the set intersection and it doesn't matter in which order
+  the elements are provided.
+
+  With the LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES flag, any values in common are
+  removed from the first element and LDB_SUCCESS is returned.
+
+  LDB_ERR_OPERATIONS_ERROR indicates an allocation failure or an unknown option.
+  LDB_ERR_INAPPROPRIATE_MATCHING is returned if the elements differ in name.
+*/
+
+int ldb_msg_find_common_values(struct ldb_context *ldb,
+                              TALLOC_CTX *mem_ctx,
+                              struct ldb_message_element *el,
+                              struct ldb_message_element *el2,
+                              uint32_t options)
+{
+       struct ldb_val *values;
+       struct ldb_val *values2;
+       unsigned int i, j, k, n_values;
+
+       bool remove_duplicates = options & LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES;
+
+       if ((options & ~LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES) != 0) {
+               return LDB_ERR_OPERATIONS_ERROR;
+       }
+
+       if (strcmp(el->name, el2->name) != 0) {
+               return LDB_ERR_INAPPROPRIATE_MATCHING;
+       }
+       /*
+          With few values, it is better to do the brute-force search than the
+          clever search involving tallocs, memcpys, sorts, etc.
+       */
+       if (MIN(el->num_values, el2->num_values) == 1 ||
+           MAX(el->num_values, el2->num_values) < LDB_DUP_QUADRATIC_THRESHOLD) {
+               for (i = 0; i < el2->num_values; i++) {
+                       for (j = 0; j < el->num_values; j++) {
+                               if (ldb_val_equal_exact(&el->values[j],
+                                                       &el2->values[i])) {
+                                       if (! remove_duplicates) {
+                                           return                      \
+                                             LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
+                                       }
+                                       /*
+                                         With the remove_duplicates flag, we
+                                         resolve the intersection by removing
+                                         the offending one from el.
+                                       */
+                                       el->num_values--;
+                                       for (k = j; k < el->num_values; k++) {
+                                               el->values[k] = \
+                                                       el->values[k + 1];
+                                       }
+                                       j--; /* rewind */
+                               }
+                       }
+               }
+               return LDB_SUCCESS;
+       }
+
+       values = talloc_array(mem_ctx, struct ldb_val, el->num_values);
+       if (values == NULL) {
+               return LDB_ERR_OPERATIONS_ERROR;
+       }
+       values2 = talloc_array(mem_ctx, struct ldb_val,
+                                   el2->num_values);
+       if (values2 == NULL) {
+               return LDB_ERR_OPERATIONS_ERROR;
+       }
+
+       memcpy(values, el->values,
+              el->num_values * sizeof(struct ldb_val));
+       memcpy(values2, el2->values,
+              el2->num_values * sizeof(struct ldb_val));
+       TYPESAFE_QSORT(values, el->num_values, ldb_val_cmp);
+       TYPESAFE_QSORT(values2, el2->num_values, ldb_val_cmp);
+
+       /*
+          el->n_values may diverge from the number of values in the sorted
+          list when the remove_duplicates flag is used.
+       */
+       n_values = el->num_values;
+       i = 0;
+       j = 0;
+       while (i != n_values) {
+               int ret = ldb_val_cmp(&values[i], &values2[j]);
+               if (ret < 0) {
+                       i++;
+               } else if (ret > 0) {
+                       j++;
+                       if (j == el2->num_values) {
+                               /*
+                                 We have walked past the end of the second
+                                 list, meaning the remainder of the first
+                                 list cannot collide and we're done.
+                               */
+                               break;
+                       }
+               } else {
+                       /* we have a collision */
+                       if (! remove_duplicates) {
+                               TALLOC_FREE(values);
+                               TALLOC_FREE(values2);
+                               return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
+                       }
+                       /*
+                          With the remove_duplicates flag we need to find
+                          this in the original list and remove it, which is
+                          inefficient but hopefully rare.
+                       */
+                       for (k = 0; k < el->num_values; k++) {
+                               if (ldb_val_equal_exact(&el->values[k],
+                                                       &values[i])) {
+                                       break;
+                               }
+                       }
+                       el->num_values--;
+                       for (; k < el->num_values; k++) {
+                               el->values[k] = el->values[k + 1];
+                       }
+                       i++;
+               }
+       }
+       TALLOC_FREE(values);
+       TALLOC_FREE(values2);
+
+       return LDB_SUCCESS;
+}
+
 /*
   duplicate a ldb_val structure
 */
index 8b233d3aa956823810435dbbe276e04f85b54b46..ae7ec3cfd0d4d74f2c0861d737b150ea9a1fd862 100644 (file)
@@ -237,4 +237,55 @@ char *ldb_ldif_write_redacted_trace_string(struct ldb_context *ldb, TALLOC_CTX *
  */
 struct ldb_context *ldb_dn_get_ldb_context(struct ldb_dn *dn);
 
+#define LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES 1
+
+/**
+  Determine whether any values in an element are also in another element,
+  and optionally fix that.
+
+  \param ldb      an ldb context
+  \param mem_ctx  a talloc context
+  \param el       an element
+  \param other_el another element
+  \param options  flags controlling the function behaviour
+
+  Without the LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES flag, return
+  LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS if the elements share values, and
+  LDB_SUCCESS if they don't. That is, determine whether there is an
+  intersection without changing anything.
+
+  With the LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES flag, any values in common
+  are removed from the first element and LDB_SUCCESS is returned.
+
+  LDB_ERR_OPERATIONS_ERROR indicates an allocation failure or an unknown option.
+  LDB_ERR_INAPPROPRIATE_MATCHING means the elements differ in name.
+*/
+
+int ldb_msg_find_common_values(struct ldb_context *ldb,
+                              TALLOC_CTX *mem_ctx,
+                              struct ldb_message_element *el,
+                              struct ldb_message_element *other_el,
+                              uint32_t options);
+
+/**
+   Detect whether an element contains duplicate values
+
+   \param ldb a currently unused ldb_context struct
+   \param mem_ctx a talloc context
+   \param el the element to search
+   \param duplicate will point to a duplicate value if there are duplicates,
+   or NULL otherwise.
+   \param options is a flags field. All values are reserved.
+
+   \return an ldb error code. LDB_ERR_OPERATIONS_ERROR indicates an allocation
+   failure or an unknown option flag. Otherwise LDB_SUCCESS.
+
+   \note This search is case sensitive
+*/
+int ldb_msg_find_duplicate_val(struct ldb_context *ldb,
+                              TALLOC_CTX *mem_ctx,
+                              const struct ldb_message_element *el,
+                              struct ldb_val **duplicate,
+                              uint32_t options);
+
 #endif
index 60b39e89790f2c3ec8b6ea90d19061716d98b4ac..f94dc993904ba4673647c9056d94caeebc2c7677 100644 (file)
@@ -1152,12 +1152,24 @@ static int lsql_modify(struct lsql_context *ctx)
                switch (flags) {
 
                case LDB_FLAG_MOD_REPLACE:
+                       struct ldb_val *duplicate = NULL;
 
-                       for (j=0; j<el->num_values; j++) {
-                               if (ldb_msg_find_val(el, &el->values[j]) != &el->values[j]) {
-                                       ldb_asprintf_errstring(ldb, "%s: value #%d provided more than once", el->name, j);
-                                       return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
-                               }
+                       ret = ldb_msg_find_duplicate_val(ldb, el, el,
+                                                        &duplicate, 0);
+                       if (ret != LDB_SUCCESS) {
+                               return ret;
+                       }
+                       if (duplicate != NULL) {
+                               ldb_asprintf_errstring(
+                                       ldb,
+                                       "attribute '%s': value '%.*s' "
+                                       "on '%s' provided more than "
+                                       "once in REPLACE",
+                                       el->name,
+                                       (int)duplicate->length,
+                                       duplicate->data,
+                                       ldb_dn_get_linearized(msg2->dn));
+                               return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
                        }
 
                        /* remove all attributes before adding the replacements */
index 261011eb99cb69ffb3339f5f0549d8e1222e5d49..1822ea01a08380e8f43899c4aee335fdf265bdfb 100644 (file)
@@ -330,7 +330,7 @@ static int ltdb_add_internal(struct ldb_module *module,
 {
        struct ldb_context *ldb = ldb_module_get_ctx(module);
        int ret = LDB_SUCCESS;
-       unsigned int i, j;
+       unsigned int i;
 
        for (i=0;i<msg->num_elements;i++) {
                struct ldb_message_element *el = &msg->elements[i];
@@ -356,16 +356,23 @@ static int ltdb_add_internal(struct ldb_module *module,
                }
 
                if (check_single_value) {
-                       /* TODO: This is O(n^2) - replace with more efficient check */
-                       for (j=0; j<el->num_values; j++) {
-                               if (ldb_msg_find_val(el, &el->values[j]) != &el->values[j]) {
-                                       ldb_asprintf_errstring(ldb,
-                                                              "attribute '%s': value #%u on '%s' "
-                                                              "provided more than once in ADD object",
-                                                              el->name, j, 
-                                                              ldb_dn_get_linearized(msg->dn));
-                                       return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
-                               }
+                       struct ldb_val *duplicate = NULL;
+
+                       ret = ldb_msg_find_duplicate_val(ldb, discard_const(msg),
+                                                        el, &duplicate, 0);
+                       if (ret != LDB_SUCCESS) {
+                               return ret;
+                       }
+                       if (duplicate != NULL) {
+                               ldb_asprintf_errstring(
+                                       ldb,
+                                       "attribute '%s': value '%.*s' on '%s' "
+                                       "provided more than once in ADD object",
+                                       el->name,
+                                       (int)duplicate->length,
+                                       duplicate->data,
+                                       ldb_dn_get_linearized(msg->dn));
+                               return LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
                        }
                }
        }
@@ -681,7 +688,7 @@ int ltdb_modify_internal(struct ldb_module *module,
        TDB_DATA tdb_key, tdb_data;
        struct ldb_val ldb_data;
        struct ldb_message *msg2;
-       unsigned int i, j, k;
+       unsigned int i, j;
        int ret = LDB_SUCCESS, idx;
        struct ldb_control *control_permissive = NULL;
 
@@ -727,6 +734,10 @@ int ltdb_modify_internal(struct ldb_module *module,
                struct ldb_val *vals;
                const struct ldb_schema_attribute *a = ldb_schema_attribute_by_name(ldb, el->name);
                const char *dn;
+               uint32_t options = 0;
+               if (control_permissive != NULL) {
+                       options |= LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES;
+               }
 
                switch (msg->elements[i].flags & LDB_FLAG_MOD_MASK) {
                case LDB_FLAG_MOD_ADD:
@@ -795,34 +806,45 @@ int ltdb_modify_internal(struct ldb_module *module,
 
                                /* Check that values don't exist yet on multi-
                                   valued attributes or aren't provided twice */
-                               /* TODO: This is O(n^2) - replace with more efficient check */
-                               if (!(el->flags & LDB_FLAG_INTERNAL_DISABLE_SINGLE_VALUE_CHECK)) {
-                                       for (j = 0; j < el->num_values; j++) {
-                                               if (ldb_msg_find_val(el2, &el->values[j]) != NULL) {
-                                                       if (control_permissive) {
-                                                               /* remove this one as if it was never added */
-                                                               el->num_values--;
-                                                               for (k = j; k < el->num_values; k++) {
-                                                                       el->values[k] = el->values[k + 1];
-                                                               }
-                                                               j--; /* rewind */
-
-                                                               continue;
-                                                       }
-
-                                                       ldb_asprintf_errstring(ldb,
-                                                                              "attribute '%s': value #%u on '%s' already exists",
-                                                                              el->name, j, ldb_dn_get_linearized(msg2->dn));
-                                                       ret = LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
-                                                       goto done;
-                                               }
-                                               if (ldb_msg_find_val(el, &el->values[j]) != &el->values[j]) {
-                                                       ldb_asprintf_errstring(ldb,
-                                                                              "attribute '%s': value #%u on '%s' provided more than once in ADD",
-                                                                              el->name, j, ldb_dn_get_linearized(msg2->dn));
-                                                       ret = LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
-                                                       goto done;
-                                               }
+                               if (!(el->flags &
+                                     LDB_FLAG_INTERNAL_DISABLE_SINGLE_VALUE_CHECK)) {
+                                       struct ldb_val *duplicate = NULL;
+                                       ret = ldb_msg_find_common_values(ldb,
+                                                                        msg2,
+                                                                        el,
+                                                                        el2,
+                                                                        options);
+
+                                       if (ret ==
+                                           LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS) {
+                                               ldb_asprintf_errstring(ldb,
+                                                       "attribute '%s': value "
+                                                       "#%u on '%s' already "
+                                                       "exists", el->name, j,
+                                                       ldb_dn_get_linearized(msg2->dn));
+                                               goto done;
+                                       } else if (ret != LDB_SUCCESS) {
+                                               goto done;
+                                       }
+
+                                       ret = ldb_msg_find_duplicate_val(
+                                               ldb, msg2, el, &duplicate, 0);
+                                       if (ret != LDB_SUCCESS) {
+                                               goto done;
+                                       }
+                                       if (duplicate != NULL) {
+                                               ldb_asprintf_errstring(
+                                                       ldb,
+                                                       "attribute '%s': value "
+                                                       "'%.*s' on '%s' "
+                                                       "provided more than "
+                                                       "once in ADD",
+                                                       el->name,
+                                                       (int)duplicate->length,
+                                                       duplicate->data,
+                                                       ldb_dn_get_linearized(msg->dn));
+                                               ret = LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
+                                               goto done;
                                        }
                                }
 
@@ -868,16 +890,26 @@ int ltdb_modify_internal(struct ldb_module *module,
                         * in Samba, or someone else who can claim to
                         * know what they are doing. 
                         */
-                       if (!(el->flags & LDB_FLAG_INTERNAL_DISABLE_SINGLE_VALUE_CHECK)) { 
-                               /* TODO: This is O(n^2) - replace with more efficient check */
-                               for (j=0; j<el->num_values; j++) {
-                                       if (ldb_msg_find_val(el, &el->values[j]) != &el->values[j]) {
-                                               ldb_asprintf_errstring(ldb,
-                                                                      "attribute '%s': value #%u on '%s' provided more than once in REPLACE",
-                                                                      el->name, j, ldb_dn_get_linearized(msg2->dn));
-                                               ret = LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
-                                               goto done;
-                                       }
+                       if (!(el->flags & LDB_FLAG_INTERNAL_DISABLE_SINGLE_VALUE_CHECK)) {
+                               struct ldb_val *duplicate = NULL;
+
+                               ret = ldb_msg_find_duplicate_val(ldb, msg2, el,
+                                                                &duplicate, 0);
+                               if (ret != LDB_SUCCESS) {
+                                       goto done;
+                               }
+                               if (duplicate != NULL) {
+                                       ldb_asprintf_errstring(
+                                               ldb,
+                                               "attribute '%s': value '%.*s' "
+                                               "on '%s' provided more than "
+                                               "once in REPLACE",
+                                               el->name,
+                                               (int)duplicate->length,
+                                               duplicate->data,
+                                               ldb_dn_get_linearized(msg2->dn));
+                                       ret = LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS;
+                                       goto done;
                                }
                        }
 
diff --git a/lib/ldb/tests/ldb_msg.c b/lib/ldb/tests/ldb_msg.c
new file mode 100644 (file)
index 0000000..5261a73
--- /dev/null
@@ -0,0 +1,352 @@
+/*
+ * from cmocka.c:
+ * These headers or their equivalents should be included prior to
+ * including
+ * this header file.
+ *
+ * #include <stdarg.h>
+ * #include <stddef.h>
+ * #include <setjmp.h>
+ *
+ * This allows test applications to use custom definitions of C standard
+ * library functions and types.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <talloc.h>
+
+#include <ldb.h>
+#include <ldb_private.h>
+#include <string.h>
+#include <ctype.h>
+
+struct test_ctx {
+       struct ldb_message *msg;
+};
+
+static int ldb_msg_setup(void **state)
+{
+       struct test_ctx *test_ctx;
+
+       test_ctx = talloc_zero(NULL, struct test_ctx);
+       assert_non_null(test_ctx);
+
+       test_ctx->msg = ldb_msg_new(test_ctx);
+
+       *state = test_ctx;
+       return 0;
+}
+
+static int ldb_msg_teardown(void **state)
+{
+       struct test_ctx *test_ctx = talloc_get_type_abort(*state,
+                                                         struct test_ctx);
+
+       talloc_free(test_ctx);
+       return 0;
+}
+
+
+static void add_uint_value(struct test_ctx *test_ctx,
+                          struct ldb_message *msg,
+                          const char *attr,
+                          unsigned int x)
+{
+       int ret;
+       struct ldb_val v, v_dup;
+       char s[5];
+       snprintf(s, sizeof(s), "%04x", x);
+       v.data = (uint8_t *)s;
+       v.length = 4;
+       v_dup = ldb_val_dup(test_ctx, &v);
+       assert_non_null(v_dup.data);
+       assert_ptr_not_equal(v_dup.data, v.data);
+       assert_int_equal(v_dup.length, 4);
+
+       ret = ldb_msg_add_value(msg, attr, &v_dup, NULL);
+       assert_int_equal(ret, LDB_SUCCESS);
+}
+
+
+static void test_ldb_msg_find_duplicate_val(void **state)
+{
+       int ret;
+       unsigned int i;
+       struct test_ctx *test_ctx = talloc_get_type_abort(*state,
+                                                         struct test_ctx);
+       struct ldb_message *msg = test_ctx->msg;
+       struct ldb_message_element *el;
+       struct ldb_val dummy;
+       struct ldb_val *dupe = &dummy;  /* so we can tell it was modified to NULL, not left as NULL */
+
+       ldb_msg_add_empty(msg, "el1", 0, &el);
+
+       for (i = 0; i < 5; i++) {
+               add_uint_value(test_ctx, msg, "el1", i);
+       }
+       /* at this point there are no duplicates, and the check uses the naive
+          quadratic path */
+       ret = ldb_msg_find_duplicate_val(NULL, test_ctx, el, &dupe, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_null(dupe);
+
+       /* add a duplicate, still using quadratric path */
+       add_uint_value(test_ctx, msg, "el1", 3);
+       ret = ldb_msg_find_duplicate_val(NULL, test_ctx, el, &dupe, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_non_null(dupe);
+       assert_int_equal(dupe->length, 4);
+       assert_memory_equal(dupe->data, "0003", 4);
+
+       /* add some more, triggering algorithmic jump */
+       for (i = 2; i < 11; i++) {
+               add_uint_value(test_ctx, msg, "el1", i);
+       }
+       ret = ldb_msg_find_duplicate_val(NULL, test_ctx, el, &dupe, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_non_null(dupe);
+       assert_int_equal(dupe->length, 4);
+       /*XXX not really guaranteed by the API */
+       assert_memory_equal(dupe->data, "0002", 4);
+
+       /* start a new element without duplicates, for the clever algorithm */
+       ldb_msg_add_empty(msg, "el2", 0, &el);
+       for (i = 0; i < 12; i++) {
+               add_uint_value(test_ctx, msg, "el2", i);
+       }
+       ret = ldb_msg_find_duplicate_val(NULL, test_ctx, el, &dupe, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_null(dupe);
+}
+
+
+static struct ldb_message_element *new_msg_element(TALLOC_CTX *mem_ctx,
+                                                  const char *name,
+                                                  unsigned int value_offset,
+                                                  unsigned int num_values)
+{
+       unsigned int i, x;
+       struct ldb_message_element *el = talloc_zero(mem_ctx,
+                                                    struct ldb_message_element);
+
+       el->values = talloc_array(el, struct ldb_val, num_values);
+       for (i = 0; i < num_values; i++) {
+               struct ldb_val v;
+               char s[50];
+               v.data = (uint8_t *)s;
+               /* % 3 is to ensure the values list is unsorted */
+               x = i + value_offset;
+               v.length = snprintf(s, sizeof(s), "%u %u", x % 3, x);
+               el->values[i] = ldb_val_dup(mem_ctx, &v);
+       }
+       el->name = name;
+       el->num_values = num_values;
+       return el;
+}
+
+static void _assert_element_equal(struct ldb_message_element *a,
+                                 struct ldb_message_element *b,
+                                 const char * const file,
+                                 const int line)
+{
+       unsigned int i;
+       _assert_int_equal(a->num_values, b->num_values, file, line);
+       _assert_int_equal(a->flags, b->flags, file, line);
+       _assert_string_equal(a->name, b->name, file, line);
+       for (i = 0; i < a->num_values; i++) {
+               struct ldb_val *v1 = &a->values[i];
+               struct ldb_val *v2 = &b->values[i];
+               _assert_int_equal(v1->length, v2->length, file, line);
+               _assert_memory_equal(v1->data, v2->data, v1->length,
+                                    file, line);
+       }
+}
+
+#define assert_element_equal(a, b)                             \
+       _assert_element_equal((a), (b),                         \
+                             __FILE__, __LINE__)
+
+
+static void test_ldb_msg_find_common_values(void **state)
+{
+       /* we only use the state as a talloc context */
+       struct ldb_message_element *el, *el2, *el3, *el4, *el2b;
+       struct ldb_message_element *orig, *orig2, *orig3, *orig4;
+       int ret;
+       const uint32_t remove_dupes = LDB_MSG_FIND_COMMON_REMOVE_DUPLICATES;
+       el = new_msg_element(*state, "test", 0, 4);
+       el2 = new_msg_element(*state, "test", 4, 4);
+       el3 = new_msg_element(*state, "test", 6, 4);
+       orig = new_msg_element(*state, "test", 0, 4);
+       orig2 = new_msg_element(*state, "test", 4, 4);
+       orig3 = new_msg_element(*state, "test", 6, 4);
+
+       /* first round is with short value arrays, using quadratic method */
+
+       /* we expect no collisions here */
+       ret = ldb_msg_find_common_values(NULL, *state, el, el2, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       /*or here */
+       ret = ldb_msg_find_common_values(NULL, *state, el, el3, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       /* the same elements in reverse order */
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       /* 6, 7 collide */
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el3, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+
+       /* and again */
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el2, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+
+       /* make sure the arrays haven't changed */
+       assert_element_equal(el, orig);
+       assert_element_equal(el2, orig2);
+       assert_element_equal(el3, orig3);
+
+       /* now with the control permisive flag, the first element should be
+          modified to remove the overlap.*/
+
+       /* 6, 7 collide, so el2 will only have 4 and 5 */
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el3, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       assert_element_equal(el3, orig3);
+       assert_int_not_equal(el2->num_values, orig2->num_values);
+       assert_int_equal(el2->num_values, 2);
+       el2b = new_msg_element(*state, "test", 4, 2);
+       assert_element_equal(el2, el2b);
+
+       /* now try the same things with a long and a short value list.
+          this should still trigger the quadratic path.
+        */
+       el2 = new_msg_element(*state, "test", 4, 10);
+       orig2 = new_msg_element(*state, "test", 4, 10);
+
+       /* no collisions */
+       ret = ldb_msg_find_common_values(NULL, *state, el, el2, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       /*collisions */
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el2, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+
+       assert_element_equal(el, orig);
+       assert_element_equal(el2, orig2);
+       assert_element_equal(el3, orig3);
+
+       /*collisions with permissive flag*/
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el2, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_element_equal(el2, orig2);
+       assert_int_equal(el3->num_values, 0);
+
+       /* seeing as we have an empty element, try permutations therewith.
+          everything should succeed. */
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el2, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el3, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el3, el2, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el3, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       assert_element_equal(el2, orig2);
+       assert_element_equal(el, orig);
+       assert_int_equal(el3->num_values, 0);
+
+       /* now with two large value lists */
+       el = new_msg_element(*state, "test", 0, 12);
+       orig = new_msg_element(*state, "test", 0, 12);
+       el4 = new_msg_element(*state, "test", 12, 12);
+       orig4 = new_msg_element(*state, "test", 12, 12);
+
+       /* no collisions */
+       ret = ldb_msg_find_common_values(NULL, *state, el, el4, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       ret = ldb_msg_find_common_values(NULL, *state, el4, el, 0);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       /* collisions */
+       ret = ldb_msg_find_common_values(NULL, *state, el4, el2, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el4, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el, 0);
+       assert_int_equal(ret, LDB_ERR_ATTRIBUTE_OR_VALUE_EXISTS);
+
+       assert_element_equal(el, orig);
+       assert_element_equal(el2, orig2);
+       assert_element_equal(el4, orig4);
+
+       /* with permissive control, but no collisions */
+       ret = ldb_msg_find_common_values(NULL, *state, el, el4, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+       ret = ldb_msg_find_common_values(NULL, *state, el4, el, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+
+       assert_element_equal(el, orig);
+       assert_element_equal(el4, orig4);
+
+       /* now with collisions, thus modifications.
+          At this stage:
+          el is 0-11 (inclusive)
+          e2 is 4-13
+          el3 is empty
+          el4 is 12-23
+        */
+       ret = ldb_msg_find_common_values(NULL, *state, el4, el2, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_element_equal(el2, orig2);
+       assert_int_not_equal(el4->num_values, orig4->num_values);
+       /* 4 should start at 14 */
+       orig4 = new_msg_element(*state, "test", 14, 10);
+       assert_element_equal(el4, orig4);
+
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el, remove_dupes);
+       assert_int_equal(ret, LDB_SUCCESS);
+       assert_element_equal(el, orig);
+       assert_int_not_equal(el2->num_values, orig2->num_values);
+       orig2 = new_msg_element(*state, "test", 12, 2);
+       assert_element_equal(el2, orig2);
+
+       /* make sure an identical element with a different name is rejected */
+       el2 = new_msg_element(*state, "fish", 12, 2);
+       ret = ldb_msg_find_common_values(NULL, *state, el2, el, remove_dupes);
+       assert_int_equal(ret, LDB_ERR_INAPPROPRIATE_MATCHING);
+}
+
+
+
+int main(int argc, const char **argv)
+{
+       const struct CMUnitTest tests[] = {
+               cmocka_unit_test_setup_teardown(test_ldb_msg_find_duplicate_val,
+                                               ldb_msg_setup,
+                                               ldb_msg_teardown),
+               cmocka_unit_test_setup_teardown(
+                       test_ldb_msg_find_common_values,
+                       ldb_msg_setup,
+                       ldb_msg_teardown),
+       };
+
+       return cmocka_run_group_tests(tests, NULL, NULL);
+}
index 635a787b5d694dae0e44e195b4c619df3330b432..bdada800994c69ce8c339ff3ebb3b0adf5fe7c11 100644 (file)
@@ -321,6 +321,11 @@ def build(bld):
                          deps='cmocka ldb',
                          install=False)
 
+        bld.SAMBA_BINARY('ldb_msg_test',
+                         source='tests/ldb_msg.c',
+                         deps='cmocka ldb',
+                         install=False)
+
 def test(ctx):
     '''run ldb testsuite'''
     import Utils, samba_utils, shutil
@@ -346,8 +351,11 @@ def test(ctx):
 
     os.environ['LDB_MODULES_PATH'] = Utils.g_module.blddir + '/modules/ldb'
     os.environ['LD_LIBRARY_PATH'] = Utils.g_module.blddir + '/bin/default/lib/ldb'
-    cmd = Utils.g_module.blddir + '/ldb_tdb_mod_op_test'
-    cmocka_ret = samba_utils.RUN_COMMAND(cmd)
+    cmocka_ret = 0
+    for test_exe in ['ldb_tdb_mod_op_test',
+                     'ldb_msg_test']:
+            cmd = os.path.join(Utils.g_module.blddir, test_exe)
+            cmocka_ret = cmocka_ret or samba_utils.RUN_COMMAND(cmd)
 
     sys.exit(ret or pyret or cmocka_ret)