2 ldb database library using mdb back end
4 Copyright (C) Jakub Hrozek 2014
5 Copyright (C) Catalyst.Net Ltd 2017
7 ** NOTE! The following LGPL license applies to the ldb
8 ** library. This does NOT imply that all of Samba is released
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 3 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 #include "../ldb_tdb/ldb_tdb.h"
27 #include "include/dlinklist.h"
/* URL prefix recognised by lmdb_get_path(), and its length without the NUL */
29 #define MDB_URL_PREFIX "mdb://"
30 #define MDB_URL_PREFIX_SIZE (sizeof(MDB_URL_PREFIX)-1)
/* Byte-count helpers; both expressions are evaluated in int arithmetic */
32 #define MEGABYTE (1024*1024)
33 #define GIGABYTE (1024*1024*1024)
/*
 * Map a status code coming back from LMDB onto one of the LDB_ERR_*
 * result codes.  Several LMDB codes share one LDB result.
 */
35 int ldb_mdb_err_map(int lmdb_err)
41 return LDB_ERR_OPERATIONS_ERROR;
42 case MDB_INCOMPATIBLE:
45 return LDB_ERR_UNAVAILABLE;
53 return LDB_ERR_PROTOCOL_ERROR;
56 case MDB_READERS_FULL:
62 return LDB_ERR_ENTRY_ALREADY_EXISTS;
65 return LDB_ERR_NO_SUCH_OBJECT;
67 return LDB_ERR_INSUFFICIENT_ACCESS_RIGHTS;
/* Wrapper that hands the call site's __FILE__ and __LINE__ to lmdb_error_at() */
74 #define ldb_mdb_error(ldb, ecode) lmdb_error_at(ldb, ecode, __FILE__, __LINE__)
/*
 * Translate an LMDB status into an LDB error code with ldb_mdb_err_map()
 * and set the ldb error string, using mdb_strerror() for the reason text.
 * NOTE(review): the return statement is not visible here; callers return
 * this function's result directly, so presumably it is the mapped LDB code.
 */
75 static int lmdb_error_at(struct ldb_context *ldb,
80 int ldb_err = ldb_mdb_err_map(ecode);
81 char *reason = mdb_strerror(ecode);
82 ldb_asprintf_errstring(ldb,
92 static bool lmdb_transaction_active(struct ltdb_private *ltdb)
94 return ltdb->lmdb_private->txlist != NULL;
/*
 * Return the MDB_txn held by a transaction record.
 * NOTE(review): the body is not visible here.  Callers pass in the result
 * of lmdb_private_trans_head() unchecked, so presumably a NULL ltx yields
 * a NULL transaction - confirm.
 */
97 static MDB_txn *lmdb_trans_get_tx(struct lmdb_trans *ltx)
/* Add a newly started transaction record to lmdb->txlist. */
106 static void trans_push(struct lmdb_private *lmdb, struct lmdb_trans *ltx)
/* make the existing list head the talloc parent of the new record */
109 talloc_steal(lmdb->txlist, ltx);
112 DLIST_ADD(lmdb->txlist, ltx);
/*
 * Unlink a transaction record from lmdb->txlist.  Called after the
 * underlying MDB_txn has been committed or aborted.
 */
115 static void trans_finished(struct lmdb_private *lmdb, struct lmdb_trans *ltx)
117 DLIST_REMOVE(lmdb->txlist, ltx);
/*
 * Return a transaction record from lmdb->txlist.
 * NOTE(review): body not visible; going by the name this is presumably the
 * list head - confirm.
 */
122 static struct lmdb_trans *lmdb_private_trans_head(struct lmdb_private *lmdb)
124 struct lmdb_trans *ltx;
/*
 * Choose the transaction a read should run in.
 *
 * An already open read transaction (lmdb->read_txn) is returned as is.
 * Otherwise the transaction of the record returned by
 * lmdb_private_trans_head() is looked up, and failing that a new
 * MDB_RDONLY transaction is begun and remembered in lmdb->read_txn.
 * If that begin fails the ldb error string is set.
 */
130 static MDB_txn *get_current_txn(struct lmdb_private *lmdb)
133 if (lmdb->read_txn != NULL) {
134 return lmdb->read_txn;
137 txn = lmdb_trans_get_tx(lmdb_private_trans_head(lmdb));
/* nothing usable is open: start an implicit read-only transaction */
140 ret = mdb_txn_begin(lmdb->env, NULL, MDB_RDONLY, &txn);
143 ldb_asprintf_errstring(lmdb->ldb,
144 "%s failed: %s\n", __FUNCTION__,
/* cache it; the functions that use it commit it when no read lock is held */
147 lmdb->read_txn = txn;
/*
 * Write a key/value pair using the transaction returned for the record
 * from lmdb_private_trans_head().
 *
 * flags follows the TDB convention: TDB_INSERT is translated to
 * MDB_NOOVERWRITE, and TDB_MODIFY first checks with mdb_get() that the
 * key already exists.
 *
 * Returns LDB_ERR_UNWILLING_TO_PERFORM on a read-only database; other
 * failures go through ldb_mdb_error() with the raw LMDB status left in
 * lmdb->error.
 */
152 static int lmdb_store(struct ltdb_private *ltdb,
154 struct ldb_val data, int flags)
156 struct lmdb_private *lmdb = ltdb->lmdb_private;
163 if (ltdb->read_only) {
164 return LDB_ERR_UNWILLING_TO_PERFORM;
167 txn = lmdb_trans_get_tx(lmdb_private_trans_head(lmdb));
169 ldb_debug(lmdb->ldb, LDB_DEBUG_FATAL, "No transaction");
170 lmdb->error = MDB_PANIC;
171 return ldb_mdb_error(lmdb->ldb, lmdb->error);
/* a NULL name asks LMDB for the unnamed database of the environment */
174 lmdb->error = mdb_dbi_open(txn, NULL, 0, &dbi);
175 if (lmdb->error != MDB_SUCCESS) {
176 return ldb_mdb_error(lmdb->ldb, lmdb->error);
179 mdb_key.mv_size = key.length;
180 mdb_key.mv_data = key.data;
182 mdb_data.mv_size = data.length;
183 mdb_data.mv_data = data.data;
185 if (flags == TDB_INSERT) {
186 mdb_flags = MDB_NOOVERWRITE;
187 } else if ((flags == TDB_MODIFY)) {
189 * Modifying a record, ensure that it exists.
190 * This mimics the TDB semantics
193 lmdb->error = mdb_get(txn, dbi, &mdb_key, &value);
194 if (lmdb->error != MDB_SUCCESS) {
/*
 * NOTE(review): the lookup above ran on the write transaction, yet this
 * path commits the cached read transaction - confirm this is intended.
 */
195 if (ltdb->read_lock_count == 0 && lmdb->read_txn != NULL) {
196 mdb_txn_commit(lmdb->read_txn);
197 lmdb->read_txn = NULL;
199 return ldb_mdb_error(lmdb->ldb, lmdb->error);
206 lmdb->error = mdb_put(txn, dbi, &mdb_key, &mdb_data, mdb_flags);
207 if (lmdb->error != MDB_SUCCESS) {
208 return ldb_mdb_error(lmdb->ldb, lmdb->error);
/* lmdb->error is MDB_SUCCESS at this point */
211 return ldb_mdb_err_map(lmdb->error);
/*
 * Delete the record stored under key, using the transaction returned for
 * the record from lmdb_private_trans_head().
 *
 * Returns LDB_ERR_UNWILLING_TO_PERFORM on a read-only database; other
 * failures go through ldb_mdb_error() with the raw LMDB status left in
 * lmdb->error.
 */
215 static int lmdb_delete(struct ltdb_private *ltdb, struct ldb_val key)
217 struct lmdb_private *lmdb = ltdb->lmdb_private;
222 if (ltdb->read_only) {
223 return LDB_ERR_UNWILLING_TO_PERFORM;
226 txn = lmdb_trans_get_tx(lmdb_private_trans_head(lmdb));
228 ldb_debug(lmdb->ldb, LDB_DEBUG_FATAL, "No transaction");
229 lmdb->error = MDB_PANIC;
230 return ldb_mdb_error(lmdb->ldb, lmdb->error);
/* a NULL name asks LMDB for the unnamed database of the environment */
233 lmdb->error = mdb_dbi_open(txn, NULL, 0, &dbi);
234 if (lmdb->error != MDB_SUCCESS) {
235 return ldb_mdb_error(lmdb->ldb, lmdb->error);
238 mdb_key.mv_size = key.length;
239 mdb_key.mv_data = key.data;
241 lmdb->error = mdb_del(txn, dbi, &mdb_key, NULL);
242 if (lmdb->error != MDB_SUCCESS) {
243 return ldb_mdb_error(lmdb->ldb, lmdb->error);
/* lmdb->error is MDB_SUCCESS at this point */
245 return ldb_mdb_err_map(lmdb->error);
/*
 * Walk the records of the database with an LMDB cursor and call fn for
 * each one, passing the key and value as ldb_val structures that point
 * at the memory LMDB returned.
 *
 * The walk runs in the transaction chosen by get_current_txn().  Running
 * off the end of the data (MDB_NOTFOUND) is not an error.  Afterwards the
 * cursor is closed and, if no read lock is held, the cached read
 * transaction is committed.
 * NOTE(review): the handling of fn's return value is not visible here.
 */
248 static int lmdb_traverse_fn(struct ltdb_private *ltdb,
249 ldb_kv_traverse_fn fn,
252 struct lmdb_private *lmdb = ltdb->lmdb_private;
257 MDB_cursor *cursor = NULL;
260 txn = get_current_txn(lmdb);
262 ldb_debug(lmdb->ldb, LDB_DEBUG_FATAL, "No transaction");
263 lmdb->error = MDB_PANIC;
264 return ldb_mdb_error(lmdb->ldb, lmdb->error);
267 lmdb->error = mdb_dbi_open(txn, NULL, 0, &dbi);
268 if (lmdb->error != MDB_SUCCESS) {
269 return ldb_mdb_error(lmdb->ldb, lmdb->error);
272 lmdb->error = mdb_cursor_open(txn, dbi, &cursor);
273 if (lmdb->error != MDB_SUCCESS) {
277 while ((lmdb->error = mdb_cursor_get(
279 &mdb_data, MDB_NEXT)) == MDB_SUCCESS) {
281 struct ldb_val key = {
282 .length = mdb_key.mv_size,
283 .data = mdb_key.mv_data,
285 struct ldb_val data = {
286 .length = mdb_data.mv_size,
287 .data = mdb_data.mv_data,
290 ret = fn(ltdb, key, data, ctx);
/* the cursor reports MDB_NOTFOUND once every record has been visited */
295 if (lmdb->error == MDB_NOTFOUND) {
296 lmdb->error = MDB_SUCCESS;
299 if (cursor != NULL) {
300 mdb_cursor_close(cursor);
/* end the implicit read transaction unless a read lock keeps it alive */
303 if (ltdb->read_lock_count == 0 && lmdb->read_txn != NULL) {
304 mdb_txn_commit(lmdb->read_txn);
305 lmdb->read_txn = NULL;
308 if (lmdb->error != MDB_SUCCESS) {
309 return ldb_mdb_error(lmdb->ldb, lmdb->error);
311 return ldb_mdb_err_map(lmdb->error);
314 static int lmdb_update_in_iterate(struct ltdb_private *ltdb,
320 struct lmdb_private *lmdb = ltdb->lmdb_private;
322 int ret = LDB_SUCCESS;
325 * Need to take a copy of the data as the delete operation alters the
326 * data, as it is in private lmdb memory.
328 copy.length = data.length;
329 copy.data = talloc_memdup(ltdb, data.data, data.length);
330 if (copy.data == NULL) {
331 lmdb->error = MDB_PANIC;
332 return ldb_oom(lmdb->ldb);
335 lmdb->error = lmdb_delete(ltdb, key);
336 if (lmdb->error != MDB_SUCCESS) {
340 "Failed to delete %*.*s "
341 "for rekey as %*.*s: %s",
342 (int)key.length, (int)key.length,
343 (const char *)key.data,
344 (int)key2.length, (int)key2.length,
345 (const char *)key.data,
346 mdb_strerror(lmdb->error));
347 ret = ldb_mdb_error(lmdb->ldb, lmdb->error);
350 lmdb->error = lmdb_store(ltdb, key2, copy, 0);
351 if (lmdb->error != MDB_SUCCESS) {
355 "Failed to rekey %*.*s as %*.*s: %s",
356 (int)key.length, (int)key.length,
357 (const char *)key.data,
358 (int)key2.length, (int)key2.length,
359 (const char *)key.data,
360 mdb_strerror(lmdb->error));
361 ret = ldb_mdb_error(lmdb->ldb, lmdb->error);
366 if (copy.data != NULL) {
367 TALLOC_FREE(copy.data);
372 * Explicity invalidate the data, as the delete has done this
379 /* Handles only a single record */
380 static int lmdb_parse_record(struct ltdb_private *ltdb, struct ldb_val key,
381 int (*parser)(struct ldb_val key, struct ldb_val data,
385 struct lmdb_private *lmdb = ltdb->lmdb_private;
392 txn = get_current_txn(lmdb);
394 ldb_debug(lmdb->ldb, LDB_DEBUG_FATAL, "No transaction active");
395 lmdb->error = MDB_PANIC;
396 return ldb_mdb_error(lmdb->ldb, lmdb->error);
399 lmdb->error = mdb_dbi_open(txn, NULL, 0, &dbi);
400 if (lmdb->error != MDB_SUCCESS) {
401 return ldb_mdb_error(lmdb->ldb, lmdb->error);
404 mdb_key.mv_size = key.length;
405 mdb_key.mv_data = key.data;
407 lmdb->error = mdb_get(txn, dbi, &mdb_key, &mdb_data);
408 if (lmdb->error != MDB_SUCCESS) {
409 /* TODO closing a handle should not even be necessary */
410 mdb_dbi_close(lmdb->env, dbi);
411 if (ltdb->read_lock_count == 0 && lmdb->read_txn != NULL) {
412 mdb_txn_commit(lmdb->read_txn);
413 lmdb->read_txn = NULL;
415 if (lmdb->error == MDB_NOTFOUND) {
416 return LDB_ERR_NO_SUCH_OBJECT;
418 return ldb_mdb_error(lmdb->ldb, lmdb->error);
420 data.data = mdb_data.mv_data;
421 data.length = mdb_data.mv_size;
423 /* TODO closing a handle should not even be necessary */
424 mdb_dbi_close(lmdb->env, dbi);
426 /* We created a read transaction, commit it */
427 if (ltdb->read_lock_count == 0 && lmdb->read_txn != NULL) {
428 mdb_txn_commit(lmdb->read_txn);
429 lmdb->read_txn = NULL;
431 return parser(key, data, ctx);
/*
 * Take a read lock.
 *
 * The first read lock taken outside a write transaction begins an LMDB
 * transaction; every call increments ltdb->read_lock_count on success.
 * Returns the LDB mapping of the LMDB status.
 */
435 static int lmdb_lock_read(struct ldb_module *module)
437 void *data = ldb_module_get_private(module);
438 struct ltdb_private *ltdb = talloc_get_type(data, struct ltdb_private);
439 struct lmdb_private *lmdb = ltdb->lmdb_private;
441 lmdb->error = MDB_SUCCESS;
442 if (ltdb->in_transaction == 0 &&
443 ltdb->read_lock_count == 0) {
444 lmdb->error = mdb_txn_begin(lmdb->env,
449 if (lmdb->error != MDB_SUCCESS) {
450 return ldb_mdb_error(lmdb->ldb, lmdb->error);
453 ltdb->read_lock_count++;
454 return ldb_mdb_err_map(lmdb->error);
/*
 * Drop a read lock.
 *
 * Releasing the last read lock outside a write transaction commits the
 * read transaction and clears lmdb->read_txn; in every case
 * ltdb->read_lock_count is decremented.
 */
457 static int lmdb_unlock_read(struct ldb_module *module)
459 void *data = ldb_module_get_private(module);
460 struct ltdb_private *ltdb = talloc_get_type(data, struct ltdb_private);
462 if (ltdb->in_transaction == 0 && ltdb->read_lock_count == 1) {
463 struct lmdb_private *lmdb = ltdb->lmdb_private;
464 mdb_txn_commit(lmdb->read_txn);
465 lmdb->read_txn = NULL;
466 ltdb->read_lock_count--;
469 ltdb->read_lock_count--;
473 static int lmdb_transaction_start(struct ltdb_private *ltdb)
475 struct lmdb_private *lmdb = ltdb->lmdb_private;
476 struct lmdb_trans *ltx;
477 struct lmdb_trans *ltx_head;
480 /* Do not take out the transaction lock on a read-only DB */
481 if (ltdb->read_only) {
482 return LDB_ERR_UNWILLING_TO_PERFORM;
485 ltx = talloc_zero(lmdb, struct lmdb_trans);
487 return ldb_oom(lmdb->ldb);
490 ltx_head = lmdb_private_trans_head(lmdb);
492 tx_parent = lmdb_trans_get_tx(ltx_head);
494 lmdb->error = mdb_txn_begin(lmdb->env, tx_parent, 0, <x->tx);
495 if (lmdb->error != MDB_SUCCESS) {
496 return ldb_mdb_error(lmdb->ldb, lmdb->error);
499 trans_push(lmdb, ltx);
501 return ldb_mdb_err_map(lmdb->error);
/*
 * Abort the write transaction returned by lmdb_private_trans_head() and
 * take its record off the transaction list.
 * LDB_ERR_OPERATIONS_ERROR is one of the possible results.
 */
504 static int lmdb_transaction_cancel(struct ltdb_private *ltdb)
506 struct lmdb_trans *ltx;
507 struct lmdb_private *lmdb = ltdb->lmdb_private;
509 ltx = lmdb_private_trans_head(lmdb);
511 return LDB_ERR_OPERATIONS_ERROR;
514 mdb_txn_abort(ltx->tx);
515 trans_finished(lmdb, ltx);
/* Prepare-commit hook of the key/value operations; nothing to do for LMDB. */
519 static int lmdb_transaction_prepare_commit(struct ltdb_private *ltdb)
521 /* No need to prepare a commit */
/*
 * Commit the write transaction returned by lmdb_private_trans_head(),
 * store the LMDB status in lmdb->error and take the record off the
 * transaction list.
 * LDB_ERR_OPERATIONS_ERROR is one of the possible results.
 */
525 static int lmdb_transaction_commit(struct ltdb_private *ltdb)
527 struct lmdb_trans *ltx;
528 struct lmdb_private *lmdb = ltdb->lmdb_private;
530 ltx = lmdb_private_trans_head(lmdb);
532 return LDB_ERR_OPERATIONS_ERROR;
535 lmdb->error = mdb_txn_commit(ltx->tx);
/* the record is unlinked whether or not the commit succeeded */
536 trans_finished(lmdb, ltx);
541 static int lmdb_error(struct ltdb_private *ltdb)
543 return ldb_mdb_err_map(ltdb->lmdb_private->error);
546 static const char *lmdb_errorstr(struct ltdb_private *ltdb)
548 return mdb_strerror(ltdb->lmdb_private->error);
/*
 * Return a name string for this backend.
 * NOTE(review): the body is not visible here - confirm what is returned.
 */
551 static const char * lmdb_name(struct ltdb_private *ltdb)
/* has_changed hook of the key/value operations. */
556 static bool lmdb_changed(struct ltdb_private *ltdb)
559 * lmdb does not provide a quick way to determine if the database
560 * has changed. This function always returns true.
562 * Note that tdb uses a sequence number that allows this function
563 * to be implemented efficiently.
/*
 * Table of key/value operations implemented by this backend; it is
 * installed as ltdb->kv_ops in lmdb_connect().
 */
568 static struct kv_db_ops lmdb_key_value_ops = {
570 .delete = lmdb_delete,
571 .iterate = lmdb_traverse_fn,
572 .update_in_iterate = lmdb_update_in_iterate,
573 .fetch_and_parse = lmdb_parse_record,
574 .lock_read = lmdb_lock_read,
575 .unlock_read = lmdb_unlock_read,
576 .begin_write = lmdb_transaction_start,
577 .prepare_write = lmdb_transaction_prepare_commit,
578 .finish_write = lmdb_transaction_commit,
579 .abort_write = lmdb_transaction_cancel,
581 .errorstr = lmdb_errorstr,
583 .has_changed = lmdb_changed,
584 .transaction_active = lmdb_transaction_active,
/*
 * Derive the database file path from a connection URL.
 *
 * A URL containing ':' is compared against MDB_URL_PREFIX; when it starts
 * with that prefix the path is the text following it.
 * NOTE(review): the handling of a non-matching prefix and of a URL
 * without ':' is not visible here.
 */
587 static const char *lmdb_get_path(const char *url)
592 if (strchr(url, ':')) {
593 if (strncmp(url, MDB_URL_PREFIX, MDB_URL_PREFIX_SIZE) != 0) {
596 path = url + MDB_URL_PREFIX_SIZE;
/*
 * talloc destructor for the lmdb private data (registered in
 * lmdb_pvt_open()).
 *
 * A process whose pid differs from the one recorded at open time takes a
 * separate path that avoids mdb_env_close() and transaction commits.
 * Otherwise any open read transaction and every transaction on the list
 * are aborted, and the environment is closed.
 */
604 static int lmdb_pvt_destructor(struct lmdb_private *lmdb)
606 struct lmdb_trans *ltx = NULL;
608 /* Check if this is a forked child */
609 if (getpid() != lmdb->pid) {
612 * We cannot call mdb_env_close or commit any transactions,
613 * otherwise they might appear finished in the parent.
617 if (mdb_env_get_fd(lmdb->env, &fd) == 0) {
621 /* Remove the pointer, so that no access should occur */
628 * Close the read transaction if it's open
630 if (lmdb->read_txn != NULL) {
631 mdb_txn_abort(lmdb->read_txn);
634 if (lmdb->env == NULL) {
639 * Abort any currently active transactions
641 ltx = lmdb_private_trans_head(lmdb);
642 while (ltx != NULL) {
643 mdb_txn_abort(ltx->tx);
/* unlinking the record lets the next lookup return the following one */
644 trans_finished(lmdb, ltx);
645 ltx = lmdb_private_trans_head(lmdb);
648 mdb_env_close(lmdb->env);
/*
 * Create and open the LMDB environment for path and record it in lmdb.
 *
 * With LDB_FLG_DONT_CREATE_DB a path that cannot be stat()ed yields
 * LDB_ERR_UNAVAILABLE.  The environment is opened with MDB_NOSUBDIR and
 * MDB_NOTLS; LDB_FLG_RDONLY and LDB_FLG_NOSYNC add MDB_RDONLY and
 * MDB_NOSYNC.  The opening process's pid is stored so the destructor can
 * recognise a different process.
 */
654 static int lmdb_pvt_open(TALLOC_CTX *mem_ctx,
655 struct ldb_context *ldb,
658 struct lmdb_private *lmdb)
661 unsigned int mdb_flags;
663 if (flags & LDB_FLG_DONT_CREATE_DB) {
665 if (stat(path, &st) != 0) {
666 return LDB_ERR_UNAVAILABLE;
670 ret = mdb_env_create(&lmdb->env);
672 ldb_asprintf_errstring(
674 "Could not create MDB environment %s: %s\n",
677 return LDB_ERR_OPERATIONS_ERROR;
680 /* Close when lmdb is released */
681 talloc_set_destructor(lmdb, lmdb_pvt_destructor);
/* 16LL forces the multiplication into long long, giving a 16 GiB map */
683 ret = mdb_env_set_mapsize(lmdb->env, 16LL * GIGABYTE);
/* NOTE(review): the message below says "open" but this is the set_mapsize failure path */
685 ldb_asprintf_errstring(
687 "Could not open MDB environment %s: %s\n",
690 return ldb_mdb_err_map(ret);
/* NOTE(review): the result of mdb_env_set_maxreaders() is not checked */
693 mdb_env_set_maxreaders(lmdb->env, 100000);
694 /* MDB_NOSUBDIR implies there is a separate file called path and a
695 * separate lockfile called path-lock
697 mdb_flags = MDB_NOSUBDIR|MDB_NOTLS;
698 if (flags & LDB_FLG_RDONLY) {
699 mdb_flags |= MDB_RDONLY;
701 if (flags & LDB_FLG_NOSYNC) {
702 mdb_flags |= MDB_NOSYNC;
704 ret = mdb_env_open(lmdb->env, path, mdb_flags, 0644);
706 ldb_asprintf_errstring(ldb,
707 "Could not open DB %s: %s\n",
708 path, mdb_strerror(ret));
710 return ldb_mdb_err_map(ret);
713 /* Store the original pid during the LMDB open */
714 lmdb->pid = getpid();
/*
 * Connect an ldb context to an LMDB database.
 *
 * The path is taken from url by lmdb_get_path().  The generic ltdb private
 * data and the LMDB private data are allocated on ldb, the LMDB
 * environment is opened with lmdb_pvt_open(), and the LMDB key/value
 * operations are installed before control passes to init_store(), whose
 * result is returned.
 * LDB_ERR_OPERATIONS_ERROR is returned on the failure paths visible here.
 */
720 int lmdb_connect(struct ldb_context *ldb,
723 const char *options[],
724 struct ldb_module **_module)
726 const char *path = NULL;
727 struct lmdb_private *lmdb = NULL;
728 struct ltdb_private *ltdb = NULL;
732 * We hold locks, so we must use a private event context
733 * on each returned handle
735 ldb_set_require_private_event_context(ldb);
737 path = lmdb_get_path(url);
739 ldb_debug(ldb, LDB_DEBUG_ERROR, "Invalid mdb URL '%s'", url);
740 return LDB_ERR_OPERATIONS_ERROR;
743 ltdb = talloc_zero(ldb, struct ltdb_private);
746 return LDB_ERR_OPERATIONS_ERROR;
749 lmdb = talloc_zero(ldb, struct lmdb_private);
753 return LDB_ERR_OPERATIONS_ERROR;
756 ltdb->kv_ops = &lmdb_key_value_ops;
758 ret = lmdb_pvt_open(ldb, ldb, path, flags, lmdb);
759 if (ret != LDB_SUCCESS) {
763 ltdb->lmdb_private = lmdb;
/* mirror the read-only flag so the write paths can refuse early */
764 if (flags & LDB_FLG_RDONLY) {
765 ltdb->read_only = true;
767 return init_store(ltdb, "ldb_mdb backend", ldb, options, _module);