#include "tdb_private.h"
-/* 'right' merges can involve O(n^2) cost when combined with a
- traverse, so they are disabled until we find a way to do them in
- O(1) time
-*/
-#define USE_RIGHT_MERGES 0
-
/* read a freelist record and check for simple errors */
int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct tdb_record *rec)
{
return 0;
}
-
-#if USE_RIGHT_MERGES
-/* Remove an element from the freelist. Must have alloc lock. */
-static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next)
-{
- tdb_off_t last_ptr, i;
-
- /* read in the freelist top */
- last_ptr = FREELIST_TOP;
- while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
- if (i == off) {
- /* We've found it! */
- return tdb_ofs_write(tdb, last_ptr, &next);
- }
- /* Follow chain (next offset is at start of record) */
- last_ptr = i;
- }
- tdb->ecode = TDB_ERR_CORRUPT;
- TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%u\n", off));
- return -1;
-}
-#endif
-
-
/* update a record tailer (must hold allocation lock) */
static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
const struct tdb_record *rec)
* 0 if left was not a free record
* 1 if left was free and successfully merged.
*
- * The currend record is handed in with pointer and fully read record.
+ * The current record is handed in with pointer and fully read record.
*
* The left record pointer and struct can be retrieved as result
* in lp and lr;
return 1;
}
+/**
+ * Check whether the record left of a given freelist record is
+ * also a freelist record, and if so, merge the two records.
+ *
+ * Return code:
+ * -1 upon error
+ * 0 if left was not a free record
+ * 1 if left was free and successfully merged.
+ *
+ * Note that a non-zero result of read_record_on_left() is also
+ * reported as 0, not as -1: only a failed read of the record at
+ * rec_ptr or a failed merge_with_left_record() call yields -1.
+ *
+ * In this variant, the input record is specified just as the pointer
+ * and is read from the database if needed.
+ *
+ * next_ptr will contain the original record's next pointer after
+ * successful merging (which will be lost after merging), so that
+ * the caller can update the last pointer.
+ * next_ptr may be NULL, in which case nothing is stored. It is not
+ * written on any return path other than the one returning 1.
+ */
+static int check_merge_ptr_with_left_record(struct tdb_context *tdb,
+ tdb_off_t rec_ptr,
+ tdb_off_t *next_ptr)
+{
+ tdb_off_t left_ptr;
+ struct tdb_record rec, left_rec;
+ int ret;
+
+ ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec);
+ if (ret != 0) {
+ return 0; /* left neighbour not readable: handled like "not free" */
+ }
+
+ if (left_rec.magic != TDB_FREE_MAGIC) {
+ return 0; /* left neighbour is not a free record, nothing to merge */
+ }
+
+ /* It's free - expand to include it. */
+
+ ret = tdb->methods->tdb_read(tdb, rec_ptr, &rec,
+ sizeof(rec), DOCONV()); /* rec is only read once a merge is known to be possible */
+ if (ret != 0) {
+ return -1;
+ }
+
+ ret = merge_with_left_record(tdb, left_ptr, &left_rec, &rec);
+ if (ret != 0) {
+ return -1;
+ }
+
+ if (next_ptr != NULL) {
+ *next_ptr = rec.next; /* hand the successor back so the caller can relink the chain */
+ }
+
+ return 1;
+}
+
/**
* Add an element into the freelist.
*
goto fail;
}
-#if USE_RIGHT_MERGES
- /* Look right first (I'm an Australian, dammit) */
- if (offset + sizeof(*rec) + rec->rec_len + sizeof(*rec) <= tdb->map_size) {
- tdb_off_t right = offset + sizeof(*rec) + rec->rec_len;
- struct tdb_record r;
-
- if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right));
- goto left;
- }
-
- /* If it's free, expand to include it. */
- if (r.magic == TDB_FREE_MAGIC) {
- if (remove_from_freelist(tdb, right, r.next) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right));
- goto left;
- }
- rec->rec_len += sizeof(r) + r.rec_len;
- if (update_tailer(tdb, offset, rec) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
- goto fail;
- }
- }
- }
-left:
-#endif
-
ret = check_merge_with_left_record(tdb, offset, rec, NULL, NULL);
if (ret == -1) {
goto fail;
struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec)
{
tdb_off_t rec_ptr, last_ptr, newrec_ptr;
+ struct tdb_chainwalk_ctx chainwalk;
+ bool modified;
struct {
tdb_off_t rec_ptr, last_ptr;
tdb_len_t rec_len;
} bestfit;
float multiplier = 1.0;
+ bool merge_created_candidate;
/* over-allocate to reduce fragmentation */
length *= 1.25;
length = TDB_ALIGN(length, TDB_ALIGNMENT);
again:
+ merge_created_candidate = false;
last_ptr = FREELIST_TOP;
/* read in the freelist top */
if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
return 0;
+ modified = false;
+ tdb_chainwalk_init(&chainwalk, rec_ptr);
+
bestfit.rec_ptr = 0;
bestfit.last_ptr = 0;
bestfit.rec_len = 0;
issues when faced with a slowly increasing record size.
*/
while (rec_ptr) {
+ int ret;
+ tdb_off_t left_ptr;
+ struct tdb_record left_rec;
+
if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
return 0;
}
+ ret = check_merge_with_left_record(tdb, rec_ptr, rec,
+ &left_ptr, &left_rec);
+ if (ret == -1) {
+ return 0;
+ }
+ if (ret == 1) {
+ /* merged */
+ rec_ptr = rec->next;
+ ret = tdb_ofs_write(tdb, last_ptr, &rec->next);
+ if (ret == -1) {
+ return 0;
+ }
+
+ /*
+ * We have merged the current record into the left
+ * neighbour. So our traverse of the freelist will
+ * skip it and consider the next record in the chain.
+ *
+ * But the enlarged left neighbour may be a candidate.
+ * If it is, we can not directly use it, though.
+ * The only thing we can do and have to do here is to
+ * update the current best fit size in the chain if the
+ * current best fit is the left record. (By that we may
+ * worsen the best fit we already had, but this is not a
+ * problem.)
+ *
+ * If the current best fit is not the left record,
+ * all we can do is remember the fact that a merge
+ * created a new candidate so that we can trigger
+ * a second walk of the freelist if at the end of
+ * the first walk we have not found any fit.
+ * This way we can avoid expanding the database.
+ */
+
+ if (bestfit.rec_ptr == left_ptr) {
+ bestfit.rec_len = left_rec.rec_len;
+ }
+
+ if (left_rec.rec_len > length) {
+ merge_created_candidate = true;
+ }
+
+ modified = true;
+
+ continue;
+ }
+
if (rec->rec_len >= length) {
if (bestfit.rec_ptr == 0 ||
rec->rec_len < bestfit.rec_len) {
last_ptr = rec_ptr;
rec_ptr = rec->next;
+ if (!modified) {
+ bool ok;
+ ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
+ if (!ok) {
+ return 0;
+ }
+ }
+
/* if we've found a record that is big enough, then
stop searching if its also not too big. The
definition of 'too big' changes as we scan
return newrec_ptr;
}
+ if (merge_created_candidate) {
+ goto again;
+ }
+
/* we didn't find enough space. See if we can expand the
database and if we can then try again */
if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0);
}
+static void tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
+{ /* Runs tdb_trim_dead() on chain "hash" with tdb->max_dead_records forced to 0 for the duration of the call. */
+ uint32_t max_dead_records = tdb->max_dead_records; /* saved so the setting can be restored below */
+
+ tdb->max_dead_records = 0; /* presumably makes tdb_trim_dead() drop every dead record in the chain - confirm against tdb_trim_dead() */
+
+ tdb_trim_dead(tdb, hash);
+
+ tdb->max_dead_records = max_dead_records; /* put the original limit back */
+}
+
/*
* Chain "hash" is assumed to be locked
*/
struct tdb_record *rec)
{
tdb_off_t ret;
- int i;
+ uint32_t i;
if (tdb->max_dead_records == 0) {
/*
if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
return 0;
}
+ /*
+ * Dead records can happen even if max_dead_records==0, they
+ * are older than the max_dead_records concept: They happen if
+ * tdb_delete happens concurrently with a traverse.
+ */
+ tdb_purge_dead(tdb, hash);
ret = tdb_allocate_from_freelist(tdb, length, rec);
tdb_unlock(tdb, -1, F_WRLCK);
return ret;
}
-/*
- return the size of the freelist - used to decide if we should repack
-*/
-_PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb)
+/**
+ * Merge adjacent records in the freelist.
+ *
+ * Walks the chain starting at FREELIST_TOP and calls
+ * check_merge_ptr_with_left_record() on each record visited. When
+ * that reports a merge, the predecessor's next pointer is rewritten
+ * so that the chain skips the merged record.
+ *
+ * count_records (may be NULL) receives the number of loop
+ * iterations, i.e. records visited; count_merged (may be NULL)
+ * receives the number of merges done. Both are only written when
+ * the walk finishes without error.
+ *
+ * Returns 0 when the walk finishes, -1 if tdb_lock() or a merge
+ * attempt fails, and the non-zero result of tdb_ofs_write() if
+ * relinking fails. A failing tdb_ofs_read() in the loop condition
+ * just ends the walk and is not reported as an error.
+ *
+ * The lock taken with tdb_lock(tdb, -1, ...) is held for the whole
+ * walk and released on every path after it was acquired.
+ */
+static int tdb_freelist_merge_adjacent(struct tdb_context *tdb,
+ int *count_records, int *count_merged)
+{
+ tdb_off_t cur, next;
+ int count = 0;
+ int merged = 0;
+ int ret;
+
+ ret = tdb_lock(tdb, -1, F_RDLCK); /* NOTE(review): a read lock is taken although the loop below calls tdb_ofs_write() - confirm F_RDLCK is intended */
+ if (ret == -1) {
+ return -1;
+ }
+
+ cur = FREELIST_TOP;
+ while (tdb_ofs_read(tdb, cur, &next) == 0 && next != 0) { /* cur is the predecessor, next the record under inspection */
+ tdb_off_t next2;
+
+ count++;
+
+ ret = check_merge_ptr_with_left_record(tdb, next, &next2);
+ if (ret == -1) {
+ goto done;
+ }
+ if (ret == 1) {
+ /*
+ * merged:
+ * now let cur->next point to next2 instead of next
+ */
+
+ ret = tdb_ofs_write(tdb, cur, &next2);
+ if (ret != 0) {
+ goto done;
+ }
+
+ next = next2;
+ merged++;
+ }
+
+ cur = next; /* NOTE(review): after a merge this is next2, so next2 itself is never passed to check_merge_ptr_with_left_record(), and if next2 == 0 the loop condition reads at offset 0 - confirm this is intended */
+ }
+
+ if (count_records != NULL) {
+ *count_records = count;
+ }
+
+ if (count_merged != NULL) {
+ *count_merged = merged;
+ }
+
+ ret = 0;
+
+done:
+ tdb_unlock(tdb, -1, F_RDLCK);
+ return ret;
+}
+
+/**
+ * return the size of the freelist - no merging done
+ */
+static int tdb_freelist_size_no_merge(struct tdb_context *tdb)
{
tdb_off_t ptr;
int count=0;
tdb_unlock(tdb, -1, F_RDLCK);
return count;
}
+
+/**
+ * return the size of the freelist - used to decide if we should repack
+ *
+ * As a side effect, adjacent records are merged unless the
+ * database is read-only, in order to reduce the fragmentation
+ * without repacking.
+ *
+ * Returns the record count. For a read-only database this is the
+ * unmodified result of tdb_freelist_size_no_merge(); otherwise it
+ * is the count reported by tdb_freelist_merge_adjacent(), or -1 if
+ * that call returns non-zero.
+ */
+_PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb)
+{
+
+ int count = 0;
+
+ if (tdb->read_only) {
+ count = tdb_freelist_size_no_merge(tdb); /* counting only, the file is not modified */
+ } else {
+ int ret;
+ ret = tdb_freelist_merge_adjacent(tdb, &count, NULL); /* the number of merges is not needed here */
+ if (ret != 0) {
+ return -1;
+ }
+ }
+
+ return count;
+}