tdb: introduce tdb->hdr_ofs
authorVolker Lendecke <vl@samba.org>
Thu, 21 Feb 2013 15:34:32 +0000 (16:34 +0100)
committerJeremy Allison <jra@samba.org>
Thu, 22 May 2014 19:05:15 +0000 (21:05 +0200)
This makes it possible to have some extra headers before
the real tdb content starts in the file.

This will be used e.g. to implement locking based on robust mutexes.

Pair-Programmed-With: Stefan Metzmacher <metze@samba.org>
Pair-Programmed-With: Michael Adam <obnox@samba.org>
Signed-off-by: Volker Lendecke <vl@samba.org>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Michael Adam <obnox@samba.org>
Reviewed-by: Jeremy Allison <jra@samba.org>
lib/tdb/common/io.c
lib/tdb/common/open.c
lib/tdb/common/summary.c
lib/tdb/common/tdb_private.h
lib/tdb/test/run-3G-file.c

index 11dfefd102b68cdb924680c5c0d94ebe8bf0d734..07d22ccdb2163c2b0915c3f2d869f1f24263620a 100644 (file)
 
 #include "tdb_private.h"
 
+/*
+ * tdb->hdr_ofs is 0 for now.
+ *
+ * Note that the 4GB limit of tdb_off_t applies only to
+ * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs!
+ */
+/*
+ * Turn a tdb-relative offset into an offset within the underlying
+ * file by adding tdb->hdr_ofs to *off in place.
+ *
+ * Returns true and stores the shifted offset in *off on success.
+ * If the sum compares smaller than either operand it is treated as
+ * an overflow: errno is set to EIO, *off is left untouched and false
+ * is returned.
+ *
+ * NOTE(review): the check runs after the addition has already been
+ * performed; if off_t is a signed type this relies on wrap-around
+ * that C does not guarantee for signed arithmetic -- confirm.
+ */
+static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
+{
+       off_t tmp = tdb->hdr_ofs + *off;
+
+       if ((tmp < tdb->hdr_ofs) || (tmp < *off)) {
+               errno = EIO;
+               return false;
+       }
+
+       *off = tmp;
+       return true;
+}
+/*
+ * pwrite() on tdb->fd with the offset shifted by tdb->hdr_ofs.
+ *
+ * "offset" is relative to the tdb content. Returns the result of
+ * pwrite(), or -1 without writing anything if tdb_adjust_offset()
+ * rejects the offset.
+ */
+static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
+                         size_t count, off_t offset)
+{
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+       return pwrite(tdb->fd, buf, count, offset);
+}
+/*
+ * pread() on tdb->fd with the offset shifted by tdb->hdr_ofs.
+ *
+ * "offset" is relative to the tdb content. Returns the result of
+ * pread(), or -1 without reading anything if tdb_adjust_offset()
+ * rejects the offset.
+ */
+static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
+                        size_t count, off_t offset)
+{
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+       return pread(tdb->fd, buf, count, offset);
+}
+/*
+ * ftruncate() on tdb->fd to "length" bytes of tdb content, i.e. the
+ * length passed to ftruncate() is length + tdb->hdr_ofs.
+ *
+ * Returns the result of ftruncate(), or -1 without touching the file
+ * if tdb_adjust_offset() rejects the length.
+ */
+static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
+{
+       if (!tdb_adjust_offset(tdb, &length)) {
+               return -1;
+       }
+       return ftruncate(tdb->fd, length);
+}
+/*
+ * fstat() on tdb->fd, reporting st_size without the leading
+ * tdb->hdr_ofs bytes.
+ *
+ * Returns -1 if fstat() fails. If the file is shorter than
+ * tdb->hdr_ofs, errno is set to EIO and -1 is returned; in that case
+ * *buf still holds the unmodified fstat() result. On success only
+ * st_size is adjusted, all other fields are as fstat() filled them.
+ */
+static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
+{
+       int ret;
+
+       ret = fstat(tdb->fd, buf);
+       if (ret == -1) {
+               return -1;
+       }
+
+       if (buf->st_size < tdb->hdr_ofs) {
+               errno = EIO;
+               return -1;
+       }
+       buf->st_size -= tdb->hdr_ofs;
+
+       return ret;
+}
+
 /* check for an out of bounds access - if it is out of bounds then
    see if the database has been expanded by someone else and expand
    if necessary
@@ -58,7 +122,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
                return -1;
        }
 
-       if (fstat(tdb->fd, &st) == -1) {
+       if (tdb_fstat(tdb, &st) == -1) {
                tdb->ecode = TDB_ERR_IO;
                return -1;
        }
@@ -122,16 +186,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
                tdb->ecode = TDB_ERR_IO;
                return -1;
 #else
-               ssize_t written = pwrite(tdb->fd, buf, len, off);
+               ssize_t written;
+
+               written = tdb_pwrite(tdb, buf, len, off);
+
                if ((written != (ssize_t)len) && (written != -1)) {
                        /* try once more */
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
                                 "%zi of %u bytes at %u, trying once more\n",
                                 written, len, off));
-                       written = pwrite(tdb->fd, (const char *)buf+written,
-                                        len-written,
-                                        off+written);
+                       written = tdb_pwrite(tdb, (const char *)buf+written,
+                                            len-written, off+written);
                }
                if (written == -1) {
                        /* Ensure ecode is set for log fn. */
@@ -176,7 +242,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
                tdb->ecode = TDB_ERR_IO;
                return -1;
 #else
-               ssize_t ret = pread(tdb->fd, buf, len, off);
+               ssize_t ret;
+
+               ret = tdb_pread(tdb, buf, len, off);
                if (ret != (ssize_t)len) {
                        /* Ensure ecode is set for log fn. */
                        tdb->ecode = TDB_ERR_IO;
@@ -258,7 +326,8 @@ int tdb_mmap(struct tdb_context *tdb)
        if (should_mmap(tdb)) {
                tdb->map_ptr = mmap(NULL, tdb->map_size,
                                    PROT_READ|(tdb->read_only? 0:PROT_WRITE),
-                                   MAP_SHARED|MAP_FILE, tdb->fd, 0);
+                                   MAP_SHARED|MAP_FILE, tdb->fd,
+                                   tdb->hdr_ofs);
 
                /*
                 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
@@ -303,12 +372,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
                return -1;
        }
 
-       if (ftruncate(tdb->fd, new_size) == -1) {
+       if (tdb_ftruncate(tdb, new_size) == -1) {
                char b = 0;
-               ssize_t written = pwrite(tdb->fd,  &b, 1, new_size - 1);
+               ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                if (written == 0) {
                        /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->fd,  &b, 1, new_size - 1);
+                       written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                }
                if (written == 0) {
                        /* again - give up, guessing errno */
@@ -328,10 +397,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
        memset(buf, TDB_PAD_BYTE, sizeof(buf));
        while (addition) {
                size_t n = addition>sizeof(buf)?sizeof(buf):addition;
-               ssize_t written = pwrite(tdb->fd, buf, n, size);
+               ssize_t written = tdb_pwrite(tdb, buf, n, size);
                if (written == 0) {
                        /* prevent infinite loops: try _once_ more */
-                       written = pwrite(tdb->fd, buf, n, size);
+                       written = tdb_pwrite(tdb, buf, n, size);
                }
                if (written == 0) {
                        /* give up, trying to provide a useful errno */
@@ -437,6 +506,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
        /* must know about any previous expansions by another process */
        tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
 
+       /*
+        * Note that we don't care about tdb->hdr_ofs != 0 here.
+        *
+        * The 4GB limitation is just related to tdb->map_size
+        * and the offset calculation in the records.
+        *
+        * The file on disk can be up to 4GB + tdb->hdr_ofs
+        */
        size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
 
        if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
index 17ab0b7c2856372a6c7b9966e414abf0f44c2d3e..162f30d40477d59debd5eff9ba13a5c05eb21661 100644 (file)
@@ -194,6 +194,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
        unsigned v;
        const char *hash_alg;
        uint32_t magic1, magic2;
+       int ret;
 
        ZERO_STRUCT(header);
 
@@ -340,7 +341,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
        if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
            (!tdb->read_only) &&
            (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) {
-               int ret;
                ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
                                 TDB_LOCK_WAIT);
                if (ret == -1) {
@@ -400,8 +400,18 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
                tdb->flags |= TDB_CONVERT;
                tdb_convert(&header, sizeof(header));
        }
-       if (fstat(tdb->fd, &st) == -1)
+
+       /*
+        * We only use st.st_dev and st.st_ino from the raw fstat()
+        * call, everything else needs to use tdb_fstat() in order
+        * to skip tdb->hdr_ofs!
+        */
+       if (fstat(tdb->fd, &st) == -1) {
                goto fail;
+       }
+       tdb->device = st.st_dev;
+       tdb->inode = st.st_ino;
+       ZERO_STRUCT(st);
 
        if (header.rwlocks != 0 &&
            header.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
@@ -446,28 +456,27 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
        }
 
        /* Is it already in the open list?  If so, fail. */
-       if (tdb_already_open(st.st_dev, st.st_ino)) {
+       if (tdb_already_open(tdb->device, tdb->inode)) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
                         "%s (%d,%d) is already open in this process\n",
-                        name, (int)st.st_dev, (int)st.st_ino));
+                        name, (int)tdb->device, (int)tdb->inode));
                errno = EBUSY;
                goto fail;
        }
 
-       /* Beware truncation! */
-       tdb->map_size = st.st_size;
-       if (tdb->map_size != st.st_size) {
-               /* Ensure ecode is set for log fn. */
-               tdb->ecode = TDB_ERR_IO;
-               TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
-                        "len %llu too large!\n", (long long)st.st_size));
+       /*
+        * We used to call tdb_mmap(tdb) directly here. Now the
+        * size has to come from tdb_fstat(), which skips
+        * tdb->hdr_ofs, so we go through tdb_oob(): it calls
+        * tdb_fstat() before calling tdb_mmap().
+        */
+       tdb->map_size = 0;
+       ret = tdb->methods->tdb_oob(tdb, 0, 1, 0);
+       if (ret == -1) {
                errno = EIO;
                goto fail;
        }
 
-       tdb->device = st.st_dev;
-       tdb->inode = st.st_ino;
-       tdb_mmap(tdb);
        if (locked) {
                if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
                        TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
@@ -649,6 +658,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno)));
                goto fail;
        }
+       /*
+        * We only use st.st_dev and st.st_ino from the raw fstat()
+        * call, everything else needs to use tdb_fstat() in order
+        * to skip tdb->hdr_ofs!
+        */
        if (fstat(tdb->fd, &st) != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
                goto fail;
@@ -657,7 +671,16 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n"));
                goto fail;
        }
-       if (tdb_mmap(tdb) != 0) {
+       ZERO_STRUCT(st);
+
+       /*
+        * We used to call tdb_mmap(tdb) directly here. Now the
+        * size has to come from tdb_fstat(), which skips
+        * tdb->hdr_ofs, so we go through tdb_oob(): it calls
+        * tdb_fstat() before calling tdb_mmap().
+        */
+       tdb->map_size = 0;
+       if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) {
                goto fail;
        }
 #endif /* fake pread or pwrite */
index 6f2e0a9e80a4607aa0eafb4801c3e9aa22d0625e..e9989f676f78b83834fe10cdbd9444679b67f3ac 100644 (file)
@@ -18,7 +18,8 @@
 #include "tdb_private.h"
 
 #define SUMMARY_FORMAT \
-       "Size of file/data: %u/%zu\n" \
+       "Size of file/data: %llu/%zu\n" \
+       "Header offset/logical size: %zu/%zu\n" \
        "Number of records: %zu\n" \
        "Incompatible hash: %s\n" \
        "Active/supported feature flags: 0x%08x/0x%08x\n" \
@@ -88,6 +89,7 @@ static size_t get_hash_length(struct tdb_context *tdb, unsigned int i)
 
 _PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
 {
+       off_t file_size;
        tdb_off_t off, rec_off;
        struct tally freet, keys, data, dead, extra, hashval, uncoal;
        struct tdb_record rec;
@@ -165,9 +167,11 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
        for (off = 0; off < tdb->hash_size; off++)
                tally_add(&hashval, get_hash_length(tdb, off));
 
+       file_size = tdb->hdr_ofs + tdb->map_size;
 
        len = asprintf(&ret, SUMMARY_FORMAT,
-                tdb->map_size, keys.total+data.total,
+                (unsigned long long)file_size, keys.total+data.total,
+                (size_t)tdb->hdr_ofs, (size_t)tdb->map_size,
                 keys.num,
                 (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no",
                 (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS,
@@ -182,16 +186,16 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
                 hashval.min, tally_mean(&hashval), hashval.max,
                 uncoal.total,
                 uncoal.min, tally_mean(&uncoal), uncoal.max,
-                keys.total * 100.0 / tdb->map_size,
-                data.total * 100.0 / tdb->map_size,
-                extra.total * 100.0 / tdb->map_size,
-                freet.total * 100.0 / tdb->map_size,
-                dead.total * 100.0 / tdb->map_size,
+                keys.total * 100.0 / file_size,
+                data.total * 100.0 / file_size,
+                extra.total * 100.0 / file_size,
+                freet.total * 100.0 / file_size,
+                dead.total * 100.0 / file_size,
                 (keys.num + freet.num + dead.num)
                 * (sizeof(struct tdb_record) + sizeof(uint32_t))
-                * 100.0 / tdb->map_size,
+                * 100.0 / file_size,
                 tdb->hash_size * sizeof(tdb_off_t)
-                * 100.0 / tdb->map_size);
+                * 100.0 / file_size);
        if (len == -1) {
                goto unlock;
        }
index aa9dd55ba47a26c4ce845c8fe4618f8ea3e1fa9a..4981e2cd6ac613332c8e9ea520dd32957a8b4895 100644 (file)
@@ -202,6 +202,9 @@ struct tdb_context {
        int num_lockrecs;
        struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
        int lockrecs_array_length;
+
+       tdb_off_t hdr_ofs; /* this is 0 for now */
+
        enum TDB_ERROR ecode; /* error code for last tdb error */
        uint32_t hash_size;
        uint32_t feature_flags;
index 67fd54f54fd25bee7c9dcedccf32cf0acc4dab0e..900b1a667a2deaded503cecfcf42790d336731f9 100644 (file)
@@ -22,12 +22,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb,
                return -1;
        }
 
-       if (ftruncate(tdb->fd, size+addition) == -1) {
+       if (tdb_ftruncate(tdb, size+addition) == -1) {
                char b = 0;
-               ssize_t written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
+               ssize_t written = tdb_pwrite(tdb,  &b, 1, (size+addition) - 1);
                if (written == 0) {
                        /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
+                       written = tdb_pwrite(tdb,  &b, 1, (size+addition) - 1);
                }
                if (written == 0) {
                        /* again - give up, guessing errno */