Add a vfs_preopen module to hide fs latencies
author Volker Lendecke <vl@samba.org>
Tue, 10 Mar 2009 17:02:21 +0000 (18:02 +0100)
committer Volker Lendecke <vl@samba.org>
Tue, 10 Mar 2009 17:11:56 +0000 (18:11 +0100)
docs-xml/manpages-3/vfs_preopen.8.xml [new file with mode: 0644]
source3/Makefile.in
source3/configure.in
source3/modules/vfs_preopen.c [new file with mode: 0644]

diff --git a/docs-xml/manpages-3/vfs_preopen.8.xml b/docs-xml/manpages-3/vfs_preopen.8.xml
new file mode 100644 (file)
index 0000000..a84d472
--- /dev/null
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
+<refentry id="vfs_preopen.8">
+
+<refmeta>
+       <refentrytitle>vfs_preopen</refentrytitle>
+       <manvolnum>8</manvolnum>
+       <refmiscinfo class="source">Samba</refmiscinfo>
+       <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+       <refmiscinfo class="version">3.3</refmiscinfo>
+</refmeta>
+
+<refnamediv>
+       <refname>vfs_preopen</refname>
+       <refpurpose>Hide read latencies for applications reading numbered files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+       <cmdsynopsis>
+               <command>vfs objects = preopen</command>
+       </cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1>
+       <title>DESCRIPTION</title>
+
+       <para>This VFS module is part of the
+       <citerefentry><refentrytitle>samba</refentrytitle>
+       <manvolnum>7</manvolnum></citerefentry> suite.</para>
+
+       <para>This module assists applications that want to read numbered
+       files in sequence with very strict latency requirements. One area
+       where this happens is video streaming applications that want to read
+       one file per frame.</para>
+
+       <para>When you use this module, a number of helper processes are
+       started that speculatively open files and read a number of bytes to
+       prime the file system cache, so that later on when the real
+       application's request comes along, no disk access is necessary.</para>
+
+       <para>This module is stackable.</para>
+
+</refsect1>
+
+
+<refsect1>
+       <title>OPTIONS</title>
+
+       <variablelist>
+
+               <varlistentry>
+               <term>preopen:names = /pattern/</term>
+               <listitem>
+               <para>
+               preopen:names specifies the file name pattern which should
+               trigger the preopen helpers to do their work. We assume that
+               the files are numbered incrementally. So if your file names
+               are numbered FRAME00000.frm FRAME00001.frm and so on you would
+               list them as <command>preopen:names=/FRAME*.frm/</command>
+               </para>
+               </listitem>
+               </varlistentry>
+
+               <varlistentry>
+               <term>preopen:num_bytes = BYTES</term>
+               <listitem>
+               <para>
+               Specifies the number of bytes the helpers should speculatively
+               read, defaults to 1.
+               </para>
+               </listitem>
+               </varlistentry>
+
+               <varlistentry>
+               <term>preopen:helpers = NUM-PROCS</term>
+               <listitem>
+               <para>
+               Number of forked helper processes, defaults to 1.
+               </para>
+               </listitem>
+               </varlistentry>
+
+               <varlistentry>
+               <term>preopen:queuelen = NUM-FILES</term>
+               <listitem>
+               <para>
+               Number of files that should be speculatively opened. Defaults
+               to the 10 subsequent files.
+               </para>
+               </listitem>
+               </varlistentry>
+
+       </variablelist>
+</refsect1>
+
+<refsect1>
+       <title>VERSION</title>
+       <para>This man page is correct for version 3.3 of the Samba suite.
+       </para>
+</refsect1>
+
+<refsect1>
+       <title>AUTHOR</title>
+
+       <para>The original Samba software and related utilities
+       were created by Andrew Tridgell. Samba is now developed
+       by the Samba Team as an Open Source project similar
+       to the way the Linux kernel is developed.</para>
+
+       <para>The PREOPEN VFS module was created with contributions from
+       Volker Lendecke and the developers at IBM.
+       </para>
+</refsect1>
+
+</refentry>
index 6aabcf0c8d215ded7559fa0dc52627f12da3290d..76fd91a31ee1b52b6f856cb9c06a5de41c56aa0a 100644 (file)
@@ -667,6 +667,7 @@ VFS_READAHEAD_OBJ = modules/vfs_readahead.o
 VFS_TSMSM_OBJ = modules/vfs_tsmsm.o
 VFS_FILEID_OBJ = modules/vfs_fileid.o
 VFS_AIO_FORK_OBJ = modules/vfs_aio_fork.o
+VFS_PREOPEN_OBJ = modules/vfs_preopen.o
 VFS_SYNCOPS_OBJ = modules/vfs_syncops.o
 VFS_ACL_XATTR_OBJ = modules/vfs_acl_xattr.o
 VFS_ACL_TDB_OBJ = modules/vfs_acl_tdb.o
@@ -2567,6 +2568,10 @@ bin/aio_fork.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_FORK_OBJ)
        @echo "Building plugin $@"
        @$(SHLD_MODULE) $(VFS_AIO_FORK_OBJ)
 
+bin/preopen.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_PREOPEN_OBJ)
+       @echo "Building plugin $@"
+       @$(SHLD_MODULE) $(VFS_PREOPEN_OBJ)
+
 bin/acl_xattr.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_ACL_XATTR_OBJ)
        @echo "Building plugin $@"
        @$(SHLD_MODULE) $(VFS_ACL_XATTR_OBJ)
index e48ff345540782ffe2dc9201757ca4b2ed4c335d..2af1545d58164f9cd1c11446660dfa2b84c3f240 100644 (file)
@@ -417,7 +417,7 @@ dnl These have to be built static:
 default_static_modules="pdb_smbpasswd pdb_tdbsam pdb_wbc_sam rpc_lsarpc rpc_samr rpc_winreg rpc_initshutdown rpc_dssetup rpc_wkssvc rpc_svcctl rpc_ntsvcs rpc_netlogon rpc_netdfs rpc_srvsvc rpc_spoolss2 rpc_eventlog auth_sam auth_unix auth_winbind auth_wbc auth_server auth_domain auth_builtin auth_netlogond vfs_default nss_info_template"
 
 dnl These are preferably build shared, and static if dlopen() is not available
-default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer"
+default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer vfs_preopen"
 
 if test "x$developer" = xyes; then
    default_static_modules="$default_static_modules rpc_rpcecho"
@@ -6185,6 +6185,7 @@ SMB_MODULE(vfs_readahead, \$(VFS_READAHEAD_OBJ), "bin/readahead.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_tsmsm, \$(VFS_TSMSM_OBJ), "bin/tsmsm.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_aio_fork, \$(VFS_AIO_FORK_OBJ), "bin/aio_fork.$SHLIBEXT", VFS)
+SMB_MODULE(vfs_preopen, \$(VFS_PREOPEN_OBJ), "bin/preopen.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_zfsacl, \$(VFS_ZFSACL_OBJ), "bin/zfsacl.$SHLIBEXT", VFS)
 SMB_MODULE(vfs_notify_fam, \$(VFS_NOTIFY_FAM_OBJ), "bin/notify_fam.$SHLIBEXT", VFS)
diff --git a/source3/modules/vfs_preopen.c b/source3/modules/vfs_preopen.c
new file mode 100644 (file)
index 0000000..25b9e7f
--- /dev/null
@@ -0,0 +1,456 @@
+/*
+ * Force a readahead of files by opening them and reading the first bytes
+ *
+ * Copyright (C) Volker Lendecke 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+
+struct preopen_state;
+
+/*
+ * Parent-side bookkeeping for one forked helper process.
+ */
+struct preopen_helper {
+       /* back pointer to the state this helper belongs to */
+       struct preopen_state *state;
+       /* read event registered on "fd" via event_add_fd() */
+       struct fd_event *fde;
+       /* result of sys_fork() for this helper */
+       pid_t pid;
+       /* parent's end of the socketpair, -1 when no helper is attached */
+       int fd;
+       /*
+        * Set when a file name was written to the helper, cleared when
+        * its one-byte reply was read. Left at true once the helper has
+        * been destroyed.
+        */
+       bool busy;
+};
+
+/*
+ * Per-share state of the preopen module: the helper pool plus the
+ * description of the file name sequence currently being prefetched.
+ */
+struct preopen_state {
+       /* number of entries in "helpers" (preopen:helpers) */
+       int num_helpers;
+       struct preopen_helper *helpers;
+
+       size_t to_read;         /* How many bytes to read in children? */
+       /* how far ahead of the last open to queue (preopen:queuelen) */
+       int queue_max;
+
+       char *template_fname;   /* Filename to be sent to children */
+       size_t number_start;    /* start offset into "template_fname" */
+       int num_digits;         /* How many digits is the number long? */
+
+       int fnum_sent;          /* last fname sent to children */
+
+       int fnum_queue_end;     /* last fname to be sent, based on
+                                * last open call + preopen:queuelen
+                                */
+
+       /* patterns from preopen:names, matched with is_in_path() */
+       name_compare_entry *preopen_names;
+};
+
+/*
+ * Tear down one helper: drop its fd event, close the socket, then kill
+ * and reap the child. The slot is left marked busy so that
+ * preopen_queue_run() never selects it again.
+ */
+static void preopen_helper_destroy(struct preopen_helper *c)
+{
+       int status;
+
+       if (c->fd == -1) {
+               /*
+                * Never started or already torn down: "fde" and "pid"
+                * do not describe a live helper, so leave them alone.
+                */
+               c->busy = true;
+               return;
+       }
+
+       /*
+        * Remove the event before closing the descriptor, otherwise the
+        * event loop keeps watching a closed (and possibly reused) fd.
+        */
+       TALLOC_FREE(c->fde);
+       close(c->fd);
+       c->fd = -1;
+
+       /* kill() with pid 0 or -1 would signal far more than one child */
+       if (c->pid > 0) {
+               kill(c->pid, SIGKILL);
+               waitpid(c->pid, &status, 0);
+       }
+       c->busy = true;
+}
+
+/*
+ * Hand file names to idle helpers until either every helper is busy or
+ * the announced range (fnum_sent up to fnum_queue_end) is exhausted.
+ * Each name is built by printing the next number into template_fname.
+ */
+static void preopen_queue_run(struct preopen_state *state)
+{
+       char *pdelimiter;
+       char delimiter;
+
+       if (state->template_fname == NULL) {
+               /*
+                * preopen_open() drops the template when it cannot
+                * build or parse a name. A helper reply arriving after
+                * that must not dereference it.
+                */
+               return;
+       }
+
+       /*
+        * snprintf() below writes a '\0' right after the number, so
+        * remember the character that lives there and put it back.
+        */
+       pdelimiter = state->template_fname + state->number_start
+               + state->num_digits;
+       delimiter = *pdelimiter;
+
+       while (state->fnum_sent < state->fnum_queue_end) {
+
+               ssize_t written;
+               size_t to_write;
+               int helper;
+
+               /* find the first helper that is not waiting for a reply */
+               for (helper=0; helper<state->num_helpers; helper++) {
+                       if (state->helpers[helper].busy) {
+                               continue;
+                       }
+                       break;
+               }
+               if (helper == state->num_helpers) {
+                       /* everyone is busy */
+                       return;
+               }
+
+               snprintf(state->template_fname + state->number_start,
+                        state->num_digits + 1,
+                        "%.*lu", state->num_digits,
+                        (long unsigned int)(state->fnum_sent + 1));
+               *pdelimiter = delimiter;
+
+               /*
+                * Send the whole talloc'ed buffer; the helper reads
+                * until the last byte it received is '\0'.
+                */
+               to_write = talloc_get_size(state->template_fname);
+               written = write_data(state->helpers[helper].fd,
+                                    state->template_fname, to_write);
+               state->helpers[helper].busy = true;
+
+               if ((written < 0) || ((size_t)written != to_write)) {
+                       /* helper is unusable, it stays marked busy */
+                       preopen_helper_destroy(&state->helpers[helper]);
+               }
+               state->fnum_sent += 1;
+       }
+}
+
+/*
+ * fd event handler for a helper socket. One byte from the helper means
+ * it finished its file and is idle again, so the queue is run once
+ * more. End of file or a read error gets the helper destroyed.
+ */
+static void preopen_helper_readable(struct event_context *ev,
+                                   struct fd_event *fde, uint16_t flags,
+                                   void *priv)
+{
+       struct preopen_helper *h = (struct preopen_helper *)priv;
+       struct preopen_state *owner = h->state;
+       char ack;
+
+       if (flags & EVENT_FD_READ) {
+               if (read(h->fd, &ack, 1) <= 0) {
+                       preopen_helper_destroy(h);
+                       return;
+               }
+
+               /* helper has answered, give it the next name if any */
+               h->busy = false;
+               preopen_queue_run(owner);
+       }
+}
+
+/*
+ * talloc destructor for the state: destroy every helper that still has
+ * an open socket. Always returns 0.
+ */
+static int preopen_helpers_destructor(struct preopen_state *c)
+{
+       int idx = 0;
+
+       while (idx < c->num_helpers) {
+               struct preopen_helper *h = &c->helpers[idx];
+
+               if (h->fd != -1) {
+                       preopen_helper_destroy(h);
+               }
+               idx += 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Helper side: read one '\0'-terminated file name from sock_fd, open
+ * that file read-only, read up to to_read bytes into filebuf and write
+ * a single byte back on sock_fd. The name buffer is doubled whenever it
+ * fills up and *pnamebuf is updated to the new buffer.
+ *
+ * Returns false when reading the name fails or the buffer cannot be
+ * grown, true otherwise (also when the file could not be opened).
+ */
+static bool preopen_helper_open_one(int sock_fd, char **pnamebuf,
+                                   size_t to_read, void *filebuf)
+{
+       char *name = *pnamebuf;
+       ssize_t nwritten, nread;
+       ssize_t have = 0;
+       char ack = 0;
+       int file_fd;
+
+       do {
+               ssize_t got;
+
+               got = read(sock_fd, name + have,
+                          talloc_get_size(name) - have);
+               if (got <= 0) {
+                       return false;
+               }
+               have += got;
+
+               if (have == talloc_get_size(name)) {
+                       /* buffer is full, double it */
+                       name = TALLOC_REALLOC_ARRAY(
+                               NULL, name, char,
+                               talloc_get_size(name) * 2);
+                       if (name == NULL) {
+                               return false;
+                       }
+                       *pnamebuf = name;
+               }
+       } while (name[have-1] != '\0');
+
+       file_fd = open(name, O_RDONLY);
+       if (file_fd != -1) {
+               nread = read(file_fd, filebuf, to_read);
+               close(file_fd);
+       }
+
+       /* tell the parent we are done, whether the open worked or not */
+       nwritten = write(sock_fd, &ack, 1);
+       return true;
+}
+
+/*
+ * Main loop of a helper process: allocate the name and read buffers,
+ * then serve requests from fd until preopen_helper_open_one() fails.
+ * Always returns false.
+ */
+static bool preopen_helper(int fd, size_t to_read)
+{
+       char *namebuf = TALLOC_ARRAY(NULL, char, 1024);
+       void *readbuf;
+
+       if (namebuf == NULL) {
+               return false;
+       }
+
+       readbuf = talloc_size(NULL, to_read);
+       if (readbuf != NULL) {
+               for (;;) {
+                       if (!preopen_helper_open_one(fd, &namebuf, to_read,
+                                                    readbuf)) {
+                               break;
+                       }
+               }
+               TALLOC_FREE(readbuf);
+       }
+
+       TALLOC_FREE(namebuf);
+       return false;
+}
+
+/*
+ * Start one helper: create a socketpair, fork, run preopen_helper() in
+ * the child and register a read event for the parent's end.
+ *
+ * On any failure the slot is left in a defined "not running" state
+ * (fd == -1, pid == -1, fde == NULL, busy == true) so that it is never
+ * selected by preopen_queue_run() and never signalled.
+ */
+static NTSTATUS preopen_init_helper(struct preopen_helper *h)
+{
+       int fdpair[2];
+       int wstatus;
+       NTSTATUS status;
+
+       h->fde = NULL;
+       h->pid = -1;
+       h->fd = -1;
+       h->busy = true;
+
+       if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) == -1) {
+               status = map_nt_error_from_unix(errno);
+               DEBUG(10, ("socketpair() failed: %s\n", strerror(errno)));
+               return status;
+       }
+
+       h->pid = sys_fork();
+
+       if (h->pid == -1) {
+               status = map_nt_error_from_unix(errno);
+               /* do not leak the socketpair when fork fails */
+               close(fdpair[0]);
+               close(fdpair[1]);
+               return status;
+       }
+
+       if (h->pid == 0) {
+               /* child: serve requests on our end until the parent leaves */
+               close(fdpair[0]);
+               preopen_helper(fdpair[1], h->state->to_read);
+               exit(0);
+       }
+       close(fdpair[1]);
+       h->fd = fdpair[0];
+       h->fde = event_add_fd(smbd_event_context(), h->state, h->fd,
+                             EVENT_FD_READ, preopen_helper_readable, h);
+       if (h->fde == NULL) {
+               close(h->fd);
+               h->fd = -1;
+               /* nobody will reap this child later, do it here */
+               kill(h->pid, SIGKILL);
+               waitpid(h->pid, &wstatus, 0);
+               h->pid = -1;
+               return NT_STATUS_NO_MEMORY;
+       }
+       h->busy = false;
+       return NT_STATUS_OK;
+}
+
+/*
+ * Allocate a preopen_state on mem_ctx and try to start num_helpers
+ * helper processes. Helpers that fail to start are left unused; the
+ * call itself only fails on allocation errors.
+ *
+ * On success *presult holds the new state, whose talloc destructor
+ * tears down all running helpers.
+ */
+static NTSTATUS preopen_init_helpers(TALLOC_CTX *mem_ctx, size_t to_read,
+                                    int num_helpers, int queue_max,
+                                    struct preopen_state **presult)
+{
+       struct preopen_state *result;
+       int i;
+
+       result = talloc(mem_ctx, struct preopen_state);
+       if (result == NULL) {
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       result->num_helpers = num_helpers;
+       result->helpers = TALLOC_ARRAY(result, struct preopen_helper,
+                                      num_helpers);
+       if (result->helpers == NULL) {
+               TALLOC_FREE(result);
+               return NT_STATUS_NO_MEMORY;
+       }
+
+       /*
+        * talloc() does not zero memory: give every field a defined
+        * value, preopen_open() reads fnum_queue_end before writing it.
+        */
+       result->to_read = to_read;
+       result->queue_max = queue_max;
+       result->template_fname = NULL;
+       result->number_start = 0;
+       result->num_digits = 0;
+       result->fnum_sent = 0;
+       result->fnum_queue_end = 0;
+       result->preopen_names = NULL;
+
+       for (i=0; i<num_helpers; i++) {
+               result->helpers[i].state = result;
+               result->helpers[i].fde = NULL;
+               result->helpers[i].pid = -1;
+               result->helpers[i].fd = -1;
+               /* not usable until preopen_init_helper() succeeded */
+               result->helpers[i].busy = true;
+       }
+
+       talloc_set_destructor(result, preopen_helpers_destructor);
+
+       for (i=0; i<num_helpers; i++) {
+               /* best effort: a helper that fails to start stays unused */
+               preopen_init_helper(&result->helpers[i]);
+       }
+
+       *presult = result;
+       return NT_STATUS_OK;
+}
+
+/*
+ * Free callback registered with SMB_VFS_HANDLE_SET_DATA: release the
+ * state stored in *ptr and clear the pointer.
+ */
+static void preopen_free_helpers(void **ptr)
+{
+       void *data = *ptr;
+
+       TALLOC_FREE(data);
+       *ptr = NULL;
+}
+
+/*
+ * Return the preopen state attached to this VFS handle, creating it on
+ * first use from the preopen:names, preopen:num_bytes, preopen:helpers
+ * and preopen:queuelen share parameters.
+ *
+ * Returns NULL when preopen:names is not set or when setting up the
+ * state fails.
+ */
+static struct preopen_state *preopen_state_get(vfs_handle_struct *handle)
+{
+       struct preopen_state *state;
+       NTSTATUS status;
+       const char *namelist;
+       int num_bytes;
+       int num_helpers;
+       int queuelen;
+
+       if (SMB_VFS_HANDLE_TEST_DATA(handle)) {
+               /* already set up on an earlier call */
+               SMB_VFS_HANDLE_GET_DATA(handle, state, struct preopen_state,
+                                       return NULL);
+               return state;
+       }
+
+       namelist = lp_parm_const_string(SNUM(handle->conn), "preopen", "names",
+                                       NULL);
+       if (namelist == NULL) {
+               return NULL;
+       }
+
+       num_bytes = lp_parm_int(SNUM(handle->conn), "preopen", "num_bytes", 1);
+       num_helpers = lp_parm_int(SNUM(handle->conn), "preopen", "helpers", 1);
+       queuelen = lp_parm_int(SNUM(handle->conn), "preopen", "queuelen", 10);
+
+       status = preopen_init_helpers(NULL, num_bytes, num_helpers, queuelen,
+                                     &state);
+       if (!NT_STATUS_IS_OK(status)) {
+               return NULL;
+       }
+
+       set_namearray(&state->preopen_names, (char *)namelist);
+       if (state->preopen_names == NULL) {
+               TALLOC_FREE(state);
+               return NULL;
+       }
+
+       if (!SMB_VFS_HANDLE_TEST_DATA(handle)) {
+               SMB_VFS_HANDLE_SET_DATA(handle, state, preopen_free_helpers,
+                                       struct preopen_state, return NULL);
+       }
+
+       return state;
+}
+
+/*
+ * Find the sequence number in the last path component of fname: the
+ * first position where at least three digits follow each other.
+ *
+ * On success *pnum is the parsed number, *pstart_idx the offset of its
+ * first digit within fname and *pnum_digits the number of digits.
+ * Returns false when no such number exists or when it cannot be
+ * incremented without overflowing.
+ */
+static bool preopen_parse_fname(const char *fname, unsigned long *pnum,
+                               size_t *pstart_idx, int *pnum_digits)
+{
+       const char *p;
+       char *end = NULL;
+       unsigned long num;
+
+       p = strrchr_m(fname, '/');
+       if (p == NULL) {
+               /* no directory part: the name itself starts at fname */
+               p = fname;
+       } else {
+               /* skip the '/' */
+               p += 1;
+       }
+
+       while (p[0] != '\0') {
+               /*
+                * isdigit() is only defined for unsigned char values.
+                * The && chain stops at the first non-digit, so p[1]
+                * and p[2] are never read past the terminating '\0'.
+                */
+               if (isdigit((unsigned char)p[0])
+                   && isdigit((unsigned char)p[1])
+                   && isdigit((unsigned char)p[2])) {
+                       break;
+               }
+               p += 1;
+       }
+       if (*p == '\0') {
+               /* no digits around */
+               return false;
+       }
+
+       num = strtoul(p, &end, 10);
+
+       if (num+1 < num) {
+               /* overflow */
+               return false;
+       }
+
+       *pnum = num;
+       *pstart_idx = (p - fname);
+       *pnum_digits = (end - p);
+       return true;
+}
+
+/*
+ * VFS open hook. The open itself is always passed on to the next
+ * module. For a successful read-only open of a file matching
+ * preopen:names, the number found in the file name is used to queue
+ * the following preopen:queuelen files to the helper processes.
+ *
+ * Returns whatever SMB_VFS_NEXT_OPEN returned.
+ */
+static int preopen_open(vfs_handle_struct *handle, const char *fname,
+                       files_struct *fsp, int flags, mode_t mode)
+{
+       struct preopen_state *state;
+       int res;
+       unsigned long num;
+
+       DEBUG(10, ("preopen_open called on %s\n", fname));
+
+       state = preopen_state_get(handle);
+       if (state == NULL) {
+               return SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+       }
+
+       res = SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+       if (res == -1) {
+               return -1;
+       }
+
+       /*
+        * Only look at the access mode: a read-only open may carry
+        * additional flag bits and should still trigger the helpers.
+        */
+       if ((flags & O_ACCMODE) != O_RDONLY) {
+               return res;
+       }
+
+       if (!is_in_path(fname, state->preopen_names, true)) {
+               DEBUG(10, ("%s does not match the preopen:names list\n",
+                          fname));
+               return res;
+       }
+
+       /* the helpers get the full path below the share root */
+       TALLOC_FREE(state->template_fname);
+       state->template_fname = talloc_asprintf(
+               state, "%s/%s", fsp->conn->connectpath, fname);
+
+       if (state->template_fname == NULL) {
+               return res;
+       }
+
+       if (!preopen_parse_fname(state->template_fname, &num,
+                                &state->number_start, &state->num_digits)) {
+               TALLOC_FREE(state->template_fname);
+               return res;
+       }
+
+       if (num > state->fnum_sent) {
+               /*
+                * Helpers were too slow, there's no point in reading
+                * files in helpers that we already read in the
+                * parent.
+                */
+               state->fnum_sent = num;
+       }
+
+       if ((state->fnum_queue_end != 0) /* Something was started earlier */
+           && (num < (state->fnum_queue_end - state->queue_max))) {
+               /*
+                * "num" is before the queue we announced. This means
+                * a new run is started.
+                */
+               state->fnum_sent = num;
+       }
+
+       state->fnum_queue_end = num + state->queue_max;
+
+       preopen_queue_run(state);
+
+       return res;
+}
+
+/* VFS operations structure */
+
+/*
+ * Only the open operation is hooked, as a transparent layer; the
+ * second entry terminates the table.
+ */
+static vfs_op_tuple preopen_ops[] = {
+       {SMB_VFS_OP(preopen_open),      SMB_VFS_OP_OPEN,
+        SMB_VFS_LAYER_TRANSPARENT},
+       {SMB_VFS_OP(NULL),              SMB_VFS_OP_NOOP,
+        SMB_VFS_LAYER_NOOP}
+};
+
+/*
+ * Module entry point: register the operations table under the name
+ * "preopen" and return the registration status.
+ */
+NTSTATUS vfs_preopen_init(void);
+NTSTATUS vfs_preopen_init(void)
+{
+       NTSTATUS status;
+
+       status = smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
+                                 "preopen", preopen_ops);
+       return status;
+}