tools: Finds missing files in gluster volume given backend brickpath
authorKotresh HR <khiremat@redhat.com>
Thu, 29 Jan 2015 10:23:19 +0000 (15:53 +0530)
committerVijay Bellur <vbellur@redhat.com>
Mon, 16 Mar 2015 04:20:03 +0000 (21:20 -0700)
The tool finds the missing files in a geo-replication slave volume.
The tool crawls backend .glusterfs of the brickpath, which is passed
as a parameter and stats each entry on slave volume mount to check
the presence of file. The mount used is aux-gfid-mount, hence no path
conversion is required and is fast. The tool needs to be run on every
node in cluster for each brickpath of geo-rep master volume to find
missing files on slave volume. The tool is generic enough and can be
used in non geo-replication context as well.

Most of the crawler code is leveraged from Avati's xfind and is modified
to crawl only .glusterfs (https://github.com/avati/xsync)

Thanks Aravinda for scripts to convert gfid to path.

Change-Id: I84deaaaf638f7c571ff1319b67a3440fe27da810
BUG: 1187140
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/9503
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Makefile.am
configure.ac
doc/tools/gfind_missing_files.md [new file with mode: 0644]
glusterfs.spec.in
tools/Makefile.am [new file with mode: 0644]
tools/gfind_missing_files/Makefile.am [new file with mode: 0644]
tools/gfind_missing_files/gcrawler.c [new file with mode: 0644]
tools/gfind_missing_files/gfid_to_path.py [new file with mode: 0644]
tools/gfind_missing_files/gfid_to_path.sh [new file with mode: 0644]
tools/gfind_missing_files/gfind_missing_files.sh [new file with mode: 0644]

index a35b1642143b05d33eaffca9fd7e8e0b7a345b23..60a8d36131dc6ddfa42ba30558a88e56d9f45dda 100644 (file)
@@ -11,7 +11,7 @@ EXTRA_DIST = autogen.sh \
 
 SUBDIRS = $(ARGP_STANDALONE_DIR) libglusterfs rpc api xlators glusterfsd \
        $(FUSERMOUNT_SUBDIR) doc extras cli heal @SYNCDAEMON_SUBDIR@ \
-       @UMOUNTD_SUBDIR@
+       @UMOUNTD_SUBDIR@ tools
 
 pkgconfigdir = @pkgconfigdir@
 pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
index 0d2ee210539e2299ab0410d0d32f329093b9fdd0..1a513975d9219e683d9140c04d8c3120635db47b 100644 (file)
@@ -211,6 +211,8 @@ AC_CONFIG_FILES([Makefile
                 geo-replication/Makefile
                 geo-replication/src/Makefile
                 geo-replication/syncdaemon/Makefile
+                tools/Makefile
+                tools/gfind_missing_files/Makefile
                 heal/Makefile
                 heal/src/Makefile
                 glusterfs.spec])
diff --git a/doc/tools/gfind_missing_files.md b/doc/tools/gfind_missing_files.md
new file mode 100644 (file)
index 0000000..47241be
--- /dev/null
@@ -0,0 +1,67 @@
+Introduction
+========
+The tool gfind_missing_files.sh can be used to find the missing files in a
+GlusterFS geo-replicated slave volume. The tool uses a multi-threaded crawler
+operating on the backend .glusterfs of a brickpath which is passed as one of
+the parameters to the tool. It does a stat on each entry in the slave volume
+mount to check for the presence of a file. The tool uses the aux-gfid-mount
+thereby avoiding path conversions and potentially saving time.
+
+This tool should be run on every node and each brickpath in a geo-replicated
+master volume to find the missing files on the slave volume.
+
+The script gfind_missing_files.sh is a wrapper script that in turn uses the
+gcrawler binary to do the backend crawling. The script detects the gfids of
+the missing files and runs the gfid-to-path conversion script to list out the
+missing files with their full pathnames.
+
+Usage
+=====
+```sh
+$bash gfind_missing_files.sh <BRICK_PATH> <SLAVE_HOST> <SLAVE_VOL> <OUTFILE>
+            BRICK_PATH -   Full path of the brick
+            SLAVE_HOST -   Hostname of gluster volume
+            SLAVE_VOL  -   Gluster volume name
+            OUTFILE   -    Output file which contains gfids of the missing files
+```
+
+The gfid-to-path conversion uses a quicker algorithm for converting gfids to
+paths and it is possible that in some cases all missing gfids may not be
+converted to their respective paths.
+
+Example output(126733 missing files)
+===================================
+```sh
+$ionice -c 2 -n 7 ./gfind_missing_files.sh /bricks/m3 acdc slave-vol ~/test_results/m3-4.txt
+Calling crawler...
+Crawl Complete.
+gfids of skipped files are available in the file /root/test_results/m3-4.txt
+Starting gfid to path conversion
+Path names of skipped files are available in the file /root/test_results/m3-4.txt_pathnames
+WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to /root/test_results/m3-4.txt_gfids
+Use bash gfid_to_path.sh <brick-path> /root/test_results/m3-4.txt_gfids to convert those GFIDs to Path
+Total Missing File Count : 126733
+```
+In such cases, an additional step is needed to convert those gfids to paths.
+This can be used as shown below:
+```sh
+ $bash gfid_to_path.sh <BRICK_PATH> <GFID_FILE>
+             BRICK_PATH - Full path of the brick.
+             GFID_FILE  - OUTFILE_gfids got from gfind_missing_files.sh
+```
+Things to keep in mind when running the tool
+============================================
+1. Running this tool can result in a crawl of the backend filesystem at each
+   brick which can be intensive. To ensure there is no impact on ongoing I/O on
+   RHS volumes, we recommend that this tool be run at a low I/O scheduling class
+   (best-effort) and priority.
+```sh
+$ionice -c 2 -p <pid of gfind_missing_files.sh>
+```
+
+2. We do not recommend interrupting the tool while it is running
+   (e.g. by pressing Ctrl+C). It is better to wait for the tool to finish
+   execution. In case it is interrupted, manually unmount the slave volume.
+```sh
+    umount <MOUNT_POINT>
+```
index 1b31f51ed7864811897952cc352e3151353ec29a..affb753cb673bdbd4d925f5fc9e7391cc2812dce 100644 (file)
@@ -955,6 +955,10 @@ fi
 %{_datadir}/glusterfs/scripts/generate-gfid-file.sh
 %{_datadir}/glusterfs/scripts/gsync-sync-gfid
 %ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+%{_libexecdir}/glusterfs/gfind_missing_files
+%{_sbindir}/gfind_missing_files
+%exclude %{_libexecdir}/glusterfs/gfind_missing_files/gfid_to_path.pyo
+%exclude %{_libexecdir}/glusterfs/gfind_missing_files/gfid_to_path.pyc
 %endif
 
 %files libs
@@ -1049,6 +1053,9 @@ fi
 %ghost      %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
 
 %changelog
+* Thu Mar 12 2015 Kotresh H R <khiremat@redhat.com>
+- gfind_missing_files tool is included (#1187140)
+
 * Thu Feb 26 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
 - enable cmocka unittest support only when asked for (#1067059)
 
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644 (file)
index 0000000..74229ab
--- /dev/null
@@ -0,0 +1,3 @@
+# Build every tool that lives below tools/.
+SUBDIRS = gfind_missing_files
+
+# No generated files need cleaning at this level.
+CLEANFILES =
diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am
new file mode 100644 (file)
index 0000000..456aad8
--- /dev/null
@@ -0,0 +1,24 @@
+# Install location shared by the wrapper scripts and the gcrawler binary.
+gfindmissingfilesdir = $(libexecdir)/glusterfs/gfind_missing_files
+
+gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \
+       gfid_to_path.py
+
+# Scripts are not distributed automatically, so list them explicitly.
+EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \
+       gfid_to_path.py
+
+gfindmissingfiles_PROGRAMS = gcrawler
+
+gcrawler_SOURCES = gcrawler.c
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+uninstall-local:
+       rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+
+# Expose the wrapper script as $(sbindir)/gfind_missing_files.
+# The target directory may not exist yet in a staged (DESTDIR) install,
+# so create it before placing the symlink.
+install-data-local:
+       $(MKDIR_P) $(DESTDIR)$(sbindir)
+       rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+       ln -s $(libexecdir)/glusterfs/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
new file mode 100644 (file)
index 0000000..517e773
--- /dev/null
@@ -0,0 +1,572 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <assert.h>
+
+#ifndef __FreeBSD__
+#ifdef __NetBSD__
+#include <sys/xattr.h>
+#else
+#include <attr/xattr.h>
+#endif /* __NetBSD__ */
+#endif /* __FreeBSD__ */
+
+#include "list.h"
+
+#define THREAD_MAX 32
+#define BUMP(name) INC(name, 1)
+#define DEFAULT_WORKERS 4
+
+#define NEW(x) {                              \
+        x = calloc (1, sizeof (typeof (*x))); \
+        }
+
+#define err(x ...) fprintf(stderr, x)
+#define out(x ...) fprintf(stdout, x)
+#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0)
+#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0)
+#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0)
+#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0)
+
+/* When non-zero, dbg()/tdbg() print and the root job is tracked for the
+ * completion check in xwork_fini(). */
+int debug = 0;
+/* Slave volume mount point (argv[2]); prefix for the "/.gfid/<name>" lookups. */
+const char *slavemnt = NULL;
+/* Number of crawler threads to spawn, taken from argv[3]. */
+int workers = 0;
+
+/* Counters updated through the INC()/BUMP() macros. */
+struct stats {
+        unsigned long long int cnt_skipped_gfids;
+};
+
+/* Protects stats_total; only initialised when `stats` is non-zero. */
+pthread_spinlock_t stats_lock;
+
+struct stats stats_total;
+/* Enables counting and stats_dump(); nothing in this file sets it non-zero. */
+int stats = 0;
+
+/*
+ * Add `val` to stats_total.cnt_<name> while holding stats_lock.
+ * Expands to a no-op when the global `stats` flag is zero.
+ */
+#define INC(name, val) do {                             \
+        if (!stats)                                     \
+                break;                                  \
+        pthread_spin_lock(&stats_lock);                 \
+        {                                               \
+                stats_total.cnt_##name += val;          \
+        }                                               \
+        pthread_spin_unlock(&stats_lock);               \
+        } while (0)
+
+/*
+ * Print the accumulated counters to stdout.
+ * Does nothing unless the global `stats` flag is non-zero.
+ */
+void
+stats_dump()
+{
+        if (!stats)
+                return;
+
+        out("-------------------------------------------\n");
+        /* The counter is unsigned long long, so it needs %llu. */
+        out("Skipped_Files : %10llu\n", stats_total.cnt_skipped_gfids);
+        out("-------------------------------------------\n");
+}
+
+/* One directory waiting to be (or being) crawled. */
+struct dirjob {
+        /* Links the job into xwork->crawl.list while it is queued. */
+        struct list_head    list;
+
+        /* Directory to open; a private copy made by dirjob_new(). */
+        char               *dirname;
+
+        /* Job that queued this one; NULL for the root job. */
+        struct dirjob      *parent;
+        int                 ret;    /* final status of this subtree */
+        int                 refcnt; /* how many dirjobs have this as parent */
+
+        /* Guards ret and refcnt. */
+        pthread_spinlock_t  lock;
+};
+
+
+/* Shared state of one crawl: the worker threads and their job queue. */
+struct xwork {
+        pthread_t        cthreads[THREAD_MAX]; /* crawler threads */
+        /* Number of crawler threads, as passed to xwork_init(). */
+        int              count;
+        /* Callers currently waiting on `cond` inside xwork_pick(). */
+        int              idle;
+        /* Once non-zero, xwork_pick() hands out no further jobs. */
+        int              stop;
+
+        /* Only crawl.list is used: it is the head of the pending-job queue. */
+        struct dirjob    crawl;
+
+        struct dirjob   *rootjob; /* to verify completion in xwork_fini() */
+
+        /* Protect the queue, `idle` and `stop`; `cond` signals queue changes. */
+        pthread_mutex_t  mutex;
+        pthread_cond_t   cond;
+};
+
+
+/* Take one more reference on `job` and hand the same pointer back. */
+struct dirjob *
+dirjob_ref (struct dirjob *job)
+{
+        pthread_spin_lock (&job->lock);
+        job->refcnt += 1;
+        pthread_spin_unlock (&job->lock);
+
+        return job;
+}
+
+
+/* Release everything owned by `job`; it must no longer be on the queue. */
+void
+dirjob_free (struct dirjob *job)
+{
+        assert (list_empty (&job->list));
+
+        free (job->dirname);
+        pthread_spin_destroy (&job->lock);
+        free (job);
+}
+
+/*
+ * Drop one reference on `job`, folding `err` into its subtree status.
+ * When the last reference goes away the outcome is logged, the job is
+ * freed and the same step is repeated on its parent, walking up the
+ * chain for as long as jobs keep reaching a zero reference count.
+ */
+void
+dirjob_ret (struct dirjob *job, int err)
+{
+        struct dirjob *cur = job;
+        struct dirjob *up = NULL;
+        int            status = err;
+        int            left = 0;
+
+        for (;;) {
+                pthread_spin_lock (&cur->lock);
+                left = --cur->refcnt;
+                cur->ret = (cur->ret || status);
+                pthread_spin_unlock (&cur->lock);
+
+                /* Someone else still holds this job: nothing more to do. */
+                if (left != 0)
+                        break;
+
+                status = cur->ret;
+                if (status)
+                        terr ("Failed: %s (%d)\n", cur->dirname, status);
+                else
+                        tdbg ("Finished: %s\n", cur->dirname);
+
+                up = cur->parent;
+                dirjob_free (cur);
+                if (!up)
+                        break;
+
+                /* The finished job held a reference on its parent. */
+                cur = up;
+        }
+}
+
+
+/*
+ * Create a job for directory `dir` with an initial reference count of
+ * one.  A non-NULL `parent` is referenced and recorded in the new job.
+ * Returns NULL when memory cannot be allocated.
+ */
+struct dirjob *
+dirjob_new (const char *dir, struct dirjob *parent)
+{
+        struct dirjob *fresh = calloc (1, sizeof (typeof (*fresh)));
+
+        if (fresh == NULL)
+                return NULL;
+
+        fresh->dirname = strdup (dir);
+        if (fresh->dirname == NULL) {
+                free (fresh);
+                return NULL;
+        }
+
+        INIT_LIST_HEAD(&fresh->list);
+        pthread_spin_init (&fresh->lock, PTHREAD_PROCESS_PRIVATE);
+
+        fresh->ret = 0;
+        fresh->refcnt = 1;
+        if (parent != NULL)
+                fresh->parent = dirjob_ref (parent);
+
+        return fresh;
+}
+
+/* Append `job` to the crawl queue and wake every waiter on the condvar. */
+void
+xwork_addcrawl (struct xwork *xwork, struct dirjob *job)
+{
+        struct list_head *queue = &xwork->crawl.list;
+
+        pthread_mutex_lock (&xwork->mutex);
+        list_add_tail (&job->list, queue);
+        pthread_cond_broadcast (&xwork->cond);
+        pthread_mutex_unlock (&xwork->mutex);
+}
+
+/*
+ * Create a job for `dir` (child of `parent`) and queue it.
+ * Returns 0 on success, -1 when the job could not be allocated.
+ */
+int
+xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent)
+{
+        struct dirjob *queued = dirjob_new (dir, parent);
+
+        if (queued == NULL)
+                return -1;
+
+        xwork_addcrawl (xwork, queued);
+        return 0;
+}
+
+
+/*
+ * Dequeue the next directory job from the shared crawl list.
+ *
+ * Returns the job at the head of xwork->crawl.list, or NULL when
+ * xwork->stop is set, or when the list is empty and `block` is zero.
+ * With a non-zero `block` the caller waits on xwork->cond and retries.
+ * The only caller in this file, xworker_crawl(), passes block == 0.
+ */
+struct dirjob *
+xwork_pick (struct xwork *xwork, int block)
+{
+        struct dirjob *job = NULL;
+        struct list_head *head = NULL;
+
+        head = &xwork->crawl.list;
+
+        pthread_mutex_lock (&xwork->mutex);
+        {
+                for (;;) {
+                        /* A stop request wins over any queued work. */
+                        if (xwork->stop)
+                                break;
+
+                        if (!list_empty (head)) {
+                                /* Detach the oldest queued job. */
+                                job = list_entry (head->next, typeof(*job),
+                                                  list);
+                                list_del_init (&job->list);
+                                break;
+                        }
+
+                        /* NOTE(review): idle is only raised around the
+                         * wait below; confirm that it can actually reach
+                         * count * 2 with this file's single thread pool.
+                         */
+                        if (((xwork->count * 2) == xwork->idle) &&
+                            list_empty (&xwork->crawl.list)) {
+                                /* no outstanding jobs, and no
+                                   active workers
+                                */
+                                tdbg ("Jobless. Terminating\n");
+                                xwork->stop = 1;
+                                pthread_cond_broadcast (&xwork->cond);
+                                break;
+                        }
+
+                        /* Non-blocking callers give up on an empty queue. */
+                        if (!block)
+                                break;
+
+                        /* Count ourselves as idle for the duration of the wait. */
+                        xwork->idle++;
+                        pthread_cond_wait (&xwork->cond, &xwork->mutex);
+                        xwork->idle--;
+                }
+        }
+        pthread_mutex_unlock (&xwork->mutex);
+
+        return job;
+}
+
+/*
+ * Tell whether directory entry `name` must be ignored by the crawler.
+ * Returns 1 for ".", ".." and the listed housekeeping entries, else 0.
+ * `dirname` is accepted but not consulted.
+ */
+int
+skip_name (const char *dirname, const char *name)
+{
+        static const char *const ignored[] = {
+                ".",
+                "..",
+                "changelogs",
+                "health_check",
+                "indices",
+                "landfill",
+        };
+        size_t idx = 0;
+
+        for (idx = 0; idx < sizeof (ignored) / sizeof (ignored[0]); idx++) {
+                if (strcmp (name, ignored[idx]) == 0)
+                        return 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Decide whether entry `name` found in `job`'s directory can be queued
+ * as a directory without being checked on the slave.  Returns 1 when
+ * `job` is ".glusterfs" itself or one of its direct children, else 0.
+ */
+int
+skip_stat (struct dirjob *job, const char *name)
+{
+        struct dirjob *up = NULL;
+
+        if (!job)
+                return 0;
+
+        if (!strcmp (job->dirname, ".glusterfs")) {
+                tdbg ("Directly adding directories under .glusterfs "
+                      "to global list: %s\n", name);
+                return 1;
+        }
+
+        up = job->parent;
+        if (up && !strcmp (up->dirname, ".glusterfs")) {
+                tdbg ("Directly adding directories under .glusterfs/XX "
+                      "to global list: %s\n", name);
+                return 1;
+        }
+
+        return 0;
+}
+
+/*
+ * Crawl the single directory described by `job`.
+ *
+ * Entries of ".glusterfs" and of its direct children are queued as new
+ * jobs without further checks.  Every other entry name is looked up as
+ * "<slavemnt>/.gfid/<name>"; names that do not exist there are printed
+ * to stdout, one per line.
+ *
+ * Returns 0 on success and non-zero when the directory cannot be read,
+ * a child job cannot be created, or the slave lookup fails with an
+ * error other than ENOENT.
+ */
+int
+xworker_do_crawl (struct xwork *xwork, struct dirjob *job)
+{
+        DIR            *dirp = NULL;
+        int             ret = -1;
+        int             boff;
+        int             plen;
+        int             len;
+        int             saved_errno;
+        struct dirent  *result;
+        /* A union keeps the readdir_r buffer aligned for struct dirent. */
+        union {
+                struct dirent entry;
+                char          space[512];
+        } dbuf;
+        char           *path = NULL;
+        struct dirjob  *cjob = NULL;
+        struct stat     statbuf = {0,};
+        char            gfid_path[4096] = {0,};
+
+
+        /* Room for "<dirname>/<entry name>" plus the terminator. */
+        plen = strlen (job->dirname) + 256 + 2;
+        path = alloca (plen);
+
+        tdbg ("Entering: %s\n", job->dirname);
+
+        dirp = opendir (job->dirname);
+        if (!dirp) {
+                terr ("opendir failed on %s (%s)\n", job->dirname,
+                     strerror (errno));
+                goto out;
+        }
+
+        boff = sprintf (path, "%s/", job->dirname);
+
+        for (;;) {
+                ret = readdir_r (dirp, &dbuf.entry, &result);
+                if (ret) {
+                        /* readdir_r returns the error number itself and
+                         * does not set errno.
+                         */
+                        err ("readdir_r(%s): %s\n", job->dirname,
+                             strerror (ret));
+                        goto out;
+                }
+
+                if (!result) /* EOF */
+                        break;
+
+                if (result->d_ino == 0)
+                        continue;
+
+                if (skip_name (job->dirname, result->d_name))
+                        continue;
+
+                /* It is sure that, children and grandchildren of .glusterfs
+                 * are directories, just add them to global queue.
+                 */
+                if (skip_stat (job, result->d_name)) {
+                        strncpy (path + boff, result->d_name, (plen-boff));
+                        cjob = dirjob_new (path, job);
+                        if (!cjob) {
+                                err ("dirjob_new(%s): %s\n",
+                                     path, strerror (errno));
+                                ret = -1;
+                                goto out;
+                        }
+                        xwork_addcrawl (xwork, cjob);
+                        continue;
+                }
+
+                /* Bounded formatting: a long mount path must not overflow. */
+                len = snprintf (gfid_path, sizeof (gfid_path), "%s/.gfid/%s",
+                                slavemnt, result->d_name);
+                if (len < 0 || (size_t) len >= sizeof (gfid_path)) {
+                        err ("path too long: %s/.gfid/%s\n",
+                             slavemnt, result->d_name);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = lstat (gfid_path, &statbuf);
+                if (ret == 0)
+                        continue;
+
+                /* Capture errno before any stdio call can overwrite it. */
+                saved_errno = errno;
+                if (saved_errno == ENOENT) {
+                        out ("%s\n", result->d_name);
+                        BUMP (skipped_gfids);
+                        continue;
+                }
+
+                err ("stat on slave failed(%s): %s\n",
+                     gfid_path, strerror (saved_errno));
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (dirp)
+                closedir (dirp);
+
+        return ret;
+}
+
+
+/*
+ * Thread entry point: keep taking jobs from the queue without blocking
+ * and crawl each one until xwork_pick() has nothing to hand out.
+ * `data` is the shared struct xwork.  Always returns NULL.
+ */
+void *
+xworker_crawl (void *data)
+{
+        struct xwork  *ctx = data;
+        struct dirjob *next = NULL;
+        int            status = -1;
+
+        for (;;) {
+                next = xwork_pick (ctx, 0);
+                if (next == NULL)
+                        break;
+
+                status = xworker_do_crawl (ctx, next);
+                dirjob_ret (next, status);
+        }
+
+        return NULL;
+}
+
+/*
+ * Wind the crawl down: optionally request a stop, wake all waiters,
+ * join the crawler threads and release the statistics lock.
+ * In debug mode the root job is checked and released as well.
+ * Always returns 0.
+ */
+int
+xwork_fini (struct xwork *xwork, int stop)
+{
+        void *thread_ret = 0;
+        int   idx = 0;
+
+        pthread_mutex_lock (&xwork->mutex);
+        xwork->stop = (stop || xwork->stop) ? 1 : 0;
+        pthread_cond_broadcast (&xwork->cond);
+        pthread_mutex_unlock (&xwork->mutex);
+
+        while (idx < xwork->count) {
+                pthread_join (xwork->cthreads[idx], &thread_ret);
+                tdbg ("CThread id %ld returned %p\n",
+                      xwork->cthreads[idx], thread_ret);
+                idx++;
+        }
+
+        if (debug) {
+                /* Only the reference taken in xwork_init() may remain. */
+                assert (xwork->rootjob->refcnt == 1);
+                dirjob_ret (xwork->rootjob, 0);
+        }
+
+        if (stats)
+                pthread_spin_destroy(&stats_lock);
+
+        return 0;
+}
+
+
+/*
+ * Prepare `xwork`, queue the ".glusterfs" root job and start up to
+ * `count` crawler threads (never more than THREAD_MAX, the size of
+ * xwork->cthreads).
+ *
+ * Returns 0 on success, -1 when the root job cannot be allocated, or
+ * the pthread_create() error number.  On failure xwork->count holds the
+ * number of threads that really exist, so xwork_fini() joins only those.
+ */
+int
+xwork_init (struct xwork *xwork, int count)
+{
+        int  i = 0;
+        int  ret = 0;
+        struct dirjob *rootjob = NULL;
+
+        /* cthreads[] has THREAD_MAX slots; never index past it. */
+        if (count > THREAD_MAX)
+                count = THREAD_MAX;
+
+        if (stats)
+                pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE);
+
+        pthread_mutex_init (&xwork->mutex, NULL);
+        pthread_cond_init (&xwork->cond, NULL);
+
+        INIT_LIST_HEAD (&xwork->crawl.list);
+
+        rootjob = dirjob_new (".glusterfs", NULL);
+        if (!rootjob) {
+                err ("dirjob_new(.glusterfs): %s\n", strerror (ENOMEM));
+                xwork->count = 0;
+                return -1;
+        }
+
+        if (debug)
+                xwork->rootjob = dirjob_ref (rootjob);
+
+        xwork_addcrawl (xwork, rootjob);
+
+        /* Publish the thread count before any thread starts: the workers
+         * read it in xwork_pick().
+         */
+        xwork->count = count;
+        for (i = 0; i < count; i++) {
+                ret = pthread_create (&xwork->cthreads[i], NULL,
+                                      xworker_crawl, xwork);
+                if (ret)
+                        break;
+                tdbg ("Spawned crawler %d thread %ld\n", i,
+                      xwork->cthreads[i]);
+        }
+
+        if (ret) {
+                err ("pthread_create: %s\n", strerror (ret));
+                /* Record how many threads actually exist. */
+                pthread_mutex_lock (&xwork->mutex);
+                xwork->count = i;
+                pthread_mutex_unlock (&xwork->mutex);
+        }
+
+        return ret;
+}
+
+
+/*
+ * Run a complete crawl rooted at `basedir`.
+ *
+ * Changes the working directory to `basedir`, starts the crawler
+ * threads, takes part in the crawl from the calling thread, then shuts
+ * everything down and prints the statistics.
+ *
+ * Returns 0 on success and non-zero when the directory cannot be
+ * entered or the crawler could not be started.
+ */
+int
+xfind (const char *basedir)
+{
+        struct xwork xwork;
+        int          ret = 0;
+        int          fini_ret = 0;
+        char         *cwd = NULL;
+
+        ret = chdir (basedir);
+        if (ret) {
+                err ("%s: %s\n", basedir, strerror (errno));
+                return ret;
+        }
+
+        cwd = getcwd (0, 0);
+        if (!cwd) {
+                err ("getcwd(): %s\n", strerror (errno));
+                return -1;
+        }
+
+        tdbg ("Working directory: %s\n", cwd);
+        free (cwd);
+
+        memset (&xwork, 0, sizeof (xwork));
+
+        ret = xwork_init (&xwork, workers);
+        if (ret == 0)
+                xworker_crawl (&xwork);
+
+        /* A failed init is passed on as a stop request. */
+        fini_ret = xwork_fini (&xwork, ret);
+        stats_dump ();
+
+        /* Do not let the shutdown status hide an initialisation failure. */
+        return ret ? ret : fini_ret;
+}
+
+/*
+ * Validate the command line and fill in the `slavemnt` and `workers`
+ * globals.
+ *
+ *   argv[1]  brick directory; must exist and, except on FreeBSD, carry
+ *            a 16 byte "trusted.glusterfs.volume-id" xattr
+ *   argv[2]  slave volume mount point; must exist
+ *   argv[3]  crawler thread count; values <= 0 select DEFAULT_WORKERS
+ *            and values above THREAD_MAX are reduced to THREAD_MAX
+ *
+ * Returns the brick directory (argv[1]) or NULL after printing an error.
+ */
+static char *
+parse_and_validate_args (int argc, char *argv[])
+{
+        char        *basedir = NULL;
+        struct stat  d = {0, };
+        int          ret = -1;
+#ifndef __FreeBSD__
+        unsigned char volume_id[16];
+#endif /* __FreeBSD__ */
+        char        *slv_mnt = NULL;
+
+        if (argc != 4) {
+                err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
+                      argv[0]);
+                return NULL;
+        }
+
+        basedir = argv[1];
+        ret = lstat (basedir, &d);
+        if (ret) {
+                err ("%s: %s\n", basedir, strerror (errno));
+                return NULL;
+        }
+
+#ifndef __FreeBSD__
+        ret = lgetxattr (basedir, "trusted.glusterfs.volume-id",
+                         volume_id, 16);
+        if (ret != 16) {
+                err ("%s:Not a valid brick path.\n", basedir);
+                return NULL;
+        }
+#endif /* __FreeBSD__ */
+
+        slv_mnt = argv[2];
+        ret = lstat (slv_mnt, &d);
+        if (ret) {
+                err ("%s: %s\n", slv_mnt, strerror (errno));
+                return NULL;
+        }
+        slavemnt = argv[2];
+
+        workers = atoi(argv[3]);
+        if (workers <= 0)
+                workers = DEFAULT_WORKERS;
+
+        /* The thread table holds THREAD_MAX entries; asking for more
+         * would write past its end.
+         */
+        if (workers > THREAD_MAX) {
+                err ("Limiting crawl threads to %d (requested %d)\n",
+                     THREAD_MAX, workers);
+                workers = THREAD_MAX;
+        }
+
+        return basedir;
+}
+
+/*
+ * Entry point: gcrawler <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>.
+ * Exits with 0 when the crawl ran, 1 on invalid arguments or when the
+ * crawl could not be carried out.
+ */
+int
+main (int argc, char *argv[])
+{
+        char *basedir = NULL;
+
+        basedir = parse_and_validate_args (argc, argv);
+        if (!basedir)
+                return 1;
+
+        /* Report failure to the caller instead of always exiting 0. */
+        if (xfind (basedir) != 0)
+                return 1;
+
+        return 0;
+}
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py
new file mode 100644 (file)
index 0000000..8362f68
--- /dev/null
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+import os
+import xattr
+import uuid
+import re
+import errno
+
+# Upper bound on the number of changelog file names tried per GFID.
+CHANGELOG_SEARCH_MAX_TRY = 31
+# Seconds subtracted from the GFID file's ctime before the search starts.
+DEC_CTIME_START = 5
+# GFID that marks the top of the parent chain.
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+# Number of existing changelogs parsed before giving up on a GFID.
+MAX_NUM_CHANGELOGS_TRY = 2
+
+
+def output_not_found(gfid):
+    # Write GFID to stderr
+    sys.stderr.write("%s\n" % gfid)
+
+
+def output_success(path):
+    # Write converted Path to Stdout
+    sys.stdout.write("%s\n" % path)
+
+
+def full_dir_path(gfid):
+    out_path = ""
+    while True:
+        path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+        path_readlink = os.readlink(path)
+        pgfid = os.path.dirname(path_readlink)
+        out_path = os.path.join(os.path.basename(path_readlink), out_path)
+        if pgfid == "../../00/00/%s" % ROOT_GFID:
+            out_path = os.path.join("./", out_path)
+            break
+        gfid = os.path.basename(pgfid)
+    return out_path
+
+
+def find_path_from_changelog(fd, gfid):
+    """
+    In given Changelog File, finds using following pattern
+    <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME>
+    Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path
+    Using readlink and add basename to form Full path.
+    """
+    content = fd.read()
+
+    pattern = "E%s" % gfid
+    pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)"
+    pat = re.compile(pattern)
+    match = pat.search(content)
+
+    if match:
+        pgfid = match.group(2)
+        basename = match.group(3)
+        if pgfid == ROOT_GFID:
+            return os.path.join("./", basename)
+        else:
+            full_path_parent = full_dir_path(pgfid)
+            if full_path_parent:
+                return os.path.join(full_path_parent, basename)
+
+    return None
+
+
+def gfid_to_path(gfid):
+    """
+    Try readlink, if it is directory it succeeds.
+    Get ctime of the GFID file, Decrement by 5 sec
+    Search for Changelog filename, Since Changelog file generated
+    every 15 sec, Search and get immediate next Changelog after the file
+    Creation. Get the Path by searching in Changelog file.
+    Get the resultant file's GFID and Compare with the input, If these
+    GFIDs are different then Some thing is changed(May be Rename)
+    """
+    gfid = gfid.strip()
+    gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+    try:
+        output_success(full_dir_path(gfid))
+        return
+    except OSError:
+        # Not an SymLink
+        pass
+
+    try:
+        ctime = int(os.stat(gpath).st_ctime)
+        ctime -= DEC_CTIME_START
+    except (OSError, IOError):
+        output_not_found(gfid)
+        return
+
+    path = None
+    found_changelog = False
+    changelog_parse_try = 0
+    for i in range(CHANGELOG_SEARCH_MAX_TRY):
+        cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime)
+
+        try:
+            with open(cl, "rb") as f:
+                changelog_parse_try += 1
+                found_changelog = True
+                path = find_path_from_changelog(f, gfid)
+                if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY:
+                    ctime += 1
+                    continue
+            break
+        except (IOError, OSError) as e:
+            if e.errno == errno.ENOENT:
+                ctime += 1
+            else:
+                break
+
+    if not found_changelog:
+        output_not_found(gfid)
+        return
+
+    if not path:
+        output_not_found(gfid)
+        return
+    gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid")))
+    if gfid != gfid1:
+        output_not_found(gfid)
+        return
+
+    output_success(path)
+
+
+def main():
+    num_arguments = 3
+    if not sys.stdin.isatty():
+        num_arguments = 2
+
+    if len(sys.argv) != num_arguments:
+        sys.stderr.write("Invalid arguments\nUsage: "
+                         "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0])
+        sys.exit(1)
+
+    path = sys.argv[1]
+
+    if sys.stdin.isatty():
+        gfid_list = os.path.abspath(sys.argv[2])
+        os.chdir(path)
+        with open(gfid_list) as f:
+            for gfid in f:
+                gfid_to_path(gfid)
+    else:
+        os.chdir(path)
+        for gfid in sys.stdin:
+            gfid_to_path(gfid)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/gfind_missing_files/gfid_to_path.sh b/tools/gfind_missing_files/gfid_to_path.sh
new file mode 100644 (file)
index 0000000..20ac6a9
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+E_BADARGS=65
+
+
+function gfid_to_path()
+{
+    brick_dir=$1;
+    gfid_file=$(readlink -e $2);
+
+    current_dir=$(pwd);
+    cd $brick_dir;
+
+    while read gfid
+    do
+        to_search=`echo .glusterfs/${gfid:0:2}"/"${gfid:2:2}"/"$gfid`;
+        find . -samefile $to_search | grep -v $to_search;
+    done < $gfid_file;
+
+    cd $current_dir;
+}
+
+
+function main(){
+    if [ $# -ne 2 ]
+    then
+        echo "Usage: `basename $0` BRICK_DIR GFID_FILE";
+        exit $E_BADARGS;
+    fi
+
+    gfid_to_path $1 $2;
+}
+
+main "$@";
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
new file mode 100644 (file)
index 0000000..07d6bef
--- /dev/null
@@ -0,0 +1,119 @@
+#!/bin/sh
+
+##  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+##  This file is part of GlusterFS.
+##
+##  This file is licensed to you under your choice of the GNU Lesser
+##  General Public License, version 3 or any later version (LGPLv3 or
+##  later), or the GNU General Public License, version 2 (GPLv2), in all
+##  cases as published by the Free Software Foundation.
+
+BRICKPATH=    #Brick path of gluster volume
+SLAVEHOST=    #Slave hostname
+SLAVEVOL=     #Slave volume
+SLAVEMNT=     #Slave gluster volume mount point
+WORKERS=4     #Default number of worker threads
+
+# Print all arguments on one line to stdout.
+out()
+{
+    echo "$@"
+}
+
+# Print the arguments prefixed with FATAL, then exit with status 1.
+fatal()
+{
+    out FATAL "$@"
+    exit 1
+}
+
+function ping_host ()
+{
+    ### Use bash internal socket support
+    {
+        exec 400<>/dev/tcp/$1/$2
+        if [ $? -ne '0' ]; then
+            return 1;
+        else
+            exec 400>&-
+            return 0;
+        fi
+    } 1>&2 2>/dev/null
+}
+
+function mount_slave()
+{
+    local i; # inode number
+    SSH_PORT=22
+
+    SLAVEMNT=`mktemp -d`
+    [ "x$SLAVEMNT" = "x" ] && fatal "Could not mktemp directory";
+    [ -d "$SLAVEMNT" ] || fatal "$SLAVEMNT not a directory";
+
+    ping_host ${SLAVEHOST} $SSH_PORT
+    if [ $? -ne 0 ]; then
+        echo "$SLAVEHOST not reachable.";
+        exit 1;
+    fi;
+
+    glusterfs --volfile-id=$SLAVEVOL --aux-gfid-mount --volfile-server=$SLAVEHOST $SLAVEMNT;
+    i=$(stat -c '%i' $SLAVEMNT);
+    [ "x$i" = "x1" ] || fatal "Could not mount volume $2 on $SLAVEMNT Please check host and volume exists";
+}
+
+function parse_cli()
+{
+    if [[ $# -ne 4 ]]; then
+        echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
+        exit 1
+    else
+        BRICKPATH=$1;
+        SLAVEHOST=$2;
+        SLAVEVOL=$3;
+        OUTFILE=$4;
+
+        mount_slave;
+        echo "Slave volume is mounted at ${SLAVEMNT}"
+        echo
+    fi
+}
+
+function main()
+{
+    parse_cli "$@";
+
+    echo "Calling crawler...";
+    path=$(readlink -e $0)
+    $(dirname $path)/gcrawler ${BRICKPATH} ${SLAVEMNT} ${WORKERS} > ${OUTFILE}
+
+    #Clean up the mount
+    umount $SLAVEMNT;
+    rmdir $SLAVEMNT;
+
+    echo "Crawl Complete."
+    num_files_missing=$(wc -l ${OUTFILE} | awk '{print $1}')
+    if [ $num_files_missing -eq 0 ]
+    then
+        echo "Total Missing File Count : 0"
+        exit 0;
+    fi
+
+    echo "gfids of skipped files are available in the file ${OUTFILE}"
+    echo
+    echo "Starting gfid to path conversion"
+
+    #Call python script to convert gfids to full pathname
+    INFILE=$(readlink -e ${OUTFILE})
+    python $(dirname $path)/gfid_to_path.py ${BRICKPATH} ${INFILE} 1> ${OUTFILE}_pathnames 2> ${OUTFILE}_gfids
+    echo "Path names of skipped files are available in the file ${OUTFILE}_pathnames"
+
+    gfid_to_path_failures=$(wc -l ${OUTFILE}_gfids | awk '{print $1}')
+    if [ $gfid_to_path_failures -gt 0 ]
+    then
+       echo "WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to ${OUTFILE}_gfids"
+       echo "Use $(dirname $path)/gfid_to_path.sh <brick-path> ${OUTFILE}_gfids to convert those GFIDs to Path"
+    fi
+
+    #Output
+    echo "Total Missing File Count : $(wc -l ${OUTFILE} | awk '{print $1}')"
+}
+
+main "$@";