.\" Title: remote-cache-clean
.\" Author: Martin Schwenke
.\" Generator: DocBook XSL Stylesheets v1.75.1 <http://docbook.sf.net/>
-.\" Date: 07/21/2009
+.\" Date: 07/23/2009
.\" Manual: System administration commands
.\" Source: Remote Cache
.\" Language: English
.\"
-.TH "REMOTE\-CACHE\-CLEAN" "8" "07/21/2009" "Remote Cache" "System administration commands"
+.TH "REMOTE\-CACHE\-CLEAN" "8" "07/23/2009" "Remote Cache" "System administration commands"
.\" -----------------------------------------------------------------
.\" * set default formatting
.\" -----------------------------------------------------------------
remote-cache-clean \- Clean up a remote\-cache local cache directory
.SH "SYNOPSIS"
.HP \w'\fBremote\-cache\-clean\fR\ 'u
-\fBremote\-cache\-clean\fR [\-v | \-\-verbose] [\-l\ \fIfile\fR | \-\-log\-file=\fIfile\fR] [\-n | \-\-dry\-run] {\fIcache\-root\fR} {\fIhigh\-water\fR} {\fIlow\-water\fR}
+\fBremote\-cache\-clean\fR [\-v | \-\-verbose] [\-l\ \fIfile\fR | \-\-log\-file=\fIfile\fR] [\-n | \-\-dry\-run] [\-\-trim\-old=\fIhours\fR] [\-\-trim\-unused=\fIhours\fR] [\-\-no\-lru] {\fIcache\-root\fR} {\fIhigh\-water\fR} {\fIlow\-water\fR}
.SH "DESCRIPTION"
.PP
remote\-cache\-clean is a script that uses a least recently used algorithm to clean up a local cache directory that is used by remote\-cache\&. The root directory for the cache must be specified via
.nr an-break-flag 1
.br
.ps +1
+\fB--trim-old=<hours>\fR
+.RS 4
+.PP
+Do an initial pass (before the LRU pass) that removes all files that have been in the cache for longer than <hours>\&. This may not be run if the cache size is below
+\fIlow\-water\fR
+KB\&. However, when it runs it removes all relevant files, so may result in a cache size much smaller than
+\fIlow\-water\fR
+KB\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fB--trim-unused=<hours>\fR
+.RS 4
+.PP
+Do an initial pass (before the LRU pass) that removes all files that have not been accessed for longer than <hours>\&. This may not be run if the cache size is below
+\fIlow\-water\fR
+KB\&. However, when it runs it removes all relevant files, so may result in a cache size much smaller than
+\fIlow\-water\fR
+KB\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fB--no-lru\fR
+.RS 4
+.PP
+Do not run the pass that employs the least recently used algorithm\&.
+.RE
+.sp
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
\fB-h, --help\fR
.RS 4
.PP
.RS 4
.\}
.nf
- remote\-cache\-clean \-v \-l /var/log/remote\-cache\-clean\&.log /var/cache/remote\-cache 20000 15000
+ remote\-cache\-clean \-v \-l /var/log/remote\-cache\-clean\&.log \e
+ /var/cache/remote\-cache 20000 15000
+
+.fi
+.if n \{\
+.RE
+.\}
+.PP
+Like the first example, but first removes all files that have been in the cache for more than 3 days, if the cache size is over 20MB\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+ remote\-cache\-clean \-\-trim\-old=72 \e
+ /var/cache/remote\-cache 20000 15000
.fi
.if n \{\
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>remote-cache-clean</title><meta name="generator" content="DocBook XSL Stylesheets V1.75.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" title="remote-cache-clean"><a name="remote-cache-clean.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>remote-cache-clean — Clean up a remote-cache local cache directory</p></div><div class="refsynopsisdiv" title="Synopsis"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">remote-cache-clean</code> [ -v | --verbose ] [ -l <em class="replaceable"><code>file</code></em> | --log-file=<em class="replaceable"><code>file</code></em> ] [ -n | --dry-run ] {<em class="replaceable"><code>cache-root</code></em>} {<em class="replaceable"><code>high-water</code></em>} {<em class="replaceable"><code>low-water</code></em>}</p></div></div><div class="refsect1" title="DESCRIPTION"><a name="id2936439"></a><h2>DESCRIPTION</h2><p>
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>remote-cache-clean</title><meta name="generator" content="DocBook XSL Stylesheets V1.75.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" title="remote-cache-clean"><a name="remote-cache-clean.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>remote-cache-clean — Clean up a remote-cache local cache directory</p></div><div class="refsynopsisdiv" title="Synopsis"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">remote-cache-clean</code> [ -v | --verbose ] [ -l <em class="replaceable"><code>file</code></em> | --log-file=<em class="replaceable"><code>file</code></em> ] [ -n | --dry-run ] [--trim-old=<em class="replaceable"><code>hours</code></em>] [--trim-unused=<em class="replaceable"><code>hours</code></em>] [--no-lru] {<em class="replaceable"><code>cache-root</code></em>} {<em class="replaceable"><code>high-water</code></em>} {<em class="replaceable"><code>low-water</code></em>}</p></div></div><div class="refsect1" title="DESCRIPTION"><a name="id2835080"></a><h2>DESCRIPTION</h2><p>
remote-cache-clean is a script that uses a least recently used
algorithm to clean up a local cache directory that is used by
remote-cache. The root directory for the cache must be
will be removed from <em class="replaceable"><code>cache-root</code></em> until
the space consumed is less than
<em class="replaceable"><code>low-water</code></em> KB.
- </p><div class="refsect2" title="Options"><a name="id2936471"></a><h3>Options</h3><div class="refsect3" title="-v, --verbose"><a name="id2936476"></a><h4>-v, --verbose</h4><p>
- Print information and actions taken to standard output.
- </p></div><div class="refsect3" title="-l <file>, --log-file=<file>"><a name="id2936488"></a><h4>-l <file>, --log-file=<file></h4><p>
- Append information and actions taken to log <file>.
- </p></div><div class="refsect3" title="-n, --dry-run"><a name="id2936500"></a><h4>-n, --dry-run</h4><p>
- Do not actually delete any files. When used with -v (or
- --verbose) or -l (or --log-file) this can be useful to see
- what would have been done.
- </p></div><div class="refsect3" title="-h, --help"><a name="id2936512"></a><h4>-h, --help</h4><p>
- Show a short usage guide.
- </p></div></div></div><div class="refsect1" title="EXAMPLES"><a name="id2936524"></a><h2>EXAMPLES</h2><p>
+ </p><div class="refsect2" title="Options"><a name="id2835113"></a><h3>Options</h3><div class="refsect3" title="-v, --verbose"><a name="id2835118"></a><h4>-v, --verbose</h4><p>
+ Print information and actions taken to standard output.
+ </p></div><div class="refsect3" title="-l <file>, --log-file=<file>"><a name="id2835130"></a><h4>-l <file>, --log-file=<file></h4><p>
+ Append information and actions taken to log <file>.
+ </p></div><div class="refsect3" title="-n, --dry-run"><a name="id2835142"></a><h4>-n, --dry-run</h4><p>
+ Do not actually delete any files. When used with -v (or
+ --verbose) or -l (or --log-file) this can be useful to see
+ what would have been done.
+ </p></div><div class="refsect3" title="--trim-old=<hours>"><a name="id2835154"></a><h4>--trim-old=<hours></h4><p>
+ Do an initial pass (before the LRU pass) that removes all
+ files that have been in the cache for longer than
+ <hours>. This may not be run if the cache size is
+ below <em class="replaceable"><code>low-water</code></em> KB. However,
+ when it runs it removes all relevant files, so may result in
+ a cache size much smaller than
+ <em class="replaceable"><code>low-water</code></em> KB.
+ </p></div><div class="refsect3" title="--trim-unused=<hours>"><a name="id2835178"></a><h4>--trim-unused=<hours></h4><p>
+ Do an initial pass (before the LRU pass) that removes all
+ files that have not been accessed for longer than
+ <hours>. This may not be run if the cache size is
+ below <em class="replaceable"><code>low-water</code></em> KB. However,
+ when it runs it removes all relevant files, so may result in
+ a cache size much smaller than
+ <em class="replaceable"><code>low-water</code></em> KB.
+ </p></div><div class="refsect3" title="--no-lru"><a name="id2835202"></a><h4>--no-lru</h4><p>
+ Do not run the pass that employs the least recently used
+ algorithm.
+ </p></div><div class="refsect3" title="-h, --help"><a name="id2835213"></a><h4>-h, --help</h4><p>
+ Show a short usage guide.
+ </p></div></div></div><div class="refsect1" title="EXAMPLES"><a name="id2835225"></a><h2>EXAMPLES</h2><p>
If /var/cache/remote-cache contains more than 20MB of file data
then remove the most recently used files until it the file usage
is below 15MB.
As above, but also logs actions to
/var/log/remote-cache-clean.log and also to the screen.
</p><pre class="screen">
- remote-cache-clean -v -l /var/log/remote-cache-clean.log /var/cache/remote-cache 20000 15000
+ remote-cache-clean -v -l /var/log/remote-cache-clean.log \
+ /var/cache/remote-cache 20000 15000
</pre><p>
- </p></div><div class="refsect1" title="SEE ALSO"><a name="id2936556"></a><h2>SEE ALSO</h2><p>
+ </p><p>
+ Like the first example, but first removes all files that have
+ been in the cache for more than 3 days, if the cache size is
+ over 20MB.
+ </p><pre class="screen">
+ remote-cache-clean --trim-old=72 \
+ /var/cache/remote-cache 20000 15000
+ </pre><p>
+ </p></div><div class="refsect1" title="SEE ALSO"><a name="id2835269"></a><h2>SEE ALSO</h2><p>
remote-cache(8)
- </p></div><div class="refsect1" title="BUGS"><a name="id2936564"></a><h2>BUGS</h2><p>
+ </p></div><div class="refsect1" title="BUGS"><a name="id2835278"></a><h2>BUGS</h2><p>
remote-cache-clean builds a file list at the start of processing
and does not recheck file space usage. Therefore, if the cache
is in use during clean-up then, upon completion, files may
Therefore, running du(1) on the cache-root will indicate extra
space usage. A future version of remote-cache clean will remove
empty directories to mitigate this by some amount.
- </p></div><div class="refsect1" title="COPYRIGHT/LICENSE"><a name="id2936587"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+ </p></div><div class="refsect1" title="COPYRIGHT/LICENSE"><a name="id2835301"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
remote-cache-clean is Copyright (C) Martin Schwenke 2009<br>
<br>
remote-cache is Copyright (C) Ronnie Sahlberg 2008<br>
<arg choice="plain">-n</arg>
<arg choice="plain">--dry-run</arg>
</group>
+ <arg choice="opt">--trim-old=<replaceable>hours</replaceable></arg>
+ <arg choice="opt">--trim-unused=<replaceable>hours</replaceable></arg>
+ <arg choice="opt">--no-lru</arg>
<arg choice="req"><replaceable>cache-root</replaceable></arg>
<arg choice="req"><replaceable>high-water</replaceable></arg>
<arg choice="req"><replaceable>low-water</replaceable></arg>
<refsect2>
<title>Options</title>
-
- <refsect3>
- <title>-v, --verbose</title>
- <para>
- Print information and actions taken to standard output.
- </para>
- </refsect3>
-
- <refsect3>
- <title>-l <file>, --log-file=<file></title>
- <para>
- Append information and actions taken to log <file>.
- </para>
- </refsect3>
-
- <refsect3>
- <title>-n, --dry-run</title>
- <para>
- Do not actually delete any files. When used with -v (or
- --verbose) or -l (or --log-file) this can be useful to see
- what would have been done.
- </para>
- </refsect3>
-
- <refsect3>
- <title>-h, --help</title>
- <para>
- Show a short usage guide.
- </para>
- </refsect3>
-
- </refsect2>
- </refsect1>
+
+ <refsect3>
+ <title>-v, --verbose</title>
+ <para>
+ Print information and actions taken to standard output.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>-l <file>, --log-file=<file></title>
+ <para>
+ Append information and actions taken to log <file>.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>-n, --dry-run</title>
+ <para>
+ Do not actually delete any files. When used with -v (or
+ --verbose) or -l (or --log-file) this can be useful to see
+ what would have been done.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>--trim-old=<hours></title>
+ <para>
+ Do an initial pass (before the LRU pass) that removes all
+ files that have been in the cache for longer than
+ <hours>. This may not be run if the cache size is
+ below <replaceable>low-water</replaceable> KB. However,
+ when it runs it removes all relevant files, so may result in
+ a cache size much smaller than
+ <replaceable>low-water</replaceable> KB.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>--trim-unused=<hours></title>
+ <para>
+ Do an initial pass (before the LRU pass) that removes all
+ files that have not been accessed for longer than
+ <hours>. This may not be run if the cache size is
+ below <replaceable>low-water</replaceable> KB. However,
+ when it runs it removes all relevant files, so may result in
+ a cache size much smaller than
+ <replaceable>low-water</replaceable> KB.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>--no-lru</title>
+ <para>
+ Do not run the pass that employs the least recently used
+ algorithm.
+ </para>
+ </refsect3>
+
+ <refsect3>
+ <title>-h, --help</title>
+ <para>
+ Show a short usage guide.
+ </para>
+ </refsect3>
+
+ </refsect2>
+ </refsect1>
<refsect1><title>EXAMPLES</title>
<para>
As above, but also logs actions to
/var/log/remote-cache-clean.log and also to the screen.
<screen format="linespecific">
- remote-cache-clean -v -l /var/log/remote-cache-clean.log /var/cache/remote-cache 20000 15000
+ remote-cache-clean -v -l /var/log/remote-cache-clean.log \
+ /var/cache/remote-cache 20000 15000
+ </screen>
+ </para>
+
+ <para>
+ Like the first example, but first removes all files that have
+ been in the cache for more than 3 days, if the cache size is
+ over 20MB.
+ <screen format="linespecific">
+ remote-cache-clean --trim-old=72 \
+ /var/cache/remote-cache 20000 15000
</screen>
</para>
+
</refsect1>
<refsect1><title>SEE ALSO</title>
import sys
import logging
from optparse import OptionParser
+from time import time
class File(object):
def __init__(self, path):
self.path = path
s = os.lstat(self.path)
self.atime = s.st_atime
+ self.ctime = s.st_ctime
self.size = float(s.__getattribute__('st_blocks')) / 2 # 1KB units
def __str__(self):
return "(%d, %d, %s)" % (self.atime, self.size, self.path)
+ def remove(self):
+ """Remove the file if options.dry_run is not set.
+ Ignore any errors. """
+
+ global options
+
+ if not options.dry_run:
+ try:
+ os.remove(self.path)
+ except OSError:
+ pass
+
+
class Cache(object):
def __init__(self, root):
self.size = 0.0
+ self.count = 0
self.files = None
self.root = root
def __str__(self):
return ("Size: %d KB" % (self.size)) + \
"\n".join([ str(f) for f in self.files])
+
+ def _add_file(self, path):
+ """Add a file to the cache. Don't worry if it fails."""
+ try:
+ t = File(path)
+ except OSError:
+ return
+ self.size += t.size
+ self.count += 1
+ self.files.append(t)
+
+ def _trim(self, condition, log_func, stop_on_fail=False):
+ """Remove files from self.files for which condition(f) is
+ True. If stop_on_fail is True then terminate when the
+ condition first fails. self.files is processed in *reverse*
+ order. Log messages will contain log_func, which is usually
+ the name of the calling function."""
+
+ global logger
+
+ init_count = self.count
+ init_size = self.size
+
+ keep_files = []
+ while self.files:
+ f = self.files.pop()
+ if condition(f):
+ logger.info("%s: Removing %s", log_func, f.path)
+ f.remove()
+ self.size -= f.size
+ self.count -=1
+ else:
+ keep_files.append(f)
+ if stop_on_fail:
+ break
+
+ # reverse() is fast.
+ keep_files.reverse()
+ self.files += keep_files
+
+ logger.info("%s: Removed %d files, recovered %d KB", log_func,
+ init_count - self.count, init_size - self.size)
def scan(self):
+ """Gather information about files in the cache."""
self.files = []
for (root, dirs, files) in os.walk(self.root):
for f in files:
- try:
- t = File(os.path.join(root, f))
- except OSError:
- continue
- self.size += t.size
- self.files.append(t)
+ self._add_file(os.path.join(root, f))
def lru_clean(self, high_water, low_water):
- """Removes the least recently used files until space usage for
+ """Remove the least recently used files until space usage for
the cache is below low_water. Files are only removed if the
initial usage is above high_water. All sizes are in 1KB
units."""
- global options
- global logger
+ if self.files is None:
+ self.scan()
+
+ # Reverse sort: we want to process the list in reverse order
+ # because pop() is *much* more efficient than pop(0)
+ self.files.sort(key = lambda f: (f.atime, -f.size), reverse=True)
+
+ # Yes, this is a cache-global condition that ignores the file!
+ under_water = lambda f: self.size > low_water
+ self._trim(under_water, "lru_clean", stop_on_fail=True)
+
+ def trim_unused(self, num_hours):
+ """Remove any files that haven't been accessed for more than num_hours.
+ This does not respect options.low_water."""
if self.files is None:
self.scan()
- self.files.sort(key = lambda f: (f.atime, -f.size))
+ # Optimisation: precalculate atime_limit.
+ # Use Unix time, not the horribly verbose datetime/timedelta.
+ atime_limit = time() - num_hours * (60*60)
+ is_unused = lambda file: file.atime < atime_limit
- files_removed = 0
- init_size = self.size
+ self._trim(is_unused, "trim_unused")
+
+ def trim_old(self, num_hours):
+ """Remove any files that were probably created more than num_hours ago.
+ This does not respect options.low_water."""
- while (self.size > low_water) and self.files:
- f = self.files.pop(0)
- logger.info("lru_clean: Removing %s", f.path)
- if not options.dry_run:
- try:
- os.remove(f.path)
- except OSError:
- pass
- self.size -= f.size
- files_removed += 1
+ if self.files is None:
+ self.scan()
- logger.info("lru_clean: Removed %d files, recovered %d KB",
- files_removed, init_size - self.size)
+ # Optimisation: precalculate ctime_limit.
+ # Use Unix time, not the horribly verbose datetime/timedelta.
+ ctime_limit = time() - num_hours * (60*60)
+ is_old = lambda file: file.ctime < ctime_limit
+ self._trim(is_old, "trim_old")
def process_args():
usage = """usage: %prog [options] cache-root high-water low-water
cache-root: root directory of cache
high-water: maximum allowable cache size - no action if cache is smaller
- low-water: desired cache size - cache is reduced to this size"""
+ low-water: desired cache size - cache is reduced to this size
+
+ If multiple removal algorithms are used then processing stops after
+ the first pass that makes the cache smaller than low-water."""
parser = OptionParser(usage=usage)
parser.add_option("-v", "--verbose",
action="store_true", dest="verbose", default=False,
- help="print details of files being removed")
+ help="print information and actions taken to stdout")
parser.add_option("-l", "--log-file",
action="store", dest="log_file", default=None,
- help="write events to this log file")
+ metavar="FILE",
+ help="append information and actions taken to FILE")
parser.add_option("-n", "--dry-run",
action="store_true", dest="dry_run", default=False,
- help="do not actually remove files - " + \
- "useful for testing with -v")
+ help="do not remove files, useful for testing with -v")
+ parser.add_option("--trim-old",
+ action="store", type="int", dest="trim_old",
+ default=None, metavar="HOURS",
+ help="initial pass removes files more than HOURS old")
+ parser.add_option("--trim-unused",
+ action="store", type="int", dest="trim_unused",
+ default=None, metavar="HOURS",
+ help="initial pass removes files unused for HOURS")
+ parser.add_option("--no-lru",
+ action="store_false", dest="use_lru", default=True,
+ help="do not do an LRU cleanup pass")
(options, args) = parser.parse_args()
logger = None
def main():
- global options
- global logger
+ global options, logger
options = process_args()
logger = setup_logging()
c = Cache(options.cache_root)
c.scan()
- logger.info("main: Initial cache size is %d KB", c.size)
+ logger.info("main: Initial cache size is %d KB, %d files", c.size, c.count)
if c.size < options.high_water:
logger.info(
options.high_water)
return
- c.lru_clean(options.high_water, options.low_water)
+ if options.trim_old is not None and c.size > options.low_water:
+ c.trim_old(options.trim_old)
+ if options.trim_unused is not None and c.size > options.low_water:
+ c.trim_unused(options.trim_unused)
+ if options.use_lru and c.size > options.low_water:
+ c.lru_clean(options.high_water, options.low_water)
- logger.info("main: Final cache size is %d KB", c.size)
+ logger.info("main: Final cache size is %d KB, %d files", c.size, c.count)
logging.shutdown()
if __name__ == '__main__':