object_store: Make iter_tree_contents depth-first.
author Dave Borowitz <dborowitz@google.com>
Fri, 30 Jul 2010 11:08:46 +0000 (13:08 +0200)
committer Jelmer Vernooij <jelmer@samba.org>
Fri, 30 Jul 2010 11:08:46 +0000 (13:08 +0200)
This mimics the behavior of os.walk as well as common tools like find
and ls -R.

Memory usage may also be better, as directory trees tend to have a
higher branching factor than depth. (Note that the overhead of Python
stack frames may make this not true in many cases, and I haven't run
any benchmarks.)

NEWS
dulwich/object_store.py
dulwich/tests/test_object_store.py

diff --git a/NEWS b/NEWS
index abe297baa8edc885f2893de51d746c67626c3976..1955d9c21aff45e0a8784ac4ee673534b8529131 100644 (file)
--- a/NEWS
+++ b/NEWS
 
   * New public function dulwich.pack.write_pack_header. (Dave Borowitz)
 
+ API CHANGES
+
+  * ObjectStore.iter_tree_contents now walks contents in depth-first, sorted
+    order. (Dave Borowitz)
+
 
 0.6.1  2010-07-22
 
index 32f9b78503fca06523f1b685a25a911962f14468..1a834399edfb8ef28dd77ff7026e074560132f67 100644 (file)
@@ -175,21 +175,24 @@ class BaseObjectStore(object):
                     else:
                         todo.add((None, newhexsha, childpath))
 
-    def iter_tree_contents(self, tree):
-        """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
+    def iter_tree_contents(self, tree_id):
+        """Iterate the contents of a tree and all subtrees.
 
-        :param tree: SHA1 of the root of the tree
+        Iteration is depth-first, as in e.g. os.walk.
+
+        :param tree_id: SHA1 of the tree.
+        :yield: Tuples of (path, mode, hexsha) for objects in a tree.
         """
-        todo = set([(tree, "")])
+        todo = [('', stat.S_IFDIR, tree_id)]
         while todo:
-            (tid, tpath) = todo.pop()
-            tree = self[tid]
-            for name, mode, hexsha in tree.iteritems():
-                path = posixpath.join(tpath, name)
-                if stat.S_ISDIR(mode):
-                    todo.add((hexsha, path))
-                else:
-                    yield path, mode, hexsha
+            path, mode, hexsha = todo.pop()
+            if stat.S_ISDIR(mode):
+                entries = reversed(self[hexsha].iteritems())
+                for name, entry_mode, entry_hexsha in entries:
+                    entry_path = posixpath.join(path, name)
+                    todo.append((entry_path, entry_mode, entry_hexsha))
+            else:
+                yield path, mode, hexsha
 
     def find_missing_objects(self, haves, wants, progress=None,
                              get_tagged=None):
index 15241bae88d196d153142047af6d24810474c786..e53a612dd7b8ed9c85a9348655af9755c91f23d8 100644 (file)
@@ -23,6 +23,9 @@ import os
 import shutil
 import tempfile
 
+from dulwich.index import (
+    commit_tree,
+    )
 from dulwich.objects import (
     Blob,
     )
@@ -75,6 +78,24 @@ class ObjectStoreTests(object):
         r = self.store[testobject.id]
         self.assertEquals(r, testobject)
 
+    def test_iter_tree_contents(self):
+        blob_a = make_object(Blob, data='a')
+        blob_b = make_object(Blob, data='b')
+        blob_c = make_object(Blob, data='c')
+        for blob in [blob_a, blob_b, blob_c]:
+            self.store.add_object(blob)
+
+        blobs = [
+          ('a', blob_a.id, 0100644),
+          ('ad/b', blob_b.id, 0100644),
+          ('ad/bd/c', blob_c.id, 0100755),
+          ('ad/c', blob_c.id, 0100644),
+          ('c', blob_c.id, 0100644),
+          ]
+        tree_id = commit_tree(self.store, blobs)
+        self.assertEquals([(p, m, h) for (p, h, m) in blobs],
+                          list(self.store.iter_tree_contents(tree_id)))
+
 
 class MemoryObjectStoreTests(ObjectStoreTests, TestCase):