1 # object_store.py -- Object store for git objects
2 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; either version 2
7 # or (at your option) any later version of the License.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 """Git object store interfaces and implementation."""
31 from dulwich.errors import (
34 from dulwich.file import GitFile
35 from dulwich.objects import (
45 from dulwich.pack import (
class BaseObjectStore(object):
    """Object store interface.

    NOTE(review): this file appears to be a mangled/sampled excerpt -- a
    number of lines are missing inside the method bodies below.  Gaps are
    marked with NOTE(review) comments rather than guessed at.
    """

    def determine_wants_all(self, refs):
        # Want every sha referenced by a ref that we do not already have,
        # skipping peeled tag refs (the "^{}" suffix).  Python 2 dict API.
        return [sha for (ref, sha) in refs.iteritems()
                if not sha in self and not ref.endswith("^{}")]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

        # NOTE(review): the definition line for the member below (presumably
        # a ``packs`` property, by analogy with the subclasses) is missing
        # from this excerpt.
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        # Deserialize from the raw representation supplied by the subclass.
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp)

        # NOTE(review): the ``def __iter__(self):`` header is missing from
        # this excerpt; the docstring and raise below clearly belong to it.
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add.
        """
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source (old) tree
        :param target: SHA1 of the target (new) tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Worklist of (source tree sha, target tree sha, path) to compare.
        todo = set([(source, target, "")])
        # NOTE(review): the ``while todo:`` loop header and the lines that
        # resolve ``sid``/``tid`` into ``stree``/``ttree`` tree objects are
        # missing from this excerpt.
        (sid, tid, path) = todo.pop()
        for name, oldmode, oldhexsha in stree.iteritems():
            oldchildpath = posixpath.join(path, name)
            # NOTE(review): the guard setting newmode/newhexsha to None when
            # ``name`` is absent from ``ttree`` is missing from this excerpt.
            (newmode, newhexsha) = ttree[name]
            newchildpath = oldchildpath
            if (want_unchanged or oldmode != newmode or
                oldhexsha != newhexsha):
                if stat.S_ISDIR(oldmode):
                    if newmode is None or stat.S_ISDIR(newmode):
                        # Tree on both sides: recurse.
                        todo.add((oldhexsha, newhexsha, oldchildpath))
                        # entry became a file
                        # NOTE(review): an ``else:`` branch header appears to
                        # be missing above the following two lines.
                        todo.add((oldhexsha, None, oldchildpath))
                        yield ((None, newchildpath), (None, newmode), (None, newhexsha))
                if newmode is not None and stat.S_ISDIR(newmode):
                    # Entry became a tree: report the old file as removed.
                    yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
                    todo.add((None, newhexsha, newchildpath))
                yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
        # Entries that only exist in the target tree are additions.
        for name, newmode, newhexsha in ttree.iteritems():
            childpath = posixpath.join(path, name)
            if not name in stree:
                if not stat.S_ISDIR(newmode):
                    yield ((None, childpath), (None, newmode), (None, newhexsha))
                    # NOTE(review): an ``else:`` header appears to be missing
                    # above the following line (new subtree -> recurse).
                    todo.add((None, newhexsha, childpath))

    def iter_tree_contents(self, tree):
        """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.

        :param tree: SHA1 of the root of the tree
        """
        todo = set([(tree, "")])
        # NOTE(review): the ``while todo:`` loop header and the line that
        # resolves ``tid`` to a Tree object are missing from this excerpt.
        (tid, tpath) = todo.pop()
        for name, mode, hexsha in tree.iteritems():
            path = posixpath.join(tpath, name)
            if stat.S_ISDIR(mode):
                # Subtree: queue it for later expansion.
                todo.add((hexsha, path))
                # NOTE(review): an ``else:`` header appears to be missing
                # above the following line.
                yield path, mode, hexsha

    def find_missing_objects(self, haves, wants, progress=None,
        # NOTE(review): the signature continuation (``get_tagged=None):``)
        # is missing from this excerpt.
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
        # iter() with a sentinel: keep calling finder.next until it yields
        # None (queue exhausted).
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        # NOTE(review): the accumulator initialisation, the ``while sha:``
        # loop header and the ack/append logic between these two ``next()``
        # calls are missing from this excerpt.
        sha = graphwalker.next()
        sha = graphwalker.next()

    def get_graph_walker(self, heads):
        """Obtain a graph walker for this object store.

        :param heads: Local heads to start search with
        :return: GraphWalker object
        """
        # Parents are looked up lazily through __getitem__.
        return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))
class PackBasedObjectStore(BaseObjectStore):
    """Object store backed by pack files, with a cached pack list.

    NOTE(review): this excerpt is missing lines; gaps are marked below.
    """

        # NOTE(review): the ``def __init__(self):`` header is missing from
        # this excerpt.
        self._pack_cache = None

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        for pack in self.packs:
            # NOTE(review): the per-pack membership test/return (and a final
            # ``return False``) are missing from this excerpt.

    def _load_packs(self):
        # Subclasses provide the actual pack discovery.
        raise NotImplementedError(self._load_packs)

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, pack):
        """Add a newly appeared pack to the cache by path."""
        # Only record it when a cache has been materialised; otherwise the
        # next ``packs`` access reloads from disk and finds it anyway.
        if self._pack_cache is not None:
            self._pack_cache.append(pack)

        # NOTE(review): the property definition line (presumably ``packs``)
        # is missing from this excerpt.
        """List with pack objects."""
        if self._pack_cache is None or self._pack_cache_stale():
            self._pack_cache = self._load_packs()
        return self._pack_cache

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        # Returns the loose object for sha, or None if not present (see
        # contains_loose below for the None convention).
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        # NOTE(review): the initialisation of ``objects`` (a set) and the
        # final ``return len(objects)`` are missing from this excerpt.
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(objects)
        # The loose copies are redundant once they are in a pack.
        for obj, path in objects:
            self._remove_loose_object(obj.id)

        # NOTE(review): the ``def __iter__(self):`` header is missing from
        # this excerpt.
        """Iterate over the SHAs that are present in this store."""
        iterables = self.packs + [self._iter_loose_objects()]
        return itertools.chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        # NOTE(review): the length dispatch between hex (40 chars) and
        # binary (20 bytes) sha forms, and the try/except around the pack
        # lookup, are partially missing from this excerpt.
        sha = hex_to_sha(name)
        elif len(name) == 20:
        for pack in self.packs:
                return pack.get_raw(sha)
        hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        # NOTE(review): an ``if ret is not None:`` guard appears to be
        # missing above the following line.
        return ret.type_num, ret.as_raw_string()
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over objects, should support __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            # NOTE(review): the ``return`` for this early-out is missing
            # from this excerpt.
        f, commit = self.add_pack()
        write_pack_data(f, objects, len(objects))
        # NOTE(review): the tail that closes ``f`` and returns the result
        # of ``commit()`` appears to be missing from this excerpt.
class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk.

    NOTE(review): this excerpt is missing lines; gaps are marked below.
    """

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        # NOTE(review): the ``self.path = path`` assignment appears to be
        # missing from this excerpt (``self.path`` is read just below).
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._pack_cache_time = 0

    def _load_packs(self):
        # Scan the pack directory for pack files, newest first.
        # NOTE(review): the ``pack_files = []`` initialisation and the
        # try/except OSError wrapper (whose handler fragment appears below)
        # are missing from this excerpt.
        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
        pack_dir_contents = os.listdir(self.pack_dir)
        for name in pack_dir_contents:
            # TODO: verify that idx exists first
            if name.startswith("pack-") and name.endswith(".pack"):
                filename = os.path.join(self.pack_dir, name)
                # Sort key is mtime so more recent packs are tried first.
                pack_files.append((os.stat(filename).st_mtime, filename))
        # NOTE(review): orphaned except-handler fragment; a missing pack
        # directory presumably means "no packs" -- confirm.
        if e.errno == errno.ENOENT:
        pack_files.sort(reverse=True)
        suffix_len = len(".pack")
        # Pack() wants the basename without the .pack suffix.
        return [Pack(f[:-suffix_len]) for _, f in pack_files]

    def _pack_cache_stale(self):
        # Stale when the pack dir was modified after the cache was loaded.
        # NOTE(review): the try/except OSError wrapper around this stat is
        # missing from this excerpt (handler fragment below).
        return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
        if e.errno == errno.ENOENT:

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        # Loose objects live at <path>/<2 hex chars>/<38 hex chars>.
        for base in os.listdir(self.path):
            # NOTE(review): the guard that skips non-fanout entries and the
            # ``yield`` of the reassembled sha are missing from this excerpt.
            for rest in os.listdir(os.path.join(self.path, base)):

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        # NOTE(review): the ``try:`` header and the ENOENT -> None /
        # re-raise tail of this method are missing from this excerpt.
        return ShaFile.from_path(path)
        except (OSError, IOError), e:
            if e.errno == errno.ENOENT:

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def move_in_thin_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        # A thin pack may reference objects outside the pack; resolve them
        # through get_raw while reading.
        data = ThinPackData(self.get_raw, path)

        # Write index for the thin pack (do we really need this?)
        # NOTE(review): urllib2.randombytes is a non-public helper; tempfile
        # would be the conventional choice for random temp names.
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(urllib2.randombytes(20))+".tempidx")
        data.create_index_v2(temppath)
        p = Pack.from_objects(data, load_pack_index(temppath))

        # Write a full pack version
        temppath = os.path.join(self.pack_dir,
            sha_to_hex(urllib2.randombytes(20))+".temppack")
        write_pack(temppath, ((o, None) for o in p.iterobjects()), len(p))
        # The final pack is named after the sha1 over all object names.
        pack_sha = load_pack_index(temppath+".idx").objects_sha1()
        newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
        os.rename(temppath+".pack", newbasename+".pack")
        os.rename(temppath+".idx", newbasename+".idx")
        final_pack = Pack(newbasename)
        self._add_known_pack(final_pack)
        # NOTE(review): a ``return final_pack`` appears to be missing from
        # this excerpt.

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        # NOTE(review): the line producing ``p`` (presumably
        # ``p = PackData(path)``) is missing from this excerpt.
        entries = p.sorted_entries()
        basename = os.path.join(self.pack_dir,
            "pack-%s" % iter_sha1(entry[0] for entry in entries))
        f = GitFile(basename+".idx", "wb")
        # NOTE(review): the try/finally that closes ``f`` is missing from
        # this excerpt.
        write_pack_index_v2(f, entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        final_pack = Pack(basename)
        self._add_known_pack(final_pack)
        # NOTE(review): a ``return final_pack`` appears to be missing here.

    def add_thin_pack(self):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')
        # NOTE(review): the commit callback definition returned to the
        # caller is missing from this excerpt; only its body fragment
        # survives below.
        if os.path.getsize(path) > 0:
            return self.move_in_thin_pack(path)

        # NOTE(review): the ``def add_pack(self):`` header is missing from
        # this excerpt; the docstring and body below belong to it.
        """Add a new pack to this object store.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')
        if os.path.getsize(path) > 0:
            return self.move_in_pack(path)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        # Loose objects are stored under <path>/<first 2 sha chars>/.
        dir = os.path.join(self.path, obj.id[:2])
        # NOTE(review): the ``try: os.mkdir(dir)`` / ``except OSError, e:``
        # lines around this handler fragment are missing from this excerpt.
        if e.errno != errno.EEXIST:
        path = os.path.join(dir, obj.id[2:])
        if os.path.exists(path):
            return # Already there, no need to write again
        f = GitFile(path, 'wb')
        # NOTE(review): the try/finally that closes ``f`` is missing.
        f.write(obj.as_legacy_object())

        # NOTE(review): the classmethod header (presumably ``init(cls,
        # path)``) and its surrounding mkdir/try lines are missing from this
        # excerpt; only the fragment below survives.
        if e.errno != errno.EEXIST:
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory.

    NOTE(review): this excerpt is missing lines; gaps are marked below.
    """

        # NOTE(review): the ``def __init__(self):`` header and the
        # ``self._data = {}`` initialisation are missing from this excerpt
        # (``self._data`` is the sha -> object dict used throughout).
        super(MemoryObjectStore, self).__init__()

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        # Everything in this store counts as loose.
        return sha in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        # NOTE(review): the ``return False`` (nothing here is ever packed)
        # is missing from this excerpt.

        # NOTE(review): the ``def __iter__(self):`` header is missing.
        """Iterate over the SHAs that are present in this store."""
        return self._data.iterkeys()

        # NOTE(review): the property definition line (presumably ``packs``)
        # and its ``return []`` are missing from this excerpt.
        """List with pack objects."""

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        # NOTE(review): this returns only the raw string, not the documented
        # (type_num, contents) tuple that BaseObjectStore.__getitem__
        # unpacks -- looks like a bug; confirm and return
        # ``(obj.type_num, obj.as_raw_string())`` instead.
        return self[name].as_raw_string()

    def __getitem__(self, name):
        # Direct dict lookup; raises KeyError for unknown shas.
        return self._data[name]

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add.
        """
        self._data[obj.id] = obj

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) pairs.
        """
        for obj, path in objects:
            self._data[obj.id] = obj
class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        # Store the expected object count for subclasses; the docstring
        # promised this parameter but the original never kept it.
        self.count = count

    def add_object(self, object):
        """Add an object to the importer.

        :param object: Object to add.
        """
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
class ObjectIterator(object):
    """Abstract base for anything that can iterate over git objects.

    Concrete subclasses (such as ObjectStoreIterator) must provide the
    actual iteration logic.
    """

    def iterobjects(self):
        """Return an iterator over the objects; subclasses must override."""
        raise NotImplementedError(self.iterobjects)
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore.

    NOTE(review): this excerpt is missing lines; gaps are marked below.
    """

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        # NOTE(review): the ``self.store = store`` and ``self._shas = []``
        # assignments appear to be missing from this excerpt (both
        # attributes are read by the methods below).
        self.sha_iter = sha_iter

        # NOTE(review): the ``def __iter__(self):`` header is missing.
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        # NOTE(review): the body of this method is missing from this
        # excerpt.

        # NOTE(review): the ``def itershas(self):`` header is missing.
        """Iterate over the SHAs."""
        # Replay already-consumed shas first, then drain the source
        # iterator, memoising as we go.
        for sha in self._shas:
            # NOTE(review): a ``yield sha`` is missing here.
        for sha in self.sha_iter:
            self._shas.append(sha)
            # NOTE(review): a ``yield sha`` is missing here.

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

        # NOTE(review): the ``def __len__(self):`` header is missing.
        """Return the number of objects."""
        # Forces full consumption of the sha iterator.
        return len(list(self.itershas()))
def tree_lookup_path(lookup_obj, root_sha, path):
    """Lookup an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    """
    parts = path.split("/")
    # NOTE(review): the loop over ``parts`` that resolves each component
    # (binding ``sha`` from the current tree entry) and the final return
    # are missing from this excerpt.
    obj = lookup_obj(sha)
    if not isinstance(obj, Tree):
        # Every intermediate path component must resolve to a tree.
        raise NotTreeError(sha)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    NOTE(review): this excerpt is missing lines; gaps are marked below.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
        # NOTE(review): the signature continuation (``get_tagged=None):``)
        # is missing from this excerpt.
        self.sha_done = set(haves)
        # Work queue of (sha, name, is-leaf) triples; wants the target
        # already has need not be sent.
        self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
        self.object_store = object_store
        # NOTE(review): the ``if progress is None:`` / ``else:`` headers
        # around the next two assignments are missing from this excerpt.
        self.progress = lambda x: None
        self.progress = progress
        # and-or idiom: evaluates to {} when get_tagged is None.
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        # Only queue entries whose sha has not already been processed.
        self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])

    def parse_tree(self, tree):
        # Queue all tree entries; gitlink (submodule) entries point into
        # other repositories and are skipped.
        self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])

    def parse_commit(self, commit):
        # The commit's tree and its parents still need to be walked.
        self.add_todo([(commit.tree, "", False)])
        self.add_todo([(p, None, False) for p in commit.parents])

    def parse_tag(self, tag):
        # tag.object is a (type, sha) pair; queue the pointed-to object.
        self.add_todo([(tag.object[1], None, False)])

        # NOTE(review): the ``def next(self):`` header is missing from this
        # excerpt; the lines below belong to it.
        if not self.objects_to_send:
            # NOTE(review): the ``return None`` terminator is missing here.
        (sha, name, leaf) = self.objects_to_send.pop()
        # NOTE(review): a guard on ``leaf`` and error handling around the
        # store lookup appear to be missing here.
        o = self.object_store[sha]
        if isinstance(o, Commit):
            # NOTE(review): a ``self.parse_commit(o)`` call is missing here.
        elif isinstance(o, Tree):
            # NOTE(review): a ``self.parse_tree(o)`` call is missing here.
        elif isinstance(o, Tag):
            # NOTE(review): a ``self.parse_tag(o)`` call is missing here.
        if sha in self._tagged:
            # Also send the tag object that points at this sha.
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress("counting objects: %d\r" % len(self.sha_done))
        # NOTE(review): the ``return (sha, name)`` tail is missing from
        # this excerpt.
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    NOTE(review): this excerpt is missing lines and appears truncated at
    the end; gaps are marked below.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # NOTE(review): a ``self.parents = {}`` initialisation appears to
        # be missing from this excerpt (``self.parents`` is read below).

        # NOTE(review): the ``def ack(self, sha):`` header is missing from
        # this excerpt; the docstring and fragments below belong to it.
        """Ack that a revision and its ancestors are present in the source."""
        ancestors = set([sha])

        # stop if we run out of heads to remove
        # NOTE(review): the loop header and the lines removing acked
        # ancestors from ``self.heads`` are missing from this excerpt.

        # collect all ancestors
        new_ancestors = set()
        # NOTE(review): the ``for a in ancestors:`` loop header is missing
        # above the following membership test.
        if a in self.parents:
            new_ancestors.update(self.parents[a])

        # no more ancestors; stop
        if not new_ancestors:
            # NOTE(review): a ``break`` is missing here.

        ancestors = new_ancestors

        # NOTE(review): the ``def next(self):`` header is missing; the
        # method also appears to continue past the end of this excerpt
        # (no return of ``ret`` is visible).
        """Iterate over ancestors of heads in the target."""
        # NOTE(review): a guard on ``self.heads`` being non-empty appears
        # to be missing above the pop below.
        ret = self.heads.pop()
        ps = self.get_parents(ret)
        # Remember parents so ack() can walk ancestry without re-fetching.
        self.parents[ret] = ps
        self.heads.update(ps)