198ebc751a6827db1753f275055f52382704c60a
[jelmer/dulwich-libgit2.git] / dulwich / fastexport.py
1 # fastexport.py -- Fast export/import functionality
2 # Copyright (C) 2010 Jelmer Vernooij <jelmer@samba.org>
3 #
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License
6 # as published by the Free Software Foundation; version 2
7 # of the License or (at your option) any later version of
8 # the License.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 # MA  02110-1301, USA.
19
20
21 """Fast export/import functionality."""
22
23 from dulwich.index import (
24     commit_tree,
25     )
26 from dulwich.objects import (
27     Blob,
28     Commit,
29     format_timezone,
30     parse_timezone,
31     )
32
33 import stat
34
class FastExporter(object):
    """Generate a fast-export output stream for Git objects.

    Every exported object is labelled with a numeric mark; marks are
    allocated sequentially, starting at 1.  ``markers`` maps each mark that
    has been handed out to the id of the object it stands for.
    """

    def __init__(self, outf, store):
        """Create a new exporter.

        :param outf: File-like object whose ``write()`` method receives the
            generated stream.
        :param store: Object store the exported objects come from.  It is
            used for ``tree_changes()`` and for lookups by hex SHA.
        """
        self.outf = outf
        self.store = store
        # mark number -> id of the object exported under that mark
        self.markers = {}
        self._marker_idx = 0

    def _allocate_marker(self):
        """Return the next unused mark number."""
        self._marker_idx += 1
        return self._marker_idx

    def _dump_blob(self, blob, marker):
        """Write a ``blob`` command for ``blob``, labelled with ``marker``."""
        self.outf.write("blob\nmark :%s\n" % marker)
        self.outf.write("data %s\n" % blob.raw_length())
        for chunk in blob.as_raw_chunks():
            self.outf.write(chunk)
        self.outf.write("\n")

    def export_blob(self, blob):
        """Export a single blob.

        :param blob: Blob object to write to the stream.
        :return: The mark allocated for the blob.
        """
        i = self._allocate_marker()
        self.markers[i] = blob.id
        self._dump_blob(blob, i)
        return i

    def _dump_commit(self, commit, marker, ref, file_changes):
        """Write a ``commit`` command.

        :param commit: Commit object supplying author, committer and message.
        :param marker: Mark to label the commit with.
        :param ref: Name of the ref the commit is written for.
        :param file_changes: List of already formatted file change lines
            (without trailing newlines).
        """
        self.outf.write("commit %s\n" % ref)
        self.outf.write("mark :%s\n" % marker)
        self.outf.write("author %s %s %s\n" % (commit.author,
            commit.author_time, format_timezone(commit.author_timezone)))
        self.outf.write("committer %s %s %s\n" % (commit.committer,
            commit.commit_time, format_timezone(commit.commit_timezone)))
        self.outf.write("data %s\n" % len(commit.message))
        self.outf.write(commit.message)
        self.outf.write("\n")
        self.outf.write('\n'.join(file_changes))
        # The blank line terminates the list of file changes.
        self.outf.write("\n\n")

    def _collect_file_changes(self, base_tree, tree):
        """Export changed blobs and build the matching file change lines.

        Each entry yielded by ``store.tree_changes(base_tree, tree)`` is
        unpacked as ``((old_path, new_path), (old_mode, new_mode),
        (old_hexsha, new_hexsha))``.  An entry whose new path is None
        becomes a ``D`` line; any other non-directory entry has its blob
        exported and becomes an ``M`` line that refers to the blob's mark.

        :param base_tree: Tree to compare against (may be None).
        :param tree: Tree whose content is being exported.
        :return: List of file change lines, in the order the changes were
            reported.
        """
        file_changes = []
        for (old_path, new_path), (old_mode, new_mode), \
                (old_hexsha, new_hexsha) in \
                self.store.tree_changes(base_tree, tree):
            if new_path is None:
                file_changes.append("D %s" % old_path)
                continue
            if stat.S_ISDIR(new_mode):
                # A directory has no blob and therefore no mark to refer
                # to; emitting an "M" line here would reuse the mark of an
                # unrelated blob (or fail outright if none was exported
                # yet).  NOTE(review): this assumes the files below the
                # directory are reported as entries of their own.
                continue
            marker = self.export_blob(self.store[new_hexsha])
            file_changes.append("M %o :%s %s" % (new_mode, marker, new_path))
        return file_changes

    def export_commit(self, commit, ref, base_tree=None):
        """Export a commit together with the blobs it changes.

        The changed blobs are written first so that the commit can refer
        to them by mark.

        :param commit: Commit object to export.
        :param ref: Name of the ref the commit is written for.
        :param base_tree: Tree the commit's tree is compared against; None
            is passed through to ``store.tree_changes()`` unchanged.
        :return: The mark allocated for the commit.
        """
        file_changes = self._collect_file_changes(base_tree, commit.tree)
        i = self._allocate_marker()
        # Record the commit's mark as well, as export_blob() does for blobs.
        self.markers[i] = commit.id
        self._dump_commit(commit, i, ref, file_changes)
        return i
88
89
class FastImporter(object):
    """Class for importing fastimport streams.

    Please note that this is mostly a stub implementation at the moment,
    doing the bare minimum: only ``blob`` and ``commit`` commands are
    understood, and a commit may only carry ``M`` and ``D`` file changes.
    """

    def __init__(self, repo):
        """Create a new importer.

        :param repo: Repository to import into.  Its ``object_store``,
            ``refs`` and ``head()`` are used.
        """
        self.repo = repo

    def _parse_person(self, line):
        """Split the payload of an ``author``/``committer`` line.

        :param line: Text of the form ``<identity> <timestamp> <timezone>``;
            the identity itself may contain spaces.
        :return: Tuple of identity, integer timestamp and the first element
            of what ``parse_timezone()`` returns for the timezone text.
        """
        (name, timestr, timezonestr) = line.rsplit(" ", 2)
        return name, int(timestr), parse_timezone(timezonestr)[0]

    def _read_mark(self, stream):
        """Read the optional ``mark`` line at the start of a command body.

        :param stream: File-like object positioned after the command line.
        :return: Tuple of mark and line.  The mark is the text after
            ``mark :`` or None if there was no mark line; the line is the
            first line following the mark line (or the line that was read,
            if it was not a mark line).
        """
        line = stream.readline()
        if not line.startswith("mark :"):
            return None, line
        return line[len("mark :"):-1], stream.readline()

    def _read_data(self, stream, line, kind):
        """Read the payload announced by a ``data <size>`` line.

        :param stream: File-like object positioned right after ``line``.
        :param line: The line expected to be the ``data`` header.
        :param kind: Name of the command being parsed, used in the error
            message only.
        :return: The ``size`` characters following the header.
        :raise ValueError: If ``line`` is not a ``data`` header.
        """
        if not line.startswith("data "):
            raise ValueError("%s without valid data line: %s" % (kind, line))
        size = int(line[len("data "):])
        data = stream.read(size)
        # Skip the newline that follows the payload.
        stream.readline()
        return data

    def _apply_file_change(self, line, contents, marks):
        """Apply one file change line of a commit to ``contents``.

        :param line: The change line, without trailing newline.
        :param contents: Dictionary mapping path to (mode, hex SHA); it is
            updated in place.
        :param marks: Dictionary mapping integer marks to hex SHAs, used to
            resolve ``:<mark>`` references.
        :raise ValueError: If the line is neither an ``M`` nor a ``D`` line
            or does not have the expected number of fields.
        """
        if line.startswith("M "):
            # The path is the last field and may itself contain spaces, so
            # only the first three separators are significant.
            (kind, modestr, val, path) = line.split(" ", 3)
            if val.startswith(":"):
                val = marks[int(val[1:])]
            contents[path] = (int(modestr, 8), val)
        elif line.startswith("D "):
            path = line[len("D "):]
            prefix = path + "/"
            # Deleting a directory removes everything below it as well;
            # paths that are not present are ignored.
            for known in list(contents):
                if known == path or known.startswith(prefix):
                    del contents[known]
        else:
            raise ValueError(line)

    def _read_blob(self, stream):
        """Read the body of a ``blob`` command and store the blob.

        :param stream: File-like object positioned after the ``blob`` line.
        :return: Tuple of mark (string or None) and the id of the new blob.
        :raise ValueError: If the ``data`` line is missing.
        """
        mark, line = self._read_mark(stream)
        o = Blob()
        o.data = self._read_data(stream, line, "Blob")
        self.repo.object_store.add_object(o)
        return mark, o.id

    def _read_commit(self, stream, contents, marks):
        """Read the body of a ``commit`` command and store the commit.

        The current head of the repository, if there is one, becomes the
        only parent of the new commit.

        :param stream: File-like object positioned after the ``commit``
            line.
        :param contents: Dictionary mapping path to (mode, hex SHA) that
            describes the tree so far; the file changes of this commit are
            applied to it in place and the commit's tree is built from the
            result.
        :param marks: Dictionary mapping integer marks to hex SHAs.
        :return: Tuple of mark (string or None) and the id of the new
            commit.
        :raise ValueError: If the committer or data line is missing, or a
            file change line is not understood.
        """
        mark, line = self._read_mark(stream)
        o = Commit()
        o.author = None
        o.author_time = None
        have_committer = False
        while line.startswith("author "):
            (o.author, o.author_time, o.author_timezone) = \
                    self._parse_person(line[len("author "):-1])
            line = stream.readline()
        while line.startswith("committer "):
            (o.committer, o.commit_time, o.commit_timezone) = \
                    self._parse_person(line[len("committer "):-1])
            have_committer = True
            line = stream.readline()
        if not have_committer:
            # Without this check the missing committer only surfaces later
            # as an unrelated attribute error.
            raise ValueError("Commit without committer line: %s" % line)
        # The author is optional and defaults to the committer.
        if o.author is None:
            o.author = o.committer
        if o.author_time is None:
            o.author_time = o.commit_time
            o.author_timezone = o.commit_timezone
        o.message = self._read_data(stream, line, "Commit")
        # File changes follow until a blank line or the end of the stream.
        line = stream.readline()[:-1]
        while line:
            self._apply_file_change(line, contents, marks)
            line = stream.readline()[:-1]
        try:
            o.parents = (self.repo.head(),)
        except KeyError:
            # No head yet, so this is a root commit.
            o.parents = ()
        o.tree = commit_tree(self.repo.object_store,
            ((path, hexsha, mode) for (path, (mode, hexsha)) in
                contents.items()))
        self.repo.object_store.add_object(o)
        return mark, o.id

    def import_stream(self, stream):
        """Import from a file-like object.

        Blank lines and lines starting with ``#`` are skipped.  The tree
        content is accumulated over all commits in the stream, and after
        each commit both ``HEAD`` and the commit's ref are pointed at it.

        :param stream: File-like object to read a fastimport stream from.
        :return: Dictionary with marks
        :raise ValueError: If an unsupported command is encountered.
        """
        contents = {}
        marks = {}
        while True:
            line = stream.readline()
            if not line:
                break
            line = line[:-1]
            if line == "" or line[0] == "#":
                continue
            if line.startswith("blob"):
                mark, hexsha = self._read_blob(stream)
                if mark is not None:
                    marks[int(mark)] = hexsha
            elif line.startswith("commit "):
                # The newline is already gone at this point; slicing off
                # another character would truncate the ref name.
                ref = line[len("commit "):]
                mark, hexsha = self._read_commit(stream, contents, marks)
                if mark is not None:
                    marks[int(mark)] = hexsha
                self.repo.refs["HEAD"] = self.repo.refs[ref] = hexsha
            else:
                raise ValueError("invalid command '%s'" % line)
        return marks
194                 raise ValueError("invalid command '%s'" % line)
195         return marks