1 # test_diff_tree.py -- Tests for file and tree diff utilities.
2 # Copyright (C) 2010 Google, Inc.
4 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5 # General Public License as public by the Free Software Foundation; version 2.0
6 # or (at your option) any later version. You can redistribute it and/or
7 # modify it under the terms of either of these two licenses.
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # You should have received a copy of the licenses; if not, see
16 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18 # License, Version 2.0.
21 """Tests for file and tree diff utilities."""
23 from itertools import permutations
24 from dulwich.diff_tree import (
33 tree_changes_for_merge,
42 from dulwich.index import (
45 from dulwich.object_store import (
48 from dulwich.objects import (
54 from dulwich.tests import (
57 from dulwich.tests.utils import (
65 class DiffTestCase(TestCase):
68 super(DiffTestCase, self).setUp()
69 self.store = MemoryObjectStore()
70 self.empty_tree = self.commit_tree([])
72 def commit_tree(self, entries):
79 path, obj, mode = entry
80 if isinstance(obj, Blob):
81 self.store.add_object(obj)
85 commit_blobs.append((path, sha, mode))
86 return self.store[commit_tree(self.store, commit_blobs)]
89 class TreeChangesTest(DiffTestCase):
92 super(TreeChangesTest, self).setUp()
93 self.detector = RenameDetector(self.store)
95 def assertMergeFails(self, merge_entries, name, mode, sha):
98 self.assertRaises((TypeError, ValueError), merge_entries, '', t, t)
100 def _do_test_merge_entries(self, merge_entries):
101 blob_a1 = make_object(Blob, data=b'a1')
102 blob_a2 = make_object(Blob, data=b'a2')
103 blob_b1 = make_object(Blob, data=b'b1')
104 blob_c2 = make_object(Blob, data=b'c2')
105 tree1 = self.commit_tree([(b'a', blob_a1, 0o100644),
106 (b'b', blob_b1, 0o100755)])
107 tree2 = self.commit_tree([(b'a', blob_a2, 0o100644),
108 (b'c', blob_c2, 0o100755)])
110 self.assertEqual([], merge_entries(b'', self.empty_tree,
113 [((None, None, None), (b'a', 0o100644, blob_a1.id)),
114 ((None, None, None), (b'b', 0o100755, blob_b1.id)), ],
115 merge_entries(b'', self.empty_tree, tree1))
117 [((None, None, None), (b'x/a', 0o100644, blob_a1.id)),
118 ((None, None, None), (b'x/b', 0o100755, blob_b1.id)), ],
119 merge_entries(b'x', self.empty_tree, tree1))
122 [((b'a', 0o100644, blob_a2.id), (None, None, None)),
123 ((b'c', 0o100755, blob_c2.id), (None, None, None)), ],
124 merge_entries(b'', tree2, self.empty_tree))
127 [((b'a', 0o100644, blob_a1.id), (b'a', 0o100644, blob_a2.id)),
128 ((b'b', 0o100755, blob_b1.id), (None, None, None)),
129 ((None, None, None), (b'c', 0o100755, blob_c2.id)), ],
130 merge_entries(b'', tree1, tree2))
133 [((b'a', 0o100644, blob_a2.id), (b'a', 0o100644, blob_a1.id)),
134 ((None, None, None), (b'b', 0o100755, blob_b1.id)),
135 ((b'c', 0o100755, blob_c2.id), (None, None, None)), ],
136 merge_entries(b'', tree2, tree1))
138 self.assertMergeFails(merge_entries, 0xdeadbeef, 0o100644, '1' * 40)
139 self.assertMergeFails(merge_entries, b'a', b'deadbeef', '1' * 40)
140 self.assertMergeFails(merge_entries, b'a', 0o100644, 0xdeadbeef)
142 test_merge_entries = functest_builder(_do_test_merge_entries,
144 test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries,
147 def _do_test_is_tree(self, is_tree):
148 self.assertFalse(is_tree(TreeEntry(None, None, None)))
149 self.assertFalse(is_tree(TreeEntry(b'a', 0o100644, b'a' * 40)))
150 self.assertFalse(is_tree(TreeEntry(b'a', 0o100755, b'a' * 40)))
151 self.assertFalse(is_tree(TreeEntry(b'a', 0o120000, b'a' * 40)))
152 self.assertTrue(is_tree(TreeEntry(b'a', 0o040000, b'a' * 40)))
153 self.assertRaises(TypeError, is_tree, TreeEntry(b'a', b'x', b'a' * 40))
154 self.assertRaises(AttributeError, is_tree, 1234)
156 test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
157 test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
159 def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
160 actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
161 self.assertEqual(expected, actual)
163 # For brevity, the following tests use tuples instead of TreeEntry objects.
165 def test_tree_changes_empty(self):
166 self.assertChangesEqual([], self.empty_tree, self.empty_tree)
168 def test_tree_changes_no_changes(self):
169 blob = make_object(Blob, data=b'blob')
170 tree = self.commit_tree([(b'a', blob), (b'b/c', blob)])
171 self.assertChangesEqual([], self.empty_tree, self.empty_tree)
172 self.assertChangesEqual([], tree, tree)
173 self.assertChangesEqual(
174 [TreeChange(CHANGE_UNCHANGED, (b'a', F, blob.id),
176 TreeChange(CHANGE_UNCHANGED, (b'b/c', F, blob.id),
177 (b'b/c', F, blob.id))],
178 tree, tree, want_unchanged=True)
180 def test_tree_changes_add_delete(self):
181 blob_a = make_object(Blob, data=b'a')
182 blob_b = make_object(Blob, data=b'b')
183 tree = self.commit_tree([(b'a', blob_a, 0o100644),
184 (b'x/b', blob_b, 0o100755)])
185 self.assertChangesEqual(
186 [TreeChange.add((b'a', 0o100644, blob_a.id)),
187 TreeChange.add((b'x/b', 0o100755, blob_b.id))],
188 self.empty_tree, tree)
189 self.assertChangesEqual(
190 [TreeChange.delete((b'a', 0o100644, blob_a.id)),
191 TreeChange.delete((b'x/b', 0o100755, blob_b.id))],
192 tree, self.empty_tree)
194 def test_tree_changes_modify_contents(self):
195 blob_a1 = make_object(Blob, data=b'a1')
196 blob_a2 = make_object(Blob, data=b'a2')
197 tree1 = self.commit_tree([(b'a', blob_a1)])
198 tree2 = self.commit_tree([(b'a', blob_a2)])
199 self.assertChangesEqual(
200 [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
201 (b'a', F, blob_a2.id))],
204 def test_tree_changes_modify_mode(self):
205 blob_a = make_object(Blob, data=b'a')
206 tree1 = self.commit_tree([(b'a', blob_a, 0o100644)])
207 tree2 = self.commit_tree([(b'a', blob_a, 0o100755)])
208 self.assertChangesEqual(
209 [TreeChange(CHANGE_MODIFY, (b'a', 0o100644, blob_a.id),
210 (b'a', 0o100755, blob_a.id))],
213 def test_tree_changes_change_type(self):
214 blob_a1 = make_object(Blob, data=b'a')
215 blob_a2 = make_object(Blob, data=b'/foo/bar')
216 tree1 = self.commit_tree([(b'a', blob_a1, 0o100644)])
217 tree2 = self.commit_tree([(b'a', blob_a2, 0o120000)])
218 self.assertChangesEqual(
219 [TreeChange.delete((b'a', 0o100644, blob_a1.id)),
220 TreeChange.add((b'a', 0o120000, blob_a2.id))],
223 def test_tree_changes_change_type_same(self):
224 blob_a1 = make_object(Blob, data=b'a')
225 blob_a2 = make_object(Blob, data=b'/foo/bar')
226 tree1 = self.commit_tree([(b'a', blob_a1, 0o100644)])
227 tree2 = self.commit_tree([(b'a', blob_a2, 0o120000)])
228 self.assertChangesEqual(
229 [TreeChange(CHANGE_MODIFY, (b'a', 0o100644, blob_a1.id),
230 (b'a', 0o120000, blob_a2.id))],
231 tree1, tree2, change_type_same=True)
233 def test_tree_changes_to_tree(self):
234 blob_a = make_object(Blob, data=b'a')
235 blob_x = make_object(Blob, data=b'x')
236 tree1 = self.commit_tree([(b'a', blob_a)])
237 tree2 = self.commit_tree([(b'a/x', blob_x)])
238 self.assertChangesEqual(
239 [TreeChange.delete((b'a', F, blob_a.id)),
240 TreeChange.add((b'a/x', F, blob_x.id))],
243 def test_tree_changes_complex(self):
244 blob_a_1 = make_object(Blob, data=b'a1_1')
245 blob_bx1_1 = make_object(Blob, data=b'bx1_1')
246 blob_bx2_1 = make_object(Blob, data=b'bx2_1')
247 blob_by1_1 = make_object(Blob, data=b'by1_1')
248 blob_by2_1 = make_object(Blob, data=b'by2_1')
249 tree1 = self.commit_tree([
251 (b'b/x/1', blob_bx1_1),
252 (b'b/x/2', blob_bx2_1),
253 (b'b/y/1', blob_by1_1),
254 (b'b/y/2', blob_by2_1),
257 blob_a_2 = make_object(Blob, data=b'a1_2')
258 blob_bx1_2 = blob_bx1_1
259 blob_by_2 = make_object(Blob, data=b'by_2')
260 blob_c_2 = make_object(Blob, data=b'c_2')
261 tree2 = self.commit_tree([
263 (b'b/x/1', blob_bx1_2),
268 self.assertChangesEqual(
269 [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a_1.id),
270 (b'a', F, blob_a_2.id)),
271 TreeChange.delete((b'b/x/2', F, blob_bx2_1.id)),
272 TreeChange.add((b'b/y', F, blob_by_2.id)),
273 TreeChange.delete((b'b/y/1', F, blob_by1_1.id)),
274 TreeChange.delete((b'b/y/2', F, blob_by2_1.id)),
275 TreeChange.add((b'c', F, blob_c_2.id))],
278 def test_tree_changes_name_order(self):
279 blob = make_object(Blob, data=b'a')
280 tree1 = self.commit_tree([(b'a', blob), (b'a.', blob), (b'a..', blob)])
281 # Tree order is the reverse of this, so if we used tree order, 'a..'
282 # would not be merged.
283 tree2 = self.commit_tree(
284 [(b'a/x', blob), (b'a./x', blob), (b'a..', blob)])
286 self.assertChangesEqual(
287 [TreeChange.delete((b'a', F, blob.id)),
288 TreeChange.add((b'a/x', F, blob.id)),
289 TreeChange.delete((b'a.', F, blob.id)),
290 TreeChange.add((b'a./x', F, blob.id))],
293 def test_tree_changes_prune(self):
294 blob_a1 = make_object(Blob, data=b'a1')
295 blob_a2 = make_object(Blob, data=b'a2')
296 blob_x = make_object(Blob, data=b'x')
297 tree1 = self.commit_tree([(b'a', blob_a1), (b'b/x', blob_x)])
298 tree2 = self.commit_tree([(b'a', blob_a2), (b'b/x', blob_x)])
299 # Remove identical items so lookups will fail unless we prune.
300 subtree = self.store[tree1[b'b'][1]]
301 for entry in subtree.items():
302 del self.store[entry.sha]
303 del self.store[subtree.id]
305 self.assertChangesEqual(
306 [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
307 (b'a', F, blob_a2.id))],
310 def test_tree_changes_rename_detector(self):
311 blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n')
312 blob_a2 = make_object(Blob, data=b'a\nb\nc\ne\n')
313 blob_b = make_object(Blob, data=b'b')
314 tree1 = self.commit_tree([(b'a', blob_a1), (b'b', blob_b)])
315 tree2 = self.commit_tree([(b'c', blob_a2), (b'b', blob_b)])
316 detector = RenameDetector(self.store)
318 self.assertChangesEqual(
319 [TreeChange.delete((b'a', F, blob_a1.id)),
320 TreeChange.add((b'c', F, blob_a2.id))],
322 self.assertChangesEqual(
323 [TreeChange.delete((b'a', F, blob_a1.id)),
324 TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id),
325 (b'b', F, blob_b.id)),
326 TreeChange.add((b'c', F, blob_a2.id))],
327 tree1, tree2, want_unchanged=True)
328 self.assertChangesEqual(
329 [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id),
330 (b'c', F, blob_a2.id))],
331 tree1, tree2, rename_detector=detector)
332 self.assertChangesEqual(
333 [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id),
334 (b'c', F, blob_a2.id)),
335 TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id),
336 (b'b', F, blob_b.id))],
337 tree1, tree2, rename_detector=detector, want_unchanged=True)
339 def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
341 parent_tree_ids = [t.id for t in parent_trees]
342 actual = list(tree_changes_for_merge(
343 self.store, parent_tree_ids, merge_tree.id, **kwargs))
344 self.assertEqual(expected, actual)
346 parent_tree_ids.reverse()
347 expected = [list(reversed(cs)) for cs in expected]
348 actual = list(tree_changes_for_merge(
349 self.store, parent_tree_ids, merge_tree.id, **kwargs))
350 self.assertEqual(expected, actual)
352 def test_tree_changes_for_merge_add_no_conflict(self):
353 blob = make_object(Blob, data=b'blob')
354 parent1 = self.commit_tree([])
355 parent2 = merge = self.commit_tree([(b'a', blob)])
356 self.assertChangesForMergeEqual([], [parent1, parent2], merge)
357 self.assertChangesForMergeEqual([], [parent2, parent2], merge)
359 def test_tree_changes_for_merge_add_modify_conflict(self):
360 blob1 = make_object(Blob, data=b'1')
361 blob2 = make_object(Blob, data=b'2')
362 parent1 = self.commit_tree([])
363 parent2 = self.commit_tree([(b'a', blob1)])
364 merge = self.commit_tree([(b'a', blob2)])
365 self.assertChangesForMergeEqual(
366 [[TreeChange.add((b'a', F, blob2.id)),
367 TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
368 (b'a', F, blob2.id))]],
369 [parent1, parent2], merge)
371 def test_tree_changes_for_merge_modify_modify_conflict(self):
372 blob1 = make_object(Blob, data=b'1')
373 blob2 = make_object(Blob, data=b'2')
374 blob3 = make_object(Blob, data=b'3')
375 parent1 = self.commit_tree([(b'a', blob1)])
376 parent2 = self.commit_tree([(b'a', blob2)])
377 merge = self.commit_tree([(b'a', blob3)])
378 self.assertChangesForMergeEqual(
379 [[TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
380 (b'a', F, blob3.id)),
381 TreeChange(CHANGE_MODIFY, (b'a', F, blob2.id),
382 (b'a', F, blob3.id))]],
383 [parent1, parent2], merge)
385 def test_tree_changes_for_merge_modify_no_conflict(self):
386 blob1 = make_object(Blob, data=b'1')
387 blob2 = make_object(Blob, data=b'2')
388 parent1 = self.commit_tree([(b'a', blob1)])
389 parent2 = merge = self.commit_tree([(b'a', blob2)])
390 self.assertChangesForMergeEqual([], [parent1, parent2], merge)
392 def test_tree_changes_for_merge_delete_delete_conflict(self):
393 blob1 = make_object(Blob, data=b'1')
394 blob2 = make_object(Blob, data=b'2')
395 parent1 = self.commit_tree([(b'a', blob1)])
396 parent2 = self.commit_tree([(b'a', blob2)])
397 merge = self.commit_tree([])
398 self.assertChangesForMergeEqual(
399 [[TreeChange.delete((b'a', F, blob1.id)),
400 TreeChange.delete((b'a', F, blob2.id))]],
401 [parent1, parent2], merge)
403 def test_tree_changes_for_merge_delete_no_conflict(self):
404 blob = make_object(Blob, data=b'blob')
405 has = self.commit_tree([(b'a', blob)])
406 doesnt_have = self.commit_tree([])
407 self.assertChangesForMergeEqual([], [has, has], doesnt_have)
408 self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
410 def test_tree_changes_for_merge_octopus_no_conflict(self):
412 blobs = [make_object(Blob, data=bytes(i)) for i in r]
413 parents = [self.commit_tree([(b'a', blobs[i])]) for i in r]
415 # Take the SHA from each of the parents.
416 self.assertChangesForMergeEqual([], parents, parents[i])
418 def test_tree_changes_for_merge_octopus_modify_conflict(self):
419 # Because the octopus merge strategy is limited, I doubt it's possible
420 # to create this with the git command line. But the output is well-
421 # defined, so test it anyway.
423 parent_blobs = [make_object(Blob, data=bytes(i)) for i in r]
424 merge_blob = make_object(Blob, data=b'merge')
425 parents = [self.commit_tree([(b'a', parent_blobs[i])]) for i in r]
426 merge = self.commit_tree([(b'a', merge_blob)])
427 expected = [[TreeChange(CHANGE_MODIFY, (b'a', F, parent_blobs[i].id),
428 (b'a', F, merge_blob.id)) for i in r]]
429 self.assertChangesForMergeEqual(expected, parents, merge)
431 def test_tree_changes_for_merge_octopus_delete(self):
432 blob1 = make_object(Blob, data=b'1')
433 blob2 = make_object(Blob, data=b'3')
434 parent1 = self.commit_tree([(b'a', blob1)])
435 parent2 = self.commit_tree([(b'a', blob2)])
436 parent3 = merge = self.commit_tree([])
437 self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
438 self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
439 self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
440 self.assertChangesForMergeEqual(
441 [[TreeChange.delete((b'a', F, blob1.id)),
442 TreeChange.delete((b'a', F, blob2.id)),
444 [parent1, parent2, parent3], merge)
446 def test_tree_changes_for_merge_add_add_same_conflict(self):
447 blob = make_object(Blob, data=b'a\nb\nc\nd\n')
448 parent1 = self.commit_tree([(b'a', blob)])
449 parent2 = self.commit_tree([])
450 merge = self.commit_tree([(b'b', blob)])
451 add = TreeChange.add((b'b', F, blob.id))
452 self.assertChangesForMergeEqual(
453 [[add, add]], [parent1, parent2], merge)
455 def test_tree_changes_for_merge_add_exact_rename_conflict(self):
456 blob = make_object(Blob, data=b'a\nb\nc\nd\n')
457 parent1 = self.commit_tree([(b'a', blob)])
458 parent2 = self.commit_tree([])
459 merge = self.commit_tree([(b'b', blob)])
460 self.assertChangesForMergeEqual(
461 [[TreeChange(CHANGE_RENAME, (b'a', F, blob.id),
463 TreeChange.add((b'b', F, blob.id))]],
464 [parent1, parent2], merge, rename_detector=self.detector)
466 def test_tree_changes_for_merge_add_content_rename_conflict(self):
467 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
468 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
469 parent1 = self.commit_tree([(b'a', blob1)])
470 parent2 = self.commit_tree([])
471 merge = self.commit_tree([(b'b', blob2)])
472 self.assertChangesForMergeEqual(
473 [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
474 (b'b', F, blob2.id)),
475 TreeChange.add((b'b', F, blob2.id))]],
476 [parent1, parent2], merge, rename_detector=self.detector)
478 def test_tree_changes_for_merge_modify_rename_conflict(self):
479 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
480 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
481 parent1 = self.commit_tree([(b'a', blob1)])
482 parent2 = self.commit_tree([(b'b', blob1)])
483 merge = self.commit_tree([(b'b', blob2)])
484 self.assertChangesForMergeEqual(
485 [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
486 (b'b', F, blob2.id)),
487 TreeChange(CHANGE_MODIFY, (b'b', F, blob1.id),
488 (b'b', F, blob2.id))]],
489 [parent1, parent2], merge, rename_detector=self.detector)
492 class RenameDetectionTest(DiffTestCase):
494 def _do_test_count_blocks(self, count_blocks):
495 blob = make_object(Blob, data=b'a\nb\na\n')
496 self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2},
499 test_count_blocks = functest_builder(_do_test_count_blocks,
501 test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
504 def _do_test_count_blocks_no_newline(self, count_blocks):
505 blob = make_object(Blob, data=b'a\na')
506 self.assertEqual({hash(b'a\n'): 2, hash(b'a'): 1}, _count_blocks(blob))
508 test_count_blocks_no_newline = functest_builder(
509 _do_test_count_blocks_no_newline, _count_blocks_py)
510 test_count_blocks_no_newline_extension = ext_functest_builder(
511 _do_test_count_blocks_no_newline, _count_blocks)
513 def _do_test_count_blocks_chunks(self, count_blocks):
514 blob = ShaFile.from_raw_chunks(Blob.type_num, [b'a\nb', b'\na\n'])
515 self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2},
518 test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
520 test_count_blocks_chunks_extension = ext_functest_builder(
521 _do_test_count_blocks_chunks, _count_blocks)
523 def _do_test_count_blocks_long_lines(self, count_blocks):
525 data = a + b'xxx\ny\n' + a + b'zzz\n'
526 blob = make_object(Blob, data=data)
527 self.assertEqual({hash(b'a' * 64): 128, hash(b'xxx\n'): 4,
528 hash(b'y\n'): 2, hash(b'zzz\n'): 4},
531 test_count_blocks_long_lines = functest_builder(
532 _do_test_count_blocks_long_lines, _count_blocks_py)
533 test_count_blocks_long_lines_extension = ext_functest_builder(
534 _do_test_count_blocks_long_lines, _count_blocks)
536 def assertSimilar(self, expected_score, blob1, blob2):
537 self.assertEqual(expected_score, _similarity_score(blob1, blob2))
538 self.assertEqual(expected_score, _similarity_score(blob2, blob1))
540 def test_similarity_score(self):
541 blob0 = make_object(Blob, data=b'')
542 blob1 = make_object(Blob, data=b'ab\ncd\ncd\n')
543 blob2 = make_object(Blob, data=b'ab\n')
544 blob3 = make_object(Blob, data=b'cd\n')
545 blob4 = make_object(Blob, data=b'cd\ncd\n')
547 self.assertSimilar(100, blob0, blob0)
548 self.assertSimilar(0, blob0, blob1)
549 self.assertSimilar(33, blob1, blob2)
550 self.assertSimilar(33, blob1, blob3)
551 self.assertSimilar(66, blob1, blob4)
552 self.assertSimilar(0, blob2, blob3)
553 self.assertSimilar(50, blob3, blob4)
555 def test_similarity_score_cache(self):
556 blob1 = make_object(Blob, data=b'ab\ncd\n')
557 blob2 = make_object(Blob, data=b'ab\n')
561 50, _similarity_score(blob1, blob2, block_cache=block_cache))
562 self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
565 self.fail('Unexpected call to as_raw_chunks()')
567 blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
568 blob1.raw_length = lambda: 6
569 blob2.raw_length = lambda: 3
571 50, _similarity_score(blob1, blob2, block_cache=block_cache))
573 def test_tree_entry_sort(self):
576 TreeChange.add(TreeEntry(b'aaa', F, sha)),
577 TreeChange(CHANGE_COPY, TreeEntry(b'bbb', F, sha),
578 TreeEntry(b'aab', F, sha)),
579 TreeChange(CHANGE_MODIFY, TreeEntry(b'bbb', F, sha),
580 TreeEntry(b'bbb', F, b'dabc' * 10)),
581 TreeChange(CHANGE_RENAME, TreeEntry(b'bbc', F, sha),
582 TreeEntry(b'ddd', F, sha)),
583 TreeChange.delete(TreeEntry(b'ccc', F, sha)),
586 for perm in permutations(expected_entries):
587 self.assertEqual(expected_entries,
588 sorted(perm, key=_tree_change_key))
590 def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
591 detector = RenameDetector(self.store, **kwargs)
592 return detector.changes_with_renames(tree1.id, tree2.id,
593 want_unchanged=want_unchanged)
595 def test_no_renames(self):
596 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
597 blob2 = make_object(Blob, data=b'a\nb\ne\nf\n')
598 blob3 = make_object(Blob, data=b'a\nb\ng\nh\n')
599 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
600 tree2 = self.commit_tree([(b'a', blob1), (b'b', blob3)])
602 [TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id),
603 (b'b', F, blob3.id))],
604 self.detect_renames(tree1, tree2))
606 def test_exact_rename_one_to_one(self):
607 blob1 = make_object(Blob, data=b'1')
608 blob2 = make_object(Blob, data=b'2')
609 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
610 tree2 = self.commit_tree([(b'c', blob1), (b'd', blob2)])
612 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
613 (b'c', F, blob1.id)),
614 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
615 (b'd', F, blob2.id))],
616 self.detect_renames(tree1, tree2))
618 def test_exact_rename_split_different_type(self):
619 blob = make_object(Blob, data=b'/foo')
620 tree1 = self.commit_tree([(b'a', blob, 0o100644)])
621 tree2 = self.commit_tree([(b'a', blob, 0o120000)])
623 [TreeChange.add((b'a', 0o120000, blob.id)),
624 TreeChange.delete((b'a', 0o100644, blob.id))],
625 self.detect_renames(tree1, tree2))
627 def test_exact_rename_and_different_type(self):
628 blob1 = make_object(Blob, data=b'1')
629 blob2 = make_object(Blob, data=b'2')
630 tree1 = self.commit_tree([(b'a', blob1)])
631 tree2 = self.commit_tree([(b'a', blob2, 0o120000), (b'b', blob1)])
633 [TreeChange.add((b'a', 0o120000, blob2.id)),
634 TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
635 (b'b', F, blob1.id))],
636 self.detect_renames(tree1, tree2))
638 def test_exact_rename_one_to_many(self):
639 blob = make_object(Blob, data=b'1')
640 tree1 = self.commit_tree([(b'a', blob)])
641 tree2 = self.commit_tree([(b'b', blob), (b'c', blob)])
643 [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'b', F, blob.id)),
644 TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'c', F, blob.id))],
645 self.detect_renames(tree1, tree2))
647 def test_exact_rename_many_to_one(self):
648 blob = make_object(Blob, data=b'1')
649 tree1 = self.commit_tree([(b'a', blob), (b'b', blob)])
650 tree2 = self.commit_tree([(b'c', blob)])
652 [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'c', F, blob.id)),
653 TreeChange.delete((b'b', F, blob.id))],
654 self.detect_renames(tree1, tree2))
656 def test_exact_rename_many_to_many(self):
657 blob = make_object(Blob, data=b'1')
658 tree1 = self.commit_tree([(b'a', blob), (b'b', blob)])
659 tree2 = self.commit_tree([(b'c', blob), (b'd', blob), (b'e', blob)])
661 [TreeChange(CHANGE_RENAME, (b'a', F, blob.id),
663 TreeChange(CHANGE_COPY, (b'a', F, blob.id),
665 TreeChange(CHANGE_RENAME, (b'b', F, blob.id),
666 (b'd', F, blob.id))],
667 self.detect_renames(tree1, tree2))
669 def test_exact_copy_modify(self):
670 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
671 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
672 tree1 = self.commit_tree([(b'a', blob1)])
673 tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
675 [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
676 (b'a', F, blob2.id)),
677 TreeChange(CHANGE_COPY, (b'a', F, blob1.id),
678 (b'b', F, blob1.id))],
679 self.detect_renames(tree1, tree2))
681 def test_exact_copy_change_mode(self):
682 blob = make_object(Blob, data=b'a\nb\nc\nd\n')
683 tree1 = self.commit_tree([(b'a', blob)])
684 tree2 = self.commit_tree([(b'a', blob, 0o100755), (b'b', blob)])
686 [TreeChange(CHANGE_MODIFY, (b'a', F, blob.id),
687 (b'a', 0o100755, blob.id)),
688 TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'b', F, blob.id))],
689 self.detect_renames(tree1, tree2))
691 def test_rename_threshold(self):
692 blob1 = make_object(Blob, data=b'a\nb\nc\n')
693 blob2 = make_object(Blob, data=b'a\nb\nd\n')
694 tree1 = self.commit_tree([(b'a', blob1)])
695 tree2 = self.commit_tree([(b'b', blob2)])
697 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
698 (b'b', F, blob2.id))],
699 self.detect_renames(tree1, tree2, rename_threshold=50))
701 [TreeChange.delete((b'a', F, blob1.id)),
702 TreeChange.add((b'b', F, blob2.id))],
703 self.detect_renames(tree1, tree2, rename_threshold=75))
705 def test_content_rename_max_files(self):
706 blob1 = make_object(Blob, data=b'a\nb\nc\nd')
707 blob4 = make_object(Blob, data=b'a\nb\nc\ne\n')
708 blob2 = make_object(Blob, data=b'e\nf\ng\nh\n')
709 blob3 = make_object(Blob, data=b'e\nf\ng\ni\n')
710 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
711 tree2 = self.commit_tree([(b'c', blob3), (b'd', blob4)])
713 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
714 (b'd', F, blob4.id)),
715 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
716 (b'c', F, blob3.id))],
717 self.detect_renames(tree1, tree2))
719 [TreeChange.delete((b'a', F, blob1.id)),
720 TreeChange.delete((b'b', F, blob2.id)),
721 TreeChange.add((b'c', F, blob3.id)),
722 TreeChange.add((b'd', F, blob4.id))],
723 self.detect_renames(tree1, tree2, max_files=1))
725 def test_content_rename_one_to_one(self):
726 b11 = make_object(Blob, data=b'a\nb\nc\nd\n')
727 b12 = make_object(Blob, data=b'a\nb\nc\ne\n')
728 b21 = make_object(Blob, data=b'e\nf\ng\n\nh')
729 b22 = make_object(Blob, data=b'e\nf\ng\n\ni')
730 tree1 = self.commit_tree([(b'a', b11), (b'b', b21)])
731 tree2 = self.commit_tree([(b'c', b12), (b'd', b22)])
733 [TreeChange(CHANGE_RENAME, (b'a', F, b11.id), (b'c', F, b12.id)),
734 TreeChange(CHANGE_RENAME, (b'b', F, b21.id), (b'd', F, b22.id))],
735 self.detect_renames(tree1, tree2))
737 def test_content_rename_one_to_one_ordering(self):
738 blob1 = make_object(Blob, data=b'a\nb\nc\nd\ne\nf\n')
739 blob2 = make_object(Blob, data=b'a\nb\nc\nd\ng\nh\n')
740 # 6/10 match to blob1, 8/10 match to blob2
741 blob3 = make_object(Blob, data=b'a\nb\nc\nd\ng\ni\n')
742 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
743 tree2 = self.commit_tree([(b'c', blob3)])
745 [TreeChange.delete((b'a', F, blob1.id)),
746 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
747 (b'c', F, blob3.id))],
748 self.detect_renames(tree1, tree2))
750 tree3 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
751 tree4 = self.commit_tree([(b'c', blob3)])
753 [TreeChange(CHANGE_RENAME, (b'a', F, blob2.id),
754 (b'c', F, blob3.id)),
755 TreeChange.delete((b'b', F, blob1.id))],
756 self.detect_renames(tree3, tree4))
758 def test_content_rename_one_to_many(self):
759 blob1 = make_object(Blob, data=b'aa\nb\nc\nd\ne\n')
760 blob2 = make_object(Blob, data=b'ab\nb\nc\nd\ne\n') # 8/11 match
761 blob3 = make_object(Blob, data=b'aa\nb\nc\nd\nf\n') # 9/11 match
762 tree1 = self.commit_tree([(b'a', blob1)])
763 tree2 = self.commit_tree([(b'b', blob2), (b'c', blob3)])
765 [TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id)),
766 TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
767 (b'c', F, blob3.id))],
768 self.detect_renames(tree1, tree2))
770 def test_content_rename_many_to_one(self):
771 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
772 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
773 blob3 = make_object(Blob, data=b'a\nb\nc\nf\n')
774 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
775 tree2 = self.commit_tree([(b'c', blob3)])
777 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
778 (b'c', F, blob3.id)),
779 TreeChange.delete((b'b', F, blob2.id))],
780 self.detect_renames(tree1, tree2))
782 def test_content_rename_many_to_many(self):
783 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
784 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
785 blob3 = make_object(Blob, data=b'a\nb\nc\nf\n')
786 blob4 = make_object(Blob, data=b'a\nb\nc\ng\n')
787 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
788 tree2 = self.commit_tree([(b'c', blob3), (b'd', blob4)])
789 # TODO(dborowitz): Distribute renames rather than greedily choosing
792 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
793 (b'c', F, blob3.id)),
794 TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'd', F, blob4.id)),
795 TreeChange.delete((b'b', F, blob2.id))],
796 self.detect_renames(tree1, tree2))
798 def test_content_rename_with_more_deletions(self):
799 blob1 = make_object(Blob, data=b'')
800 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob1), (b'c', blob1),
802 tree2 = self.commit_tree([(b'e', blob1), (b'f', blob1), (b'g', blob1)])
805 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'e', F, blob1.id)),
806 TreeChange(CHANGE_RENAME, (b'b', F, blob1.id), (b'f', F, blob1.id)),
807 TreeChange(CHANGE_RENAME, (b'c', F, blob1.id), (b'g', F, blob1.id)),
808 TreeChange.delete((b'd', F, blob1.id))],
809 self.detect_renames(tree1, tree2))
811 def test_content_rename_gitlink(self):
812 blob1 = make_object(Blob, data=b'blob1')
813 blob2 = make_object(Blob, data=b'blob2')
816 tree1 = self.commit_tree([(b'a', blob1), (b'b', link1, 0o160000)])
817 tree2 = self.commit_tree([(b'c', blob2), (b'd', link2, 0o160000)])
819 [TreeChange.delete((b'a', 0o100644, blob1.id)),
820 TreeChange.delete((b'b', 0o160000, link1)),
821 TreeChange.add((b'c', 0o100644, blob2.id)),
822 TreeChange.add((b'd', 0o160000, link2))],
823 self.detect_renames(tree1, tree2))
825 def test_exact_rename_swap(self):
826 blob1 = make_object(Blob, data=b'1')
827 blob2 = make_object(Blob, data=b'2')
828 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
829 tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
831 [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
832 (b'a', F, blob2.id)),
833 TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id),
834 (b'b', F, blob1.id))],
835 self.detect_renames(tree1, tree2))
837 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
838 (b'b', F, blob1.id)),
839 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
840 (b'a', F, blob2.id))],
841 self.detect_renames(tree1, tree2, rewrite_threshold=50))
843 def test_content_rename_swap(self):
844 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
845 blob2 = make_object(Blob, data=b'e\nf\ng\nh\n')
846 blob3 = make_object(Blob, data=b'a\nb\nc\ne\n')
847 blob4 = make_object(Blob, data=b'e\nf\ng\ni\n')
848 tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
849 tree2 = self.commit_tree([(b'a', blob4), (b'b', blob3)])
851 [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
852 (b'b', F, blob3.id)),
853 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
854 (b'a', F, blob4.id))],
855 self.detect_renames(tree1, tree2, rewrite_threshold=60))
857 def test_rewrite_threshold(self):
858 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
859 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
860 blob3 = make_object(Blob, data=b'a\nb\nf\ng\n')
862 tree1 = self.commit_tree([(b'a', blob1)])
863 tree2 = self.commit_tree([(b'a', blob3), (b'b', blob2)])
866 TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
867 (b'a', F, blob3.id)),
868 TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id))]
870 no_renames, self.detect_renames(tree1, tree2))
872 no_renames, self.detect_renames(
873 tree1, tree2, rewrite_threshold=40))
875 [TreeChange.add((b'a', F, blob3.id)),
876 TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
877 (b'b', F, blob2.id))],
878 self.detect_renames(tree1, tree2, rewrite_threshold=80))
880 def test_find_copies_harder_exact(self):
881 blob = make_object(Blob, data=b'blob')
882 tree1 = self.commit_tree([(b'a', blob)])
883 tree2 = self.commit_tree([(b'a', blob), (b'b', blob)])
884 self.assertEqual([TreeChange.add((b'b', F, blob.id))],
885 self.detect_renames(tree1, tree2))
887 [TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'b', F, blob.id))],
888 self.detect_renames(tree1, tree2, find_copies_harder=True))
890 def test_find_copies_harder_content(self):
891 blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
892 blob2 = make_object(Blob, data=b'a\nb\nc\ne\n')
893 tree1 = self.commit_tree([(b'a', blob1)])
894 tree2 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
895 self.assertEqual([TreeChange.add((b'b', F, blob2.id))],
896 self.detect_renames(tree1, tree2))
898 [TreeChange(CHANGE_COPY, (b'a', F, blob1.id),
899 (b'b', F, blob2.id))],
900 self.detect_renames(tree1, tree2, find_copies_harder=True))
902 def test_find_copies_harder_with_rewrites(self):
903 blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n')
904 blob_a2 = make_object(Blob, data=b'f\ng\nh\ni\n')
905 blob_b2 = make_object(Blob, data=b'a\nb\nc\ne\n')
906 tree1 = self.commit_tree([(b'a', blob_a1)])
907 tree2 = self.commit_tree([(b'a', blob_a2), (b'b', blob_b2)])
909 [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
910 (b'a', F, blob_a2.id)),
911 TreeChange(CHANGE_COPY, (b'a', F, blob_a1.id),
912 (b'b', F, blob_b2.id))],
913 self.detect_renames(tree1, tree2, find_copies_harder=True))
915 [TreeChange.add((b'a', F, blob_a2.id)),
916 TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id),
917 (b'b', F, blob_b2.id))],
918 self.detect_renames(tree1, tree2, rewrite_threshold=50,
919 find_copies_harder=True))
921 def test_reuse_detector(self):
922 blob = make_object(Blob, data=b'blob')
923 tree1 = self.commit_tree([(b'a', blob)])
924 tree2 = self.commit_tree([(b'b', blob)])
925 detector = RenameDetector(self.store)
926 changes = [TreeChange(CHANGE_RENAME, (b'a', F, blob.id),
928 self.assertEqual(changes,
929 detector.changes_with_renames(tree1.id, tree2.id))
930 self.assertEqual(changes,
931 detector.changes_with_renames(tree1.id, tree2.id))
933 def test_want_unchanged(self):
934 blob_a1 = make_object(Blob, data=b'a\nb\nc\nd\n')
935 blob_b = make_object(Blob, data=b'b')
936 blob_c2 = make_object(Blob, data=b'a\nb\nc\ne\n')
937 tree1 = self.commit_tree([(b'a', blob_a1), (b'b', blob_b)])
938 tree2 = self.commit_tree([(b'c', blob_c2), (b'b', blob_b)])
940 [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id),
941 (b'c', F, blob_c2.id))],
942 self.detect_renames(tree1, tree2))
944 [TreeChange(CHANGE_RENAME, (b'a', F, blob_a1.id),
945 (b'c', F, blob_c2.id)),
946 TreeChange(CHANGE_UNCHANGED, (b'b', F, blob_b.id),
947 (b'b', F, blob_b.id))],
948 self.detect_renames(tree1, tree2, want_unchanged=True))