netcmd: Determine which files are to be copied for an offline domain backup
authorJoseph Sutton <josephsutton@catalyst.net.nz>
Tue, 16 Mar 2021 03:22:40 +0000 (16:22 +1300)
committerKarolin Seeger <kseeger@samba.org>
Tue, 13 Jul 2021 12:31:15 +0000 (12:31 +0000)
The old behaviour attempted to check for and remove files with duplicate
names, but did not do so due to a bug, and would have left undetermined
which files were given priority when duplicate filenames were present.
Now when hardlinks are present, only one instance of each file is
chosen, with files in the private directory having priority. If one
backup dir is nested inside another, the files contained in the nested
directory are only added once. Additionally, the BIND DNS database is
omitted from the backup.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14027

Signed-off-by: Joseph Sutton <josephsutton@catalyst.net.nz>
(cherry picked from commit 3723148e7aa7e6d4a48a1a38112f121f52b6ee6f)

python/samba/netcmd/domain_backup.py
selftest/knownfail.d/bug-14027 [deleted file]

index fff39fcbc2a935427f877bcb858424530a478899..c5749a5c8e28abdfeb97b86e478d182defd3862a 100644 (file)
@@ -1097,6 +1097,10 @@ class cmd_domain_backup_offline(samba.netcmd.Command):
         samdb = SamDB(url=paths.samdb, session_info=system_session(), lp=lp)
         sid = get_sid_for_restore(samdb, logger)
 
+        # Iterating over the directories in this specific order ensures that
+        # when the private directory contains hardlinks that are also contained
+        # in other directories to be backed up (such as in paths.binddns_dir),
+        # the hardlinks in the private directory take precedence.
         backup_dirs = [paths.private_dir, paths.state_dir,
                        os.path.dirname(paths.smbconf)]  # etc dir
         logger.info('running backup on dirs: {0}'.format(' '.join(backup_dirs)))
@@ -1109,22 +1113,31 @@ class cmd_domain_backup_offline(samba.netcmd.Command):
                     continue
                 if working_dir.endswith('.sock') or '.sock/' in working_dir:
                     continue
+                # The BIND DNS database can be regenerated, so it doesn't need
+                # to be backed up.
+                if working_dir.startswith(os.path.join(paths.binddns_dir, 'dns')):
+                    continue
 
                 for filename in filenames:
-                    if filename in all_files:
+                    full_path = os.path.join(working_dir, filename)
+
+                    # Ignore files that have already been added. This prevents
+                    # duplicates if one backup dir is a subdirectory of another,
+                    # or if backup dirs contain hardlinks.
+                    if any(os.path.samefile(full_path, file) for file in all_files):
                         continue
 
                     # Assume existing backup files are from a previous backup.
                     # Delete and ignore.
                     if filename.endswith(self.backup_ext):
-                        os.remove(os.path.join(working_dir, filename))
+                        os.remove(full_path)
                         continue
 
                     # Sock files are autogenerated at runtime, ignore.
                     if filename.endswith('.sock'):
                         continue
 
-                    all_files.append(os.path.join(working_dir, filename))
+                    all_files.append(full_path)
 
         # Backup secrets, sam.ldb and their downstream files
         self.backup_secrets(paths.private_dir, lp, logger)
diff --git a/selftest/knownfail.d/bug-14027 b/selftest/knownfail.d/bug-14027
deleted file mode 100644 (file)
index f074647..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-^samba.tests.domain_backup_offline.samba.tests.domain_backup_offline.DomainBackupOfflineCmp.test_domain_backup_offline_hard_link
-^samba.tests.domain_backup_offline.samba.tests.domain_backup_offline.DomainBackupOfflineCmp.test_domain_backup_offline_nested