ansible/node: Add extra build dependencies for GPFS kernel module
[autocluster.git] / autocluster.py
index 985979ba1073688d20a8fe73ec4ab21db964c1ab..942700112a679ac92e0440f9e90c59da93c3d81c 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 '''Autocluster: Generate test clusters for clustered Samba
 
@@ -7,6 +7,14 @@
    Uses Vagrant to create cluster, Ansible to configure
 '''
 
+
+# Copyright (C) Martin Schwenke  2019, 2020
+#
+# Based on ideas from a previous design/implementation:
+#
+#   Copyright (C) 2008 Andrew Tridgell and Martin Schwenke
+
+
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 3 of the License, or
@@ -28,6 +36,7 @@ import sys
 import re
 import subprocess
 import shutil
+import time
 
 import ipaddress
 
@@ -40,7 +49,7 @@ except ImportError as err:
 
 INSTALL_DIR = '.'
 
-NODE_TYPES = ['nas', 'base', 'build', 'cbuild', 'ad', 'test']
+NODE_TYPES = ['nas', 'base', 'build', 'cbuild', 'tbuild', 'ad', 'test']
 GENERATED_KEYS = ['cluster', 'nodes', 'shares']
 
 
@@ -191,7 +200,8 @@ def calculate_dependencies(cluster, defaults, config):
         config['resolv_conf']['search'] = config['resolv_conf']['domain']
 
     # Presence of distro repositories means delete existing ones
-    if 'repositories_delete_existing' not in config:
+    if 'repositories' in config and \
+       'repositories_delete_existing' not in config:
         for repo in config['repositories']:
             if repo['type'] == 'distro':
                 config['repositories_delete_existing'] = True
@@ -412,7 +422,7 @@ def generate_ansible_inventory(config, outdir):
 
     with open(outfile, 'w') as stream:
         for node_type, hostnames in type_map.items():
-            print('[%s-nodes]' % node_type, file=stream)
+            print('[%s_nodes]' % node_type, file=stream)
             hostnames.sort()
             for hostname in hostnames:
                 print(hostname, file=stream)
@@ -591,13 +601,13 @@ def create_shared_disk_images(cluster, config):
     conn.close()
 
 
-def cluster_destroy_quiet(cluster):
+def cluster_destroy_quiet(cluster, retries=1):
     '''Destroy and undefine cluster using Vagrant - don't announce'''
 
     config = load_config(cluster)
 
     # First attempt often fails, so try a few times
-    for _ in range(10):
+    for _ in range(retries):
         try:
             vagrant_command(cluster,
                             config,
@@ -611,15 +621,19 @@ def cluster_destroy_quiet(cluster):
     raise saved_err
 
 
-def cluster_destroy(cluster):
+def cluster_destroy(cluster, retries=1):
     '''Destroy and undefine cluster using Vagrant'''
 
     announce('cluster', cluster, 'destroy')
 
-    cluster_destroy_quiet(cluster)
+    config_file = get_config_file_path(cluster)
+    if not os.path.exists(config_file):
+        sys.exit('ERROR: Generated configuration for cluster does not exist')
+
+    cluster_destroy_quiet(cluster, retries)
 
 
-def cluster_create(cluster):
+def cluster_create(cluster, retries=1):
     '''Create and boot cluster using Vagrant'''
 
     announce('cluster', cluster, 'create')
@@ -631,7 +645,7 @@ def cluster_create(cluster):
     create_shared_disk_images(cluster, config)
 
     # First attempt sometimes fails, so try a few times
-    for _ in range(10):
+    for _ in range(retries):
         try:
             vagrant_command(cluster, config, ['up'])
         except subprocess.CalledProcessError as err:
@@ -655,7 +669,7 @@ def cluster_ssh_config(cluster):
     shutil.copyfile(src, dst)
 
 
-def cluster_setup(cluster):
+def cluster_setup(cluster, retries=1):
     '''Setup cluster using Ansible'''
 
     announce('cluster', cluster, 'setup')
@@ -671,20 +685,33 @@ def cluster_setup(cluster):
             '-e', '@%s' % config_file,
             '-i', inventory,
             playbook]
-    try:
-        subprocess.check_call(args)
-    except subprocess.CalledProcessError as err:
-        sys.exit('ERROR: cluster setup exited with %d' % err.returncode)
+
+    # First attempt sometimes fails, so try up to 'retries' times
+    for _ in range(retries):
+        try:
+            subprocess.check_call(args)
+        except subprocess.CalledProcessError as err:
+            print('warning: cluster setup exited with %d, retrying' %
+                  err.returncode,
+                  file=sys.stderr)
+            saved_err = err
+            time.sleep(1)
+        else:
+            return
+
+    sys.exit('ERROR: cluster setup exited with %d' % saved_err.returncode)
 
 
 def cluster_build(cluster):
     '''Build cluster using Ansible'''
 
-    cluster_destroy(cluster)
+    config_file = get_config_file_path(cluster)
+    if os.path.exists(config_file):
+        cluster_destroy(cluster, 10)
     cluster_generate(cluster)
-    cluster_create(cluster)
+    cluster_create(cluster, 10)
     cluster_ssh_config(cluster)
-    cluster_setup(cluster)
+    cluster_setup(cluster, 5)
 
 
 def cluster_command(cluster, command):
@@ -699,7 +726,7 @@ def cluster_command(cluster, command):
     elif command == 'generate':
         cluster_generate(cluster)
     elif command == 'destroy':
-        cluster_destroy(cluster)
+        cluster_destroy(cluster, 10)
     elif command == 'create':
         cluster_create(cluster)
     elif command == 'ssh_config':