3 '''Autocluster: Generate test clusters for clustered Samba
5 Reads configuration file in YAML format
7 Uses Vagrant to create cluster, Ansible to configure
11 # Copyright (C) Martin Schwenke 2019, 2020
13 # Based on ideas from a previous design/implementation:
15 # Copyright (C) 2008 Andrew Tridgell and Martin Schwenke
18 # This program is free software; you can redistribute it and/or modify
19 # it under the terms of the GNU General Public License as published by
20 # the Free Software Foundation; either version 3 of the License, or
21 # (at your option) any later version.
23 # This program is distributed in the hope that it will be useful,
24 # but WITHOUT ANY WARRANTY; without even the implied warranty of
25 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 # GNU General Public License for more details.
28 # You should have received a copy of the GNU General Public License
29 # along with this program; if not, see <http://www.gnu.org/licenses/>.
31 from __future__ import print_function
46 except ImportError as err:
47 LIBVIRT_IMPORT_ERROR = err
52 NODE_TYPES = ['nas', 'base', 'build', 'cbuild', 'tbuild', 'ad', 'test']
53 GENERATED_KEYS = ['cluster', 'nodes', 'shares']
57 '''Print usage message'''
60 '''Usage: %s <group> <args>
63 cluster <cluster> <command> ...
66 defaults Dump default configuration to stdout
67 dump Dump cluster configuration to stdout
68 status Show cluster status
69 generate Generate cluster metadata for Vagrant, Ansible and SSH
70 destroy Destroy cluster
72 ssh_config Install cluster SSH configuration in current account
73 setup Perform configuration/setup of cluster nodes
74 build Short for: destroy generate create ssh_config setup
def sanity_check_cluster_name(cluster):
    '''Exit with an error unless the cluster name is acceptable

    An acceptable name starts with a letter that is followed by one
    or more letters or digits.  Returns None for an acceptable name.
    '''

    if re.match('^[A-Za-z][A-Za-z0-9]+$', cluster):
        return

    message = '''ERROR: Invalid cluster name "%s"
Some cluster filesystems only allow cluster names matching
^[A-Za-z][A-Za-z0-9]+$''' % cluster
    sys.exit(message)
def calculate_nodes(cluster, defaults, config):
    '''Calculate hostname, IP and other attributes for each node

    cluster is the cluster name, used as the hostname prefix.
    defaults and config are the default and user configuration
    dictionaries; user settings take precedence over defaults.

    Stores a dictionary in config['nodes'] that maps each generated
    hostname to a dictionary with the keys 'type', 'is_ctdb_node',
    'has_shared_storage' and 'ips' (one address per network).

    Exits with an error message if node_list is not defined in config
    or if it contains a node type that is not in NODE_TYPES.
    '''

    combined = dict(defaults)
    combined.update(config)

    if 'node_list' not in config:
        sys.exit('Error: node_list not defined')

    node_list = combined['node_list']

    have_dedicated_storage_nodes = False
    for node_type in node_list:

        if node_type not in NODE_TYPES:
            sys.exit('ERROR: Invalid node type %s in node_list' % node_type)

        # Test the node type itself: comparing the "type" builtin
        # with a string can never be true
        if node_type == 'storage':
            have_dedicated_storage_nodes = True

    nodes = {}
    type_counts = {}
    for idx, node_type in enumerate(node_list):
        node = {'type': node_type}

        # Construct hostname, whether node is CTDB node
        # NOTE(review): hostname tags ('n' for NAS nodes, the node type
        # otherwise) are assumed here - confirm
        if node_type == 'nas':
            tag = 'n'
            node['is_ctdb_node'] = True
        else:
            tag = node_type
            node['is_ctdb_node'] = False

        type_counts[node_type] = type_counts.get(node_type, 0) + 1
        hostname = '%s%s%d' % (cluster, tag, type_counts[node_type])

        # Does the node have shared storage?
        if node_type == 'storage':
            node['has_shared_storage'] = True
        elif node_type == 'nas' and not have_dedicated_storage_nodes:
            node['has_shared_storage'] = True
        else:
            node['has_shared_storage'] = False

        # List of IP addresses, one for each network
        node['ips'] = []
        for net in combined['networks']:
            # Read firstip from the combined settings so that a value
            # that is only present in defaults is honoured, as is
            # already the case for networks
            offset = combined['firstip'] + idx
            if sys.version_info[0] < 3:
                # Backported Python 2 ipaddress demands unicode instead of str
                net = net.decode('utf-8')
            ip_address = ipaddress.ip_network(net, strict=False)
            node['ips'].append(str(ip_address[offset]))

        nodes[hostname] = node

    config['nodes'] = nodes
def calculate_dependencies_ad(config):
    '''Derive nameserver and auth method from the first AD node

    When config['nodes'] contains a node of type "ad", its first IP
    address becomes the resolv_conf nameserver and the auth method
    becomes "winbind".  Settings already present in config are kept.
    Without an AD node config is left untouched.
    '''

    ad_nodes = (n for n in config['nodes'].values() if n['type'] == 'ad')
    first_ad = next(ad_nodes, None)
    if first_ad is None:
        return

    nameserver = first_ad['ips'][0]
    resolv_conf = config.setdefault('resolv_conf', {})
    resolv_conf.setdefault('nameserver', nameserver)
    config.setdefault('auth_method', 'winbind')
def calculate_dependencies_virthost(defaults, config):
    '''Fill in unset values that default to the virtualisation host

    The virthost is taken from config if present there, otherwise
    from defaults.  It is used as the resolv_conf nameserver, as the
    host in repository_baseurl and as the AD DNS forwarder, but only
    where config does not already provide a value.
    '''

    # Only consult defaults when config has no virthost of its own
    virthost = config['virthost'] if 'virthost' in config \
        else defaults['virthost']

    config.setdefault('resolv_conf', {}).setdefault('nameserver', virthost)

    if 'repository_baseurl' not in config:
        config['repository_baseurl'] = 'http://%s/mediasets' % virthost

    config.setdefault('ad', {}).setdefault('dns_forwarder', virthost)
def calculate_dependencies(cluster, defaults, config):
    '''Fill in settings that are derived from other settings

    Records the cluster name, applies the AD and virthost derived
    values, defaults the resolver search list to the resolver domain
    and requests deletion of existing repositories when a "distro"
    repository is configured.
    '''

    config['cluster'] = cluster

    calculate_dependencies_ad(config)
    calculate_dependencies_virthost(defaults, config)

    # The search list defaults to the domain
    if 'resolv_conf' in config:
        resolv_conf = config['resolv_conf']
        if 'domain' in resolv_conf and 'search' not in resolv_conf:
            resolv_conf['search'] = resolv_conf['domain']

    # Presence of distro repositories means delete existing ones
    if 'repositories' not in config:
        return
    if 'repositories_delete_existing' in config:
        return
    if any(repo['type'] == 'distro' for repo in config['repositories']):
        config['repositories_delete_existing'] = True
def calculate_kdc(config):
    '''Default the KDC to the hostname of an AD node

    Nothing is changed if config already has a "kdc" setting or if
    config['nodes'] contains no node of type "ad".
    '''

    if 'kdc' in config:
        return

    ad_hostnames = (name for name, node in config['nodes'].items()
                    if node['type'] == 'ad')
    kdc = next(ad_hostnames, None)
    if kdc is not None:
        config['kdc'] = kdc
def calculate_timezone(config):
    '''Calculate timezone setting if unset

    Two sources are tried in order until one provides a value:

    * the first line of the timezone file, whose path can be
      overridden via AUTOCLUSTER_TEST_TIMEZONE_FILE
    * the first ZONE="..." line of the clock file, whose path can be
      overridden via AUTOCLUSTER_TEST_CLOCK_FILE

    A missing file is skipped, any other I/O error is re-raised.  If
    neither source provides a value then config is left without a
    "timezone" setting.
    '''

    if 'timezone' not in config:
        # NOTE(review): the default path is assumed to be
        # /etc/timezone - confirm
        timezone_file = os.environ.get('AUTOCLUSTER_TEST_TIMEZONE_FILE',
                                       '/etc/timezone')
        try:
            with open(timezone_file) as stream:
                timezone = stream.readline().strip()
            # An empty file provides no timezone, so fall through to
            # the next source instead of failing or storing ''
            if timezone:
                config['timezone'] = timezone
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise

    if 'timezone' not in config:
        clock_file = os.environ.get('AUTOCLUSTER_TEST_CLOCK_FILE',
                                    '/etc/sysconfig/clock')
        try:
            with open(clock_file) as stream:
                zone_re = re.compile('^ZONE="([^"]+)".*')
                # Only the first matching line counts
                for line in stream:
                    match = zone_re.match(line)
                    if match:
                        config['timezone'] = match.group(1).strip()
                        break
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
def calculate_shares(defaults, config):
    '''Define the exported share below the cluster filesystem

    The cluster filesystem mountpoint is taken from config if set
    there, otherwise from defaults.  A single share named "data" is
    stored as a one-element list in config['shares'].
    '''

    has_own_mountpoint = 'mountpoint' in config.get('clusterfs', ())
    source = config if has_own_mountpoint else defaults
    mountpoint = source['clusterfs']['mountpoint']

    config['shares'] = [{
        'name': 'data',
        'directory': os.path.join(mountpoint, 'data'),
        'mode': '0o777',
    }]
266 '''Load default configuration'''
268 # Any failures here are internal errors, so allow default
271 defaults_file = os.path.join(INSTALL_DIR, 'defaults.yml')
273 with open(defaults_file, 'r') as stream:
274 defaults = yaml.safe_load(stream)
def nested_update(dst, src, context=None):
    '''Recursively merge dictionary src into dictionary dst

    Every key in src must already exist in dst, unless it is listed
    in GENERATED_KEYS; otherwise the program exits with an error that
    names the dotted path of the offending key.  As a result the
    defaults act as a template for the valid configuration options.

    context is the dotted path of the dictionary being merged and is
    only used for error reporting.
    '''

    for key, val in src.items():
        ctx = key if context is None else '%s.%s' % (context, key)

        known = key in dst
        if not known and key not in GENERATED_KEYS:
            sys.exit('ERROR: Invalid configuration key "%s"' % ctx)

        # Dictionaries are merged key by key, anything else replaces
        # the existing value
        if known and isinstance(val, dict):
            nested_update(dst[key], val, ctx)
            continue

        dst[key] = val
def load_config_with_includes(config_file):
    '''Load a config file, recursively respecting "include" options

    config_file is the path of a YAML file.  Its "include" item, if
    any, is either a single file name or a list of file names.  Each
    included file is loaded in turn, in the same way, and merged over
    the settings loaded so far using dict.update().  The "include"
    item itself is removed from the result.

    Returns the resulting dictionary.  Exits with an error if a file
    does not exist or cannot be parsed.  Include items of the wrong
    type are ignored with a warning on stderr.
    '''

    if not os.path.exists(config_file):
        sys.exit('ERROR: Configuration file %s not found' % config_file)

    with open(config_file, 'r') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            sys.exit('Error parsing config file %s, %s' % (config_file, exc))

    # An empty YAML document loads as None, so treat it as a
    # configuration without any settings
    if config is None:
        config = {}

    # Handle include item, either a single string or a list
    if 'include' not in config:
        return config

    includes = config.pop('include')
    if isinstance(includes, str):
        includes = [includes]
    if not isinstance(includes, list):
        print('warning: Ignoring non-string/list include', file=sys.stderr)
        return config

    for include in includes:
        if not isinstance(include, str):
            print('warning: Ignoring non-string include', file=sys.stderr)
            continue

        included_config = load_config_with_includes(include)
        config.update(included_config)

    return config
def load_config(cluster):
    '''Load default and user configuration; combine them

    The user configuration is read from "<cluster>.yml", including
    any files it includes.  Derived settings are calculated and the
    result is merged over a copy of the defaults, which also checks
    that only known configuration keys are used.

    Returns the combined configuration dictionary.
    '''

    defaults = load_defaults()
    config = load_config_with_includes('%s.yml' % cluster)

    # Derived settings
    calculate_nodes(cluster, defaults, config)
    calculate_dependencies(cluster, defaults, config)
    calculate_timezone(config)
    calculate_kdc(config)
    calculate_shares(defaults, config)

    combined = dict(defaults)
    nested_update(combined, config)

    return combined
def generate_config_yml(cluster, config):
    '''Write the combined configuration to the cluster's "config.yml"

    The file starts with a YAML document marker, followed by config
    dumped in block style.
    '''

    with open(get_config_file_path(cluster), 'w') as stream:
        document = yaml.dump(config, default_flow_style=False)
        stream.write('---\n')
        stream.write('%s\n' % document)
def generate_hosts(cluster, config, outdir):
    '''Write a hosts file snippet to "hosts" in outdir

    After a comment line naming the cluster there is one line per
    node: the node's first IP address, a tab, the fully qualified
    hostname in the resolv_conf domain and the short hostname.
    '''

    with open(os.path.join(outdir, 'hosts'), 'w') as stream:
        stream.write("# autocluster %s\n" % cluster)

        domain = config['resolv_conf']['domain']

        for hostname, node in config['nodes'].items():
            fields = (node['ips'][0], hostname, domain, hostname)
            stream.write("%s\t%s.%s %s\n" % fields)
385 def generate_ssh_config(config, outdir):
386 '''Output ssh_config file snippet to "ssh_config"'''
388 outfile = os.path.join(outdir, 'ssh_config')
390 with open(outfile, 'w') as stream:
391 for hostname, node in config['nodes'].items():
392 ip_address = node['ips'][0]
393 ssh_key = os.path.join(os.environ['HOME'], '.ssh/id_autocluster')
398 UserKnownHostsFile /dev/null
399 StrictHostKeyChecking no
400 PasswordAuthentication no
404 ''' % (hostname, ip_address, ssh_key)
406 print(section, file=stream)
409 def generate_ansible_inventory(config, outdir):
410 '''Output Ansible inventory file to "ansible.inventory"'''
414 for hostname, node in config['nodes'].items():
416 node_type = node['type']
417 hostnames = type_map.get(node['type'], [])
418 hostnames.append(hostname)
419 type_map[node['type']] = hostnames
421 outfile = os.path.join(outdir, 'ansible.inventory')
423 with open(outfile, 'w') as stream:
424 for node_type, hostnames in type_map.items():
425 print('[%s_nodes]' % node_type, file=stream)
427 for hostname in hostnames:
428 print(hostname, file=stream)
def cluster_defaults():
    '''Print the default configuration to stdout as block-style YAML'''

    print(yaml.dump(load_defaults(), default_flow_style=False))
def cluster_dump(cluster):
    '''Print the cluster configuration to stdout as block-style YAML

    The generated "nodes" and "shares" values are left out.
    '''

    config = load_config(cluster)

    # Generated, internal values are not shown
    generated = ('nodes', 'shares')
    shown = {key: val for key, val in config.items() if key not in generated}

    print(yaml.dump(shown, default_flow_style=False))
def get_state_dir(cluster):
    '''Return the state directory of the given cluster

    This is the cluster's subdirectory of ".autocluster" in the
    current working directory.
    '''

    state_root = os.path.join(os.getcwd(), '.autocluster')
    return os.path.join(state_root, cluster)
def get_config_file_path(cluster):
    '''Return the path of the generated "config.yml" for cluster'''

    state_dir = get_state_dir(cluster)
    return os.path.join(state_dir, 'config.yml')
def announce(group, cluster, command):
    '''Print a three line banner announcing the current step

    The middle line contains group, cluster and command, separated by
    spaces and padded to the width of the surrounding rules.
    '''

    rule = '############################################################'
    title = '%s %s %s' % (group, cluster, command)

    print("%s\n# %-56s #\n%s" % (rule, title, rule))
def cluster_generate(cluster):
    '''Generate the metadata files of a cluster from its configuration

    The state directory is created if necessary.  The combined
    configuration, hosts snippet, SSH configuration and Ansible
    inventory are then written.
    '''

    announce('cluster', cluster, 'generate')

    config = load_config(cluster)
    outdir = get_state_dir(cluster)

    try:
        os.makedirs(outdir)
    except OSError as err:
        # An existing state directory is fine
        if err.errno != errno.EEXIST:
            raise

    generate_config_yml(cluster, config)
    generate_hosts(cluster, config, outdir)
    for generator in (generate_ssh_config, generate_ansible_inventory):
        generator(config, outdir)
def vagrant_command(cluster, config, args):
    '''Run vagrant with the given list of arguments

    The provider, Vagrant working directory, dotfile path and cluster
    state directory are passed to Vagrant via the environment.
    Raises subprocess.CalledProcessError if vagrant fails.
    '''

    state_dir = get_state_dir(cluster)

    os.environ.update({
        'VAGRANT_DEFAULT_PROVIDER': config['vagrant_provider'],
        'VAGRANT_CWD': os.path.join(INSTALL_DIR, 'vagrant'),
        'VAGRANT_DOTFILE_PATH': os.path.join(state_dir, '.vagrant'),
        'AUTOCLUSTER_STATE': state_dir,
    })

    # Build a new list, the caller's arguments are not modified
    subprocess.check_call(['vagrant'] + args)
def cluster_status(cluster):
    '''Show the status of the cluster as reported by Vagrant'''

    announce('cluster', cluster, 'status')

    vagrant_command(cluster, load_config(cluster), ['status'])
def get_shared_disk_names(cluster, config):
    '''Return shared disks names for cluster, None if none

    There are no shared disks if no node has shared storage or if the
    configured number of shared disks is not positive.  Otherwise the
    result is a list of image names, numbered from 1.
    '''

    nodes = config['nodes'].values()
    if not any(node['has_shared_storage'] for node in nodes):
        return None

    count = config['shared_disks']['count']
    if count < 1:
        # Keep the documented contract: None rather than an empty list
        return None

    return ['autocluster_%s_shared%02d.img' % (cluster, n + 1)
            for n in range(count)]
543 def delete_shared_disk_images(cluster, config):
544 '''Delete any shared disks for the given cluster'''
546 if config['vagrant_provider'] != 'libvirt':
549 shared_disks = get_shared_disk_names(cluster, config)
550 if shared_disks is None:
554 print('warning: unable to check for stale shared disks (no libvirt)',
558 conn = libvirt.open()
559 storage_pool = conn.storagePoolLookupByName('autocluster')
560 for disk in shared_disks:
562 volume = storage_pool.storageVolLookupByName(disk)
564 except libvirt.libvirtError as err:
565 if err.get_error_code() != libvirt.VIR_ERR_NO_STORAGE_VOL:
570 def create_shared_disk_images(cluster, config):
571 '''Create shared disks for the given cluster'''
573 if config['vagrant_provider'] != 'libvirt':
576 shared_disks = get_shared_disk_names(cluster, config)
577 if shared_disks is None:
581 raise LIBVIRT_IMPORT_ERROR
583 conn = libvirt.open()
584 storage_pool = conn.storagePoolLookupByName('autocluster')
586 size = str(config['shared_disks']['size'])
587 if size[-1].isdigit():
594 for disk in shared_disks:
595 xml = '''<volume type='file'>
597 <capacity unit="%s">%s</capacity>
598 </volume>''' % (disk, unit, capacity)
599 storage_pool.createXML(xml)
604 def cluster_destroy_quiet(cluster, retries=1):
605 '''Destroy and undefine cluster using Vagrant - don't announce'''
607 config = load_config(cluster)
609 # First attempt often fails, so try a few times
610 for _ in range(retries):
612 vagrant_command(cluster,
614 ['destroy', '-f', '--no-parallel'])
615 except subprocess.CalledProcessError as err:
618 delete_shared_disk_images(cluster, config)
def cluster_destroy(cluster, retries=1):
    '''Destroy and undefine cluster using Vagrant

    Exits with an error if no configuration has been generated for
    the cluster.  retries is passed on to cluster_destroy_quiet().
    '''

    announce('cluster', cluster, 'destroy')

    if os.path.exists(get_config_file_path(cluster)):
        cluster_destroy_quiet(cluster, retries)
        return

    sys.exit('ERROR: Generated configuration for cluster does not exist')
636 def cluster_create(cluster, retries=1):
637 '''Create and boot cluster using Vagrant'''
639 announce('cluster', cluster, 'create')
641 config = load_config(cluster)
643 # Create our own shared disk images to protect against
644 # https://github.com/vagrant-libvirt/vagrant-libvirt/issues/825
645 create_shared_disk_images(cluster, config)
647 # First attempt sometimes fails, so try a few times
648 for _ in range(retries):
650 vagrant_command(cluster, config, ['up'])
651 except subprocess.CalledProcessError as err:
653 cluster_destroy(cluster)
def cluster_ssh_config(cluster):
    '''Install the generated SSH configuration of the cluster

    The "ssh_config" file in the cluster state directory is copied to
    "<cluster>.config" in ".ssh/autocluster.d" below $HOME.
    '''

    announce('cluster', cluster, 'ssh_config')

    generated = os.path.join(get_state_dir(cluster), 'ssh_config')
    install_dir = os.path.join(os.environ['HOME'], '.ssh/autocluster.d')
    installed = os.path.join(install_dir, '%s.config' % cluster)

    shutil.copyfile(generated, installed)
672 def cluster_setup(cluster, retries=1):
673 '''Setup cluster using Ansible'''
675 announce('cluster', cluster, 'setup')
677 # Could put these in the state directory, but disable for now
678 os.environ['ANSIBLE_RETRY_FILES_ENABLED'] = 'false'
680 state_dir = get_state_dir(cluster)
681 config_file = get_config_file_path(cluster)
682 inventory = os.path.join(state_dir, 'ansible.inventory')
683 playbook = os.path.join(INSTALL_DIR, 'ansible/node/site.yml')
684 args = ['ansible-playbook',
685 '-e', '@%s' % config_file,
689 # First attempt sometimes fails, so try a few times
690 for _ in range(retries):
692 subprocess.check_call(args)
693 except subprocess.CalledProcessError as err:
694 print('warning: cluster setup exited with %d, retrying' %
702 sys.exit('ERROR: cluster setup exited with %d' % saved_err.returncode)
def cluster_build(cluster):
    '''Build the cluster from scratch

    An existing cluster, recognised by its generated configuration
    file, is destroyed first.  The cluster metadata is then
    generated, the cluster is created, its SSH configuration is
    installed and the nodes are set up.
    '''

    if os.path.exists(get_config_file_path(cluster)):
        cluster_destroy(cluster, 10)

    # Each step with its retry count, where it takes one
    steps = [
        (cluster_generate, ()),
        (cluster_create, (10,)),
        (cluster_ssh_config, ()),
        (cluster_setup, (5,)),
    ]
    for step, extra_args in steps:
        step(cluster, *extra_args)
def cluster_command(cluster, command):
    '''Run the function that implements the given cluster command

    An unknown command results in the usage message.
    '''

    # Handlers are wrapped so that nothing runs until one is selected
    handlers = {
        'defaults': lambda: cluster_defaults(),
        'dump': lambda: cluster_dump(cluster),
        'status': lambda: cluster_status(cluster),
        'generate': lambda: cluster_generate(cluster),
        'destroy': lambda: cluster_destroy(cluster, 10),
        'create': lambda: cluster_create(cluster),
        'ssh_config': lambda: cluster_ssh_config(cluster),
        'setup': lambda: cluster_setup(cluster),
        'build': lambda: cluster_build(cluster),
    }

    handler = handlers.get(command)
    if handler is None:
        usage()
    else:
        handler()
def get_host_setup_path(file):
    '''Return the path of the named file in the host setup directory'''

    host_dir = os.path.join(INSTALL_DIR, 'ansible/host')
    return os.path.join(host_dir, file)
def get_platform_file(platform):
    '''Return the path of the host setup file for platform'''

    filename = 'autocluster_setup_%s.yml' % platform
    return get_host_setup_path(filename)
def sanity_check_platform_name(platform):
    '''Exit with an error unless the host platform is supported

    A platform is supported if its host setup file is readable.
    '''

    if os.access(get_platform_file(platform), os.R_OK):
        return

    sys.exit('Host platform "%s" not supported' % platform)
def host_setup(platform):
    '''Set up the host machine for use with Autocluster

    Runs ansible-playbook with the platform setup file and the SSH
    setup file, with Ansible retry files disabled.  Exits with an
    error if ansible-playbook fails.
    '''

    announce('host', platform, 'setup')

    playbooks = [
        get_platform_file(platform),
        get_host_setup_path('autocluster_setup_%s.yml' % 'ssh'),
    ]
    os.environ['ANSIBLE_RETRY_FILES_ENABLED'] = 'false'

    try:
        subprocess.check_call(['ansible-playbook'] + playbooks)
    except subprocess.CalledProcessError as err:
        sys.exit('ERROR: host setup exited with %d' % err.returncode)
780 '''Main autocluster command-line handling'''
782 if len(sys.argv) < 2:
785 if sys.argv[1] == 'cluster':
786 if len(sys.argv) < 4:
789 cluster = sys.argv[2]
791 sanity_check_cluster_name(cluster)
793 for command in sys.argv[3:]:
794 cluster_command(cluster, command)
796 elif sys.argv[1] == 'host':
797 if len(sys.argv) < 4:
800 platform = sys.argv[2]
802 sanity_check_platform_name(platform)
804 for command in sys.argv[3:]:
805 if command == 'setup':
812 if __name__ == '__main__':