3 """Autocluster: Generate test clusters for clustered Samba
5 Reads configuration file in YAML format
7 Uses Vagrant to create cluster, Ansible to configure
11 # Copyright (C) Martin Schwenke 2019, 2020
13 # Based on ideas from a previous design/implementation:
15 # Copyright (C) 2008 Andrew Tridgell and Martin Schwenke
18 # This program is free software; you can redistribute it and/or modify
19 # it under the terms of the GNU General Public License as published by
20 # the Free Software Foundation; either version 3 of the License, or
21 # (at your option) any later version.
23 # This program is distributed in the hope that it will be useful,
24 # but WITHOUT ANY WARRANTY; without even the implied warranty of
25 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 # GNU General Public License for more details.
28 # You should have received a copy of the GNU General Public License
29 # along with this program; if not, see <http://www.gnu.org/licenses/>.
31 from __future__ import print_function
47 except ImportError as err:
48 LIBVIRT_IMPORT_ERROR = err
51 INSTALL_DIR = os.path.dirname(os.path.realpath(sys.argv[0]))
54 'nas', 'base', 'storage', 'build', 'cbuild', 'tbuild', 'ad', 'test'
56 GENERATED_KEYS = ['cluster', 'nodes', 'shares']
60 """Print usage message"""
63 f'''Usage: {sys.argv[0]} <group> <args>
66 cluster <cluster> <command> ...
69 defaults Dump default configuration to stdout
70 dump Dump cluster configuration to stdout
71 status Show cluster status
72 generate Generate cluster metadata for Vagrant, Ansible and SSH
73 destroy Destroy cluster
75 ssh_config Install cluster SSH configuration in current account
76 setup Perform configuration/setup of cluster nodes
77 build Short for: destroy generate create ssh_config setup
def sanity_check_cluster_name(cluster):
    """Ensure that the cluster name is sane

    A valid name starts with an ASCII letter that is followed by one
    or more ASCII letters or digits.  Exits with an error message if
    the given name is not valid.
    """

    # fullmatch() is used rather than match() with a '$' anchor: '$'
    # also matches just before a trailing newline, which would allow
    # a name that ends with a newline
    if not re.fullmatch(r'[A-Za-z][A-Za-z0-9]+', cluster):
        sys.exit(f'''ERROR: Invalid cluster name "{cluster}"
Some cluster filesystems only allow cluster names matching
^[A-Za-z][A-Za-z0-9]+$''')
def calculate_nodes(cluster, defaults, config):
    """Calculate hostname, IP and other attributes for each node

    cluster is the cluster name and is used as the hostname prefix.
    defaults and config are the default and user configuration
    dictionaries.  Settings are looked up in a combination of the
    two, where user configuration takes precedence.

    The result is stored in config['nodes'], a dictionary that maps
    each hostname to a dictionary with keys 'type', 'is_ctdb_node',
    'has_shared_storage' and 'ips'.

    Exits with an error message if node_list is not defined in config
    or if it contains an unknown node type.
    """

    combined = dict(defaults)
    combined.update(config)

    if 'node_list' not in config:
        sys.exit('Error: node_list not defined')

    node_list = combined['node_list']

    for node_type in node_list:
        if node_type not in NODE_TYPES:
            sys.exit(f'ERROR: Invalid node type {node_type} in node_list')

    # NAS nodes only get shared storage when there are no dedicated
    # storage nodes.  Test the node types themselves: comparing the
    # builtin "type" with 'storage' can never be true.
    have_dedicated_storage_nodes = 'storage' in node_list

    nodes = {}
    type_counts = {}
    for idx, node_type in enumerate(node_list):
        node = {}
        node['type'] = node_type

        # Construct hostname, whether node is CTDB node
        # NOTE(review): the hostname tags are assigned on lines that
        # are not visible in the reviewed source - 'n' for NAS nodes
        # and the node type for all other nodes are assumed, confirm
        is_nas = node_type == 'nas'
        tag = 'n' if is_nas else node_type
        node['is_ctdb_node'] = is_nas

        # Nodes of each type are numbered separately, starting at 1
        type_counts[node_type] = type_counts.get(node_type, 0) + 1
        hostname = f'{cluster}{tag}{type_counts[node_type]}'

        # Does the node have shared storage?
        node['has_shared_storage'] = (
            node_type == 'storage' or
            (is_nas and not have_dedicated_storage_nodes))

        # List of IP addresses, one for each network.  firstip comes
        # from the combined configuration so that a default value is
        # respected when the user configuration does not set it.  No
        # Python 2 special case is needed for the network string,
        # since f-strings already require Python 3.
        node['ips'] = []
        for net in combined['networks']:
            offset = combined['firstip'] + idx
            ip_address = ipaddress.ip_network(net, strict=False)
            node['ips'].append(str(ip_address[offset]))

        nodes[hostname] = node

    config['nodes'] = nodes
def calculate_dependencies_ad(config):
    """Calculate nameserver and auth method based on the first AD node

    If there is a node of type 'ad' then its first IP address becomes
    the default for config['resolv_conf']['nameserver'] and 'winbind'
    becomes the default for config['auth_method'].  Settings that are
    already present are left alone.  Nothing changes if there is no
    AD node.
    """

    ad_nodes = (candidate for candidate in config['nodes'].values()
                if candidate['type'] == 'ad')
    first_ad = next(ad_nodes, None)
    if first_ad is None:
        return

    # Only the first IP address of the AD node is used
    nameserver = first_ad['ips'][0]
    resolv_conf = config.setdefault('resolv_conf', {})
    resolv_conf.setdefault('nameserver', nameserver)

    config.setdefault('auth_method', 'winbind')
168 def calculate_dependencies_virthost(defaults, config):
169 """Handle special values that depend on virthost"""
171 if 'virthost' in config:
172 virthost = config['virthost']
174 virthost = defaults['virthost']
176 if 'resolv_conf' not in config:
177 config['resolv_conf'] = {}
178 if 'nameserver' not in config['resolv_conf']:
179 config['resolv_conf']['nameserver'] = virthost
180 if 'forwarder' not in config['resolv_conf']:
181 config['resolv_conf']['forwarder'] = virthost
183 if 'repository_baseurl' not in config:
184 config['repository_baseurl'] = f'http://{virthost}/mediasets'
186 if 'ad' not in config:
def calculate_dependencies(cluster, defaults, config):
    """Handle special values that depend on updated config values

    Records the cluster name in config['cluster'], applies the AD and
    virthost dependencies and then fills in two settings when they
    are unset: the resolv_conf search list and the flag saying that
    existing repositories should be deleted.
    """

    config['cluster'] = cluster

    calculate_dependencies_ad(config)
    calculate_dependencies_virthost(defaults, config)

    # The search list defaults to the domain
    if 'resolv_conf' in config:
        resolv_conf = config['resolv_conf']
        if 'domain' in resolv_conf and 'search' not in resolv_conf:
            resolv_conf['search'] = resolv_conf['domain']

    # An explicit setting for deleting repositories always wins
    if 'repositories' not in config or \
       'repositories_delete_existing' in config:
        return

    # Presence of distro repositories means delete existing ones
    if any(repo['type'] == 'distro' for repo in config['repositories']):
        config['repositories_delete_existing'] = True
def calculate_kdc(config):
    """Calculate KDC setting if unset and there is an AD node

    config['kdc'] is set to the hostname of an AD node from
    config['nodes'].  An existing setting is left alone and nothing
    is set when there is no AD node.
    """

    if 'kdc' in config:
        return

    for name, attrs in config['nodes'].items():
        if attrs['type'] != 'ad':
            continue
        config['kdc'] = name
        return
224 def calculate_timezone(config):
225 """Calculate timezone setting if unset"""
227 if 'timezone' not in config:
228 timezone_file = os.environ.get('AUTOCLUSTER_TEST_TIMEZONE_FILE',
231 with open(timezone_file, encoding='utf-8') as stream:
232 content = stream.readlines()
233 timezone = content[0]
234 config['timezone'] = timezone.strip()
235 except IOError as exc:
236 if exc.errno != errno.ENOENT:
239 if 'timezone' not in config:
240 clock_file = os.environ.get('AUTOCLUSTER_TEST_CLOCK_FILE',
241 '/etc/sysconfig/clock')
243 with open(clock_file, encoding='utf-8') as stream:
244 zone_re = re.compile('^ZONE="([^"]+)".*')
245 lines = stream.readlines()
246 matches = [*filter(zone_re.match, lines)]
248 timezone = zone_re.match(matches[0]).group(1)
249 config['timezone'] = timezone.strip()
250 except IOError as exc:
251 if exc.errno != errno.ENOENT:
def calculate_shares(defaults, config):
    """Calculate share definitions based on cluster filesystem mountpoint

    The mountpoint is taken from config['clusterfs'] when it is set
    there and from defaults['clusterfs'] otherwise.  A single share
    called "data", in the "data" directory under the mountpoint, is
    stored in config['shares'].
    """

    user_has_mountpoint = ('clusterfs' in config and
                           'mountpoint' in config['clusterfs'])
    source = config if user_has_mountpoint else defaults
    mountpoint = source['clusterfs']['mountpoint']

    config['shares'] = [{
        'name': 'data',
        'directory': os.path.join(mountpoint, 'data'),
        'mode': '0o777',
    }]
269 """Load default configuration"""
271 # Any failures here are internal errors, so allow default
274 defaults_file = os.path.join(INSTALL_DIR, 'defaults.yml')
276 with open(defaults_file, encoding='utf-8') as stream:
277 defaults = yaml.safe_load(stream)
def nested_update(dst, src, context=None):
    """Update dictionary dst from dictionary src.

    Every key in src must already be defined in dst, except for the
    keys listed in GENERATED_KEYS, so the contents of dst act as a
    template for valid configuration options.  Nested dictionaries
    are updated recursively rather than being replaced.

    context is the dotted path of the dictionary being updated and is
    only used to name an offending key.  Exits with an error message
    if an invalid key is found.
    """

    prefix = '' if context is None else f'{context}.'

    for key, val in src.items():
        ctx = f'{prefix}{key}'

        known = key in dst
        if not (known or key in GENERATED_KEYS):
            sys.exit(f'ERROR: Invalid configuration key "{ctx}"')

        # Descend into dictionaries that exist on both sides
        if known and isinstance(val, dict):
            nested_update(dst[key], val, ctx)
            continue

        dst[key] = val
def load_config_with_includes(config_file):
    """Load a config file, recursively respecting "include" options

    config_file is the name of a YAML file.  The "include" item, if
    present, is either a single file name or a list of file names.
    It is removed from the configuration and each listed file is
    loaded in turn, with its settings replacing any settings of the
    same name that have already been loaded.

    Returns the resulting configuration dictionary.  Exits with an
    error message if the file does not exist, can not be parsed or
    does not contain a YAML mapping.
    """

    if not os.path.exists(config_file):
        sys.exit(f'ERROR: Configuration file {config_file} not found')

    with open(config_file, encoding='utf-8') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            sys.exit(f'Error parsing config file {config_file}, {exc}')

    # An empty file is parsed as None: treat it as no configuration
    if config is None:
        config = {}

    # Anything else that is not a mapping (e.g. a list or a string)
    # can not be used as configuration, so fail with a clear message
    # instead of an unrelated exception further down
    if not isinstance(config, dict):
        sys.exit(f'ERROR: Configuration file {config_file} '
                 'does not contain a mapping')

    # Handle include item, either a single string or a list
    if 'include' not in config:
        return config

    includes = config.pop('include')
    if isinstance(includes, str):
        includes = [includes]
    if not isinstance(includes, list):
        print('warning: Ignoring non-string/list include', file=sys.stderr)
        return config

    for include in includes:
        if not isinstance(include, str):
            print('warning: Ignoring non-string include', file=sys.stderr)
            continue

        included_config = load_config_with_includes(include)
        config.update(included_config)

    return config
def load_config(cluster):
    """Load default and user configuration; combine them

    The user configuration is read from "<cluster>.yml" in the
    current directory.  Calculated settings are added to it and the
    result is merged into the defaults.  Returns the combined
    configuration.
    """

    defaults = load_defaults()
    config = load_config_with_includes(f'{cluster}.yml')

    # calculate_nodes() goes first because later steps read
    # config['nodes']
    calculate_nodes(cluster, defaults, config)
    calculate_dependencies(cluster, defaults, config)
    for calculate in (calculate_timezone, calculate_kdc):
        calculate(config)
    calculate_shares(defaults, config)

    # The defaults are the template that user settings are checked
    # against and merged into
    nested_update(defaults, config)

    return defaults
def generate_config_yml(cluster, config):
    """Output combined YAML configuration to "config.yml"

    The file is written to the path given by get_config_file_path()
    for cluster and starts with a "---" document marker.
    """

    with open(get_config_file_path(cluster),
              'w',
              encoding='utf-8') as stream:
        document = yaml.dump(config, default_flow_style=False)

        stream.write(f'---\n{document}\n')
def generate_hosts(cluster, config, outdir):
    """Output hosts file snippet to 'hosts'

    The file is created in outdir.  After a comment naming the
    cluster there is one line per node, which contains the first IP
    address of the node, the fully qualified hostname (using the
    resolv_conf domain) and the short hostname.
    """

    hosts_path = os.path.join(outdir, 'hosts')

    with open(hosts_path, 'w', encoding='utf-8') as stream:
        stream.write(f'# autocluster {cluster}\n')

        suffix = config['resolv_conf']['domain']

        for name, attrs in config['nodes'].items():
            primary_ip = attrs['ips'][0]
            stream.write(f'{primary_ip}\t{name}.{suffix} {name}\n')
389 def generate_ssh_config(config, outdir):
390 """Output ssh_config file snippet to 'ssh_config'"""
392 outfile = os.path.join(outdir, 'ssh_config')
394 with open(outfile, 'w', encoding='utf-8') as stream:
395 for hostname, node in config['nodes'].items():
396 ip_address = node['ips'][0]
397 ssh_key = os.path.join(os.environ['HOME'], '.ssh/id_autocluster')
398 section = f'''Host {hostname}
399 HostName {ip_address}
402 UserKnownHostsFile /dev/null
403 StrictHostKeyChecking no
404 PasswordAuthentication no
405 IdentityFile {ssh_key}
410 print(section, file=stream)
413 def generate_ansible_inventory(config, outdir):
414 """Output Ansible inventory file to 'ansible.inventory'"""
418 for hostname, node in config['nodes'].items():
420 node_type = node['type']
421 hostnames = type_map.get(node['type'], [])
422 hostnames.append(hostname)
423 type_map[node['type']] = hostnames
425 outfile = os.path.join(outdir, 'ansible.inventory')
427 with open(outfile, 'w', encoding='utf-8') as stream:
428 for node_type, hostnames in type_map.items():
429 print(f'[{node_type}_nodes]', file=stream)
431 for hostname in hostnames:
432 print(hostname, file=stream)
def cluster_defaults():
    """Dump default YAML configuration to stdout

    The defaults are loaded with load_defaults() and printed in YAML
    block style.
    """

    print(yaml.dump(load_defaults(), default_flow_style=False))
def cluster_dump(cluster):
    """Dump cluster YAML configuration to stdout

    The combined configuration for cluster is printed in YAML block
    style, without the generated 'nodes' and 'shares' items.
    """

    # Generated, internal values that do not appear in input files
    internal_keys = ('nodes', 'shares')

    config = load_config(cluster)
    visible = {key: val for key, val in config.items()
               if key not in internal_keys}

    print(yaml.dump(visible, default_flow_style=False))
def get_state_dir(cluster):
    """Return the state directory for the current cluster

    This is the directory named after the cluster inside
    ".autocluster" in the current working directory.
    """

    top_dir = os.path.join(os.getcwd(), '.autocluster')
    return os.path.join(top_dir, cluster)
def get_config_file_path(cluster):
    """Return the name of the generated config file for cluster

    The file is called "config.yml" and it is located in the state
    directory for cluster.
    """

    state_dir = get_state_dir(cluster)
    return os.path.join(state_dir, 'config.yml')
def announce(group, cluster, command):
    """Print a banner announcing the current step

    The banner is a heading made up of group, cluster and command,
    padded to 56 characters, inside a box of '#' characters.
    """

    rule = '############################################################'
    title = f'{group} {cluster} {command}'

    print(rule, f'# {title:56} #', rule, sep='\n')
def cluster_generate(cluster):
    """Generate metadata files from configuration

    Creates the state directory for cluster and writes the combined
    configuration, hosts file snippet, SSH configuration and Ansible
    inventory into it.

    For the 9p and virtiofs cluster filesystem types a "clusterfs"
    directory is also created in the state directory.  If the
    AUTOCLUSTER_QEMU_GROUP environment variable is set then that
    directory is made group writable and setgid, and its group is
    changed to the one named by the variable.
    """

    announce('cluster', cluster, 'generate')

    config = load_config(cluster)

    outdir = get_state_dir(cluster)
    os.makedirs(outdir, exist_ok=True)

    generate_config_yml(cluster, config)
    generate_hosts(cluster, config, outdir)
    generate_ssh_config(config, outdir)
    generate_ansible_inventory(config, outdir)

    # Only these cluster filesystem types need a directory here
    if config['clusterfs']['type'] not in ['9p', 'virtiofs']:
        return

    clusterfs_dir = os.path.join(outdir, 'clusterfs')
    os.makedirs(clusterfs_dir, exist_ok=True)

    qemu_group = os.environ.get('AUTOCLUSTER_QEMU_GROUP')
    if qemu_group is None:
        return

    # rwx for user, setgid + rwx for group, r-x for others
    permissions = (stat.S_IRWXU |
                   stat.S_ISGID | stat.S_IRWXG |
                   stat.S_IROTH | stat.S_IXOTH)
    os.chmod(clusterfs_dir, permissions)
    shutil.chown(clusterfs_dir, group=qemu_group)
def vagrant_command(cluster, config, args):
    """Run vagrant with the given arguments

    args is the list of arguments that follow "vagrant" on the
    command line.  Before the command is run, several environment
    variables are set in this process: the default provider from
    config, the location of the Vagrant files under INSTALL_DIR, the
    Vagrant dotfile path inside the state directory and the state
    directory itself.

    Raises subprocess.CalledProcessError if vagrant fails.
    """

    state_dir = get_state_dir(cluster)

    os.environ.update({
        'VAGRANT_DEFAULT_PROVIDER': config['vagrant_provider'],
        'VAGRANT_CWD': os.path.join(INSTALL_DIR, 'vagrant'),
        'VAGRANT_DOTFILE_PATH': os.path.join(state_dir, '.vagrant'),
        'AUTOCLUSTER_STATE': state_dir,
    })

    # A new list is built, so the caller's list is left untouched
    subprocess.check_call(['vagrant', *args])
def cluster_status(cluster):
    """Check status of cluster using Vagrant

    Announces the step, loads the configuration for cluster and runs
    "vagrant status".
    """

    announce('cluster', cluster, 'status')

    vagrant_command(cluster, load_config(cluster), ['status'])
def get_shared_disk_names(cluster, config):
    """Return shared disks names for cluster, None if none

    There are no shared disks when the cluster filesystem type is 9p
    or virtiofs, when no node has shared storage or when the
    configured number of shared disks is less than 1.  Otherwise the
    result is a list of image file names, numbered from 01.
    """

    if config['clusterfs']['type'] in ['9p', 'virtiofs']:
        return None

    if not any(node['has_shared_storage']
               for node in config['nodes'].values()):
        return None

    count = config['shared_disks']['count']

    # Honour the documented contract: no disks means None rather
    # than an empty list
    if count < 1:
        return None

    return [f'autocluster_{cluster}_shared{n + 1:02d}.img'
            for n in range(count)]
559 def delete_shared_disk_images(cluster, config):
560 """Delete any shared disks for the given cluster"""
562 if config['vagrant_provider'] != 'libvirt':
565 shared_disks = get_shared_disk_names(cluster, config)
566 if shared_disks is None:
570 print('warning: unable to check for stale shared disks (no libvirt)',
574 conn = libvirt.open()
575 storage_pool = conn.storagePoolLookupByName('autocluster')
576 for disk in shared_disks:
578 volume = storage_pool.storageVolLookupByName(disk)
580 except libvirt.libvirtError as exc:
581 if exc.get_error_code() != libvirt.VIR_ERR_NO_STORAGE_VOL:
586 def create_shared_disk_images(cluster, config):
587 """Create shared disks for the given cluster"""
589 if config['vagrant_provider'] != 'libvirt':
592 shared_disks = get_shared_disk_names(cluster, config)
593 if shared_disks is None:
597 raise LIBVIRT_IMPORT_ERROR
599 conn = libvirt.open()
600 storage_pool = conn.storagePoolLookupByName('autocluster')
602 size = str(config['shared_disks']['size'])
603 if size[-1].isdigit():
610 for disk in shared_disks:
611 xml = f'''<volume type='file'>
613 <capacity unit="{unit}">{capacity}</capacity>
615 storage_pool.createXML(xml)
620 def cluster_destroy_quiet(cluster, retries=1):
621 """Destroy and undefine cluster using Vagrant - don't announce"""
623 config = load_config(cluster)
625 # First attempt often fails, so try a few times
626 for _ in range(retries):
628 vagrant_command(cluster,
630 ['destroy', '-f', '--no-parallel'])
631 except subprocess.CalledProcessError as exc:
634 delete_shared_disk_images(cluster, config)
635 # Remove the state directory
636 statedir = get_state_dir(cluster)
637 shutil.rmtree(statedir)
def cluster_destroy(cluster, retries=1):
    """Destroy and undefine cluster using Vagrant

    Announces the step and then hands over to
    cluster_destroy_quiet(), passing on retries.  Exits with an error
    message if the generated configuration file for cluster does not
    exist.
    """

    announce('cluster', cluster, 'destroy')

    if os.path.exists(get_config_file_path(cluster)):
        cluster_destroy_quiet(cluster, retries)
        return

    sys.exit('ERROR: Generated configuration for cluster does not exist')
655 def cluster_create(cluster, retries=1):
656 """Create and boot cluster using Vagrant"""
658 announce('cluster', cluster, 'create')
660 config = load_config(cluster)
662 # Create our own shared disk images to protect against
663 # https://github.com/vagrant-libvirt/vagrant-libvirt/issues/825
664 create_shared_disk_images(cluster, config)
666 # First attempt sometimes fails, so try a few times
667 for _ in range(retries):
669 vagrant_command(cluster, config, ['up'])
670 except subprocess.CalledProcessError as exc:
672 cluster_destroy(cluster)
def cluster_ssh_config(cluster):
    """Install SSH configuration for cluster

    Copies the generated "ssh_config" file from the state directory
    to "<cluster>.config" in the ".ssh/autocluster.d" directory under
    $HOME, creating that directory if necessary.
    """

    announce('cluster', cluster, 'ssh_config')

    generated = os.path.join(get_state_dir(cluster), 'ssh_config')

    install_dir = os.path.join(os.environ['HOME'], '.ssh/autocluster.d')
    os.makedirs(install_dir, exist_ok=True)

    shutil.copyfile(generated,
                    os.path.join(install_dir, f'{cluster}.config'))
691 def cluster_setup(cluster, retries=1):
692 """Setup cluster using Ansible"""
694 announce('cluster', cluster, 'setup')
696 # Could put these in the state directory, but disable for now
697 os.environ['ANSIBLE_RETRY_FILES_ENABLED'] = 'false'
699 state_dir = get_state_dir(cluster)
700 config_file = get_config_file_path(cluster)
701 inventory = os.path.join(state_dir, 'ansible.inventory')
702 playbook = os.path.join(INSTALL_DIR, 'ansible/node/site.yml')
703 args = ['ansible-playbook',
704 '-e', f'@{config_file}',
708 # First attempt sometimes fails, so try a few times
709 for _ in range(retries):
711 subprocess.check_call(args)
712 except subprocess.CalledProcessError as exc:
714 print(f'warning: cluster setup exited with {ret}, retrying',
720 sys.exit(f'ERROR: cluster setup exited with {ret}')
def cluster_build(cluster):
    """Build cluster using Ansible

    Runs the generate, create, ssh_config and setup steps in order.
    If a generated configuration file already exists for cluster then
    the cluster is destroyed first.
    """

    if os.path.exists(get_config_file_path(cluster)):
        cluster_destroy(cluster, 10)

    # Each step with the extra arguments (retries) that it is given
    steps = (
        (cluster_generate, ()),
        (cluster_create, (10,)),
        (cluster_ssh_config, ()),
        (cluster_setup, (5,)),
    )
    for step, extra_args in steps:
        step(cluster, *extra_args)
def cluster_command(cluster, command):
    """Run appropriate cluster command function

    command is the name of a cluster command, such as 'generate' or
    'build'.  The usage message is printed for an unknown command.
    """

    # NOTE(review): the call for 'defaults' and the handling of
    # unknown commands are on lines that are not visible in the
    # reviewed source - cluster_defaults() and usage() are assumed,
    # confirm
    handlers = {
        'defaults': cluster_defaults,
        'dump': lambda: cluster_dump(cluster),
        'status': lambda: cluster_status(cluster),
        'generate': lambda: cluster_generate(cluster),
        'destroy': lambda: cluster_destroy(cluster, 10),
        'create': lambda: cluster_create(cluster),
        'ssh_config': lambda: cluster_ssh_config(cluster),
        'setup': lambda: cluster_setup(cluster),
        'build': lambda: cluster_build(cluster),
    }

    handler = handlers.get(command)
    if handler is None:
        usage()
    else:
        handler()
def get_host_setup_path(file):
    """Return the path for host setup file

    file is the name of a file in the "ansible/host" directory under
    INSTALL_DIR.
    """

    host_setup_dir = os.path.join(INSTALL_DIR, 'ansible/host')
    return os.path.join(host_setup_dir, file)
def get_platform_file(platform):
    """Return the name of the host setup file for platform

    The file is called "autocluster_setup_<platform>.yml" and the
    result is its path, as returned by get_host_setup_path().
    """

    filename = f'autocluster_setup_{platform}.yml'
    return get_host_setup_path(filename)
def sanity_check_platform_name(platform):
    """Ensure that host platform is supported

    A platform is supported if its host setup file is readable.
    Exits with an error message if it is not.
    """

    if os.access(get_platform_file(platform), os.R_OK):
        return

    sys.exit(f'Host platform "{platform}" not supported')
def host_setup(platform):
    """Set up host machine for use with Autocluster

    Runs ansible-playbook with the host setup file for platform,
    followed by the SSH setup file.  Ansible retry files are disabled
    via the environment.  Exits with an error message if
    ansible-playbook fails.
    """

    announce('host', platform, 'setup')

    playbooks = [
        get_platform_file(platform),
        get_host_setup_path('autocluster_setup_ssh.yml'),
    ]
    os.environ['ANSIBLE_RETRY_FILES_ENABLED'] = 'false'

    try:
        subprocess.check_call(['ansible-playbook', *playbooks])
    except subprocess.CalledProcessError as exc:
        sys.exit(f'ERROR: host setup exited with {exc.returncode}')
798 """Main autocluster command-line handling"""
800 if len(sys.argv) < 2:
803 if sys.argv[1] == 'cluster':
804 if len(sys.argv) < 4:
807 cluster = sys.argv[2]
809 sanity_check_cluster_name(cluster)
811 for command in sys.argv[3:]:
812 cluster_command(cluster, command)
814 elif sys.argv[1] == 'host':
815 if len(sys.argv) < 4:
818 platform = sys.argv[2]
820 sanity_check_platform_name(platform)
822 for command in sys.argv[3:]:
823 if command == 'setup':
830 if __name__ == '__main__':