From 5517d60653b0aa25a354c3a35cbfb2aa89afb41a Mon Sep 17 00:00:00 2001 From: Tim Beale Date: Tue, 27 Nov 2018 13:50:32 +1300 Subject: [PATCH] traffic_replay: Add a max-members option to cap group size traffic_replay tries to distribute the users among the groups in a realistic manner - some groups will have almost all users in them. However, this becomes a problem when testing a really large database, e.g. we may want 100K users, but no more than 5K users in each group. This patch adds a max-members option so we can limit how big the groups actually get. If we detect that a group exceeds the max-members, we reset the group's probability (of getting selected) to zero, and then recalculate the cumulative distribution. This means that the group should no longer get selected by generate_random_membership(). (Note we can't completely remove the group from the list because that changes the list-index-to-group-ID mapping). Signed-off-by: Tim Beale Reviewed-by: Andrew Bartlett Autobuild-User(master): Andrew Bartlett Autobuild-Date(master): Tue Dec 4 12:22:50 CET 2018 on sn-devel-144 --- python/samba/emulate/traffic.py | 34 +++++++++++++++++++++++++++++---- script/traffic_replay | 4 ++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/python/samba/emulate/traffic.py b/python/samba/emulate/traffic.py index fd886e3865e..291162f279a 100644 --- a/python/samba/emulate/traffic.py +++ b/python/samba/emulate/traffic.py @@ -1764,8 +1764,8 @@ def clean_up_accounts(ldb, instance_id): def generate_users_and_groups(ldb, instance_id, password, number_of_users, number_of_groups, - group_memberships, machine_accounts, - traffic_accounts=True): + group_memberships, max_members, + machine_accounts, traffic_accounts=True): """Generate the required users and groups, allocating the users to those groups.""" memberships_added = 0 @@ -1792,7 +1792,8 @@ def generate_users_and_groups(ldb, instance_id, password, groups_added, number_of_users, users_added, - group_memberships) + 
group_memberships, + max_members) LOGGER.info("Adding users to groups") add_users_to_groups(ldb, instance_id, assignments) memberships_added = assignments.total() @@ -1808,11 +1809,12 @@ def generate_users_and_groups(ldb, instance_id, password, class GroupAssignments(object): def __init__(self, number_of_groups, groups_added, number_of_users, - users_added, group_memberships): + users_added, group_memberships, max_members): self.count = 0 self.generate_group_distribution(number_of_groups) self.generate_user_distribution(number_of_users, group_memberships) + self.max_members = max_members self.assignments = defaultdict(list) self.assign_groups(number_of_groups, groups_added, number_of_users, users_added, group_memberships) @@ -1825,6 +1827,9 @@ class GroupAssignments(object): # value, so we can use random.random() as a simple index into the list dist = [] total = sum(weights) + if total == 0: + return None + cumulative = 0.0 for probability in weights: cumulative += probability @@ -1868,6 +1873,7 @@ class GroupAssignments(object): weights.append(p) # convert the weights to a cumulative distribution between 0.0 and 1.0 + self.group_weights = weights self.group_dist = self.cumulative_distribution(weights) def generate_random_membership(self): @@ -1888,6 +1894,18 @@ class GroupAssignments(object): def get_groups(self): return self.assignments.keys() + def cap_group_membership(self, group, max_members): + """Prevent the group's membership from exceeding the max specified""" + num_members = len(self.assignments[group]) + if num_members >= max_members: + LOGGER.info("Group {0} has {1} members".format(group, num_members)) + + # remove this group and then recalculate the cumulative + # distribution, so this group is no longer selected + self.group_weights[group - 1] = 0 + new_dist = self.cumulative_distribution(self.group_weights) + self.group_dist = new_dist + def add_assignment(self, user, group): # the assignments are stored in a dictionary where key=group, # 
value=list-of-users-in-group (indexing by group-ID allows us to @@ -1896,6 +1914,10 @@ class GroupAssignments(object): self.assignments[group].append(user) self.count += 1 + # check if there's a cap on how big the groups can grow + if self.max_members: + self.cap_group_membership(group, self.max_members) + def assign_groups(self, number_of_groups, groups_added, number_of_users, users_added, group_memberships): """Allocate users to groups. @@ -1915,6 +1937,10 @@ class GroupAssignments(object): float(group_memberships) * (float(users_added) / float(number_of_users))) + if self.max_members: + group_memberships = min(group_memberships, + self.max_members * number_of_groups) + existing_users = number_of_users - users_added - 1 existing_groups = number_of_groups - groups_added - 1 while self.total() < group_memberships: diff --git a/script/traffic_replay b/script/traffic_replay index 991c9a9eb03..0ee0f9b6575 100755 --- a/script/traffic_replay +++ b/script/traffic_replay @@ -112,6 +112,8 @@ def main(): user_gen_group.add_option('--group-memberships', type='int', default=0, help='Total memberships to assign across all ' 'test users and all groups') + user_gen_group.add_option('--max-members', type='int', default=None, + help='Max users to add to any one group') parser.add_option_group(user_gen_group) sambaopts = options.SambaOptions(parser) @@ -333,6 +335,7 @@ def main(): opts.number_of_users, opts.number_of_groups, opts.group_memberships, + opts.max_members, machine_accounts=computer_accounts, traffic_accounts=False) sys.exit() @@ -346,6 +349,7 @@ def main(): number_of_users, opts.number_of_groups, opts.group_memberships, + opts.max_members, machine_accounts=len(conversations), traffic_accounts=True) -- 2.34.1