diff options
author | Martin Babinsky <mbabinsk@redhat.com> | 2015-10-30 13:59:03 +0100 |
---|---|---|
committer | Martin Basti <mbasti@redhat.com> | 2015-11-13 17:39:24 +0100 |
commit | a6cdafd37451c99c1a7492ce5901195405881bb2 (patch) | |
tree | e60b6c75f45b275a87ab2f27e437a212050c6cfe /install/tools/ipa-replica-manage | |
parent | 19044e87ac54200b7710b8ec5405175c3d749e76 (diff) | |
download | freeipa-a6cdafd37451c99c1a7492ce5901195405881bb2.tar.gz freeipa-a6cdafd37451c99c1a7492ce5901195405881bb2.tar.xz freeipa-a6cdafd37451c99c1a7492ce5901195405881bb2.zip |
check for disconnected topology and deleted agreements for all suffices
The code in ipa-replica-manage which checks for disconnected topology and
deleted agreements during node removal was generalized so that it now performs
these checks for all suffixes to which the node belongs.
https://fedorahosted.org/freeipa/ticket/5309
Reviewed-By: Petr Vobornik <pvoborni@redhat.com>
Diffstat (limited to 'install/tools/ipa-replica-manage')
-rwxr-xr-x | install/tools/ipa-replica-manage | 245 |
1 files changed, 165 insertions, 80 deletions
diff --git a/install/tools/ipa-replica-manage b/install/tools/ipa-replica-manage index 2de6fd799..ebbdf5c33 100755 --- a/install/tools/ipa-replica-manage +++ b/install/tools/ipa-replica-manage @@ -570,46 +570,97 @@ def check_last_link(delrepl, realm, dirman_passwd, force): else: return None -def check_last_link_managed(api, masters, hostname, force): + +def map_masters_to_suffices(masters, suffices): + masters_to_suffix = {} + suffix_name_to_root = { + s['iparepltopoconfroot'][0]: s['cn'][0] for s in suffices + } + + for master in masters: + managed_suffices = master['iparepltopomanagedsuffix'] + for suffix in managed_suffices: + suffix_name = suffix_name_to_root[suffix] + try: + masters_to_suffix[suffix_name].append(master) + except KeyError: + masters_to_suffix[suffix_name] = [master] + + return masters_to_suffix + + +def check_hostname_in_masters(hostname, masters): + master_cns = {m['cn'][0] for m in masters} + return hostname in master_cns + + +def check_last_link_managed(api, hostname, masters, force): """ Check if 'hostname' is safe to delete. - :returns: tuple with lists of current and future errors in topology - (current_errors, new_errors) + :returns: a dictionary of topology errors across all suffices in the form + {<suffix name>: (<original errors>, + <errors after removing the node>)} """ + suffices = api.Command.topologysuffix_find(u'')['result'] + suffix_to_masters = map_masters_to_suffices(masters, suffices) + topo_errors_by_suffix = {} + + for suffix in suffices: + suffix_name = suffix['cn'][0] + suffix_members = suffix_to_masters[suffix_name] + print("Checking connectivity in topology suffix '{0}'".format( + suffix_name)) + if not check_hostname_in_masters(hostname, suffix_members): + print( + "'{0}' is not a part of topology suffix '{1}'".format( + hostname, suffix_name + ) + ) + print("Not checking connectivity") + continue - segments = api.Command.topologysegment_find(u'realm', sizelimit=0).get('result') - graph = create_topology_graph(masters, segments) + segments = api.Command.topologysegment_find( + suffix_name, sizelimit=0).get('result') + graph = create_topology_graph(suffix_to_masters[suffix_name], segments) + + # check topology before removal + orig_errors = get_topology_connection_errors(graph) + if orig_errors: + print("Current topology in suffix '{0}' is disconnected:".format( + suffix_name)) + print("Changes are not replicated to all servers and data are " + "probably inconsistent.") + print("You need to add segments to reconnect the topology.") + print_connect_errors(orig_errors) + + # after removal + try: + graph.remove_vertex(hostname) + except ValueError: + pass # ignore already deleted master, continue to clean + + new_errors = get_topology_connection_errors(graph) + if new_errors: + print("WARNING: Removal of '{0}' will lead to disconnected " + "topology in suffix '{1}'".format(hostname, suffix_name)) + print("Changes will not be replicated to all servers and data will" + " become inconsistent.") + print("You need to add segments to prevent disconnection of the " + "topology.") + print("Errors in topology after removal:") + print_connect_errors(new_errors) + + if orig_errors or new_errors: + if not force: + sys.exit("Aborted") + else: + print("Forcing removal of %s" % hostname) - # check topology before removal - orig_errors = get_topology_connection_errors(graph) - if orig_errors: - print("Current topology is disconnected:") - print("Changes are not replicated to all servers and data are probably inconsistent.") - print("You need to add segments to reconnect the topology.") - print_connect_errors(orig_errors) + topo_errors_by_suffix[suffix_name] = (orig_errors, new_errors) - # after removal - try: - graph.remove_vertex(hostname) - except ValueError: - pass # ignore already deleted master, continue to clean - - new_errors = get_topology_connection_errors(graph) - if new_errors: - print("WARNING: Topology after removal of %s will be disconnected." % hostname) - print("Changes will not be replicated to all servers and data will become inconsistent.") - print("You need to add segments to prevent disconnection of the topology.") - print("Errors in topology after removal:") - print_connect_errors(new_errors) - - if orig_errors or new_errors: - if not force: - sys.exit("Aborted") - else: - print("Forcing removal of %s" % hostname) + return topo_errors_by_suffix - return (orig_errors, new_errors) def print_connect_errors(errors): for error in errors: @@ -725,8 +776,9 @@ def del_master_managed(realm, hostname, options): # 2. Get all masters masters = api.Command.server_find('', sizelimit=0)['result'] - # 3. Check topology - topo_errors = check_last_link_managed(api, masters, hostname, options.force) + # 3. Check topology connectivity in all suffices + topo_errors = check_last_link_managed( + api, hostname, masters, options.force) # 4. Check that we are not leaving the installation without CA and/or DNS # And pick new CA master. @@ -757,58 +809,91 @@ def del_master_managed(realm, hostname, options): # 7. Clean RUV for the deleted master # Wait for topology plugin to delete segments - i = 0 - m_cns = [m['cn'][0] for m in masters] # masters before removal - while True: - left = api.Command.topologysegment_find( - u'realm', iparepltoposegmentleftnode=hostname_u, sizelimit=0)['result'] - right = api.Command.topologysegment_find( - u'realm', iparepltoposegmentrightnode=hostname_u, sizelimit=0)['result'] - - # If the server was already deleted, we can expect that all removals - # had been done in previous run and dangling segments were not deleted. - if hostname not in m_cns: - print("Skipping replication agreement deletion check") - break - - # Relax check if topology was or is disconnected. Disconnected topology - # can contain segments with already deleted servers. Check only if - # segments of servers, which can contact this server, and the deleted - # server were removed. - # This code should handle a case where there was a topology with - # a central node(B): A <-> B <-> C, where A is current server. - # After removal of B, topology will be disconnected and removal of - # segment B <-> C won't be replicated back to server A, therefore - # presence of the segment has to be ignored. - if topo_errors[0] or topo_errors[1]: - # use errors after deletion because we don't care if some server - # can't contact the deleted one - cant_contact_me = [e[0] for e in topo_errors[1] if options.host in e[2]] - can_contact_me = set(m_cns) - set(cant_contact_me) - left = [s for s in left if s['iparepltoposegmentrightnode'][0] in can_contact_me] - right = [s for s in right if s['iparepltoposegmentleftnode'][0] in can_contact_me] - - if not left and not right: - print("Agreements deleted") - break - time.sleep(2) - if i == 2: # taking too long, something is wrong, report - print("Waiting for removal of replication agreements") - if i > 90: - print("Taking too long, skipping") - print("Following segments were not deleted:") - for s in left: - print(" %s" % s['cn'][0]) - for s in right: - print(" %s" % s['cn'][0]) - break - i += 1 + check_deleted_segments(hostname_u, masters, topo_errors, options.host) # Clean RUV is handled by the topolgy plugin # 8. And clean up the removed replica DNS entries if any. cleanup_server_dns_entries(realm, hostname, thisrepl.suffix, options) + +def check_deleted_segments(hostname, masters, topo_errors, starting_host): + + def wait_for_segment_removal(hostname, master_cns, suffix_name, + topo_errors): + i = 0 + while True: + left = api.Command.topologysegment_find( + suffix_name, iparepltoposegmentleftnode=hostname, sizelimit=0 + )['result'] + right = api.Command.topologysegment_find( + suffix_name, iparepltoposegmentrightnode=hostname, sizelimit=0 + )['result'] + + # Relax check if topology was or is disconnected. Disconnected + # topology can contain segments with already deleted servers. + # Check only if segments of servers, which can contact this server, + # and the deleted server were removed. + # This code should handle a case where there was a topology with + # a central node(B): A <-> B <-> C, where A is current server. + # After removal of B, topology will be disconnected and removal of + # segment B <-> C won't be replicated back to server A, therefore + # presence of the segment has to be ignored. + if topo_errors[0] or topo_errors[1]: + # use errors after deletion because we don't care if some + # server can't contact the deleted one + cant_contact_me = [e[0] for e in topo_errors[1] + if starting_host in e[2]] + can_contact_me = set(master_cns) - set(cant_contact_me) + left = [s for s in left if s['iparepltoposegmentrightnode'][0] + in can_contact_me] + right = [s for s in right if s['iparepltoposegmentleftnode'][0] + in can_contact_me] + + if not left and not right: + print("Agreements deleted") + return + time.sleep(2) + if i == 2: # taking too long, something is wrong, report + print("Waiting for removal of replication agreements") + if i > 90: + print("Taking too long, skipping") + print("Following segments were not deleted:") + for s in left: + print(" %s" % s['cn'][0]) + for s in right: + print(" %s" % s['cn'][0]) + return + i += 1 + + if not check_hostname_in_masters(hostname, masters): + print("{0} not in masters, skipping agreement deletion check".format( + hostname)) + return + + suffices = api.Command.topologysuffix_find('', sizelimit=0)['result'] + suffix_to_masters = map_masters_to_suffices(masters, suffices) + + for suffix in suffices: + suffix_name = suffix['cn'][0] + suffix_member_cns = [ + m['cn'][0] for m in suffix_to_masters[suffix_name] + ] + + if hostname not in suffix_member_cns: + # If the server was already deleted, we can expect that all + # removals had been done in previous run and dangling segments + # were not deleted. + print("Skipping replication agreement deletion check for " + "suffix '{0}'".format(suffix_name)) + continue + + print("Checking for deleted segments in suffix '{0}'".format( + suffix_name)) + wait_for_segment_removal(hostname, suffix_member_cns, suffix_name, + topo_errors[suffix_name]) + + def del_master_direct(realm, hostname, options): """ Removing of master for realm without managed topology |