Tim Andersson has proposed merging ~andersson123/autopkgtest-cloud:copy-security-group-more-robust into autopkgtest-cloud:master.
Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/477569

Adds a timeout, a sleep between attempts, and retry loops to make calls to nova more robust, in order to avoid having autopkgtest@*.service services end up in an inactive/dead state.
-- 
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~andersson123/autopkgtest-cloud:copy-security-group-more-robust into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group index 3de9a98..f1df12a 100755 --- a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group +++ b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/copy-security-group @@ -9,11 +9,37 @@ If --delete-only is given, it only deletes existing groups called NAME. import argparse import os +import signal +import time +from contextlib import contextmanager from keystoneauth1 import session +from keystoneauth1.exceptions import InternalServerError from keystoneauth1.identity import v2, v3 from neutronclient.v2_0 import client + +class TimeOutException(Exception): + pass + + +@contextmanager +def raise_timeout(timeout): + def _handler(signum, frame): + raise TimeOutException + + signal.signal(signal.SIGALRM, _handler) + signal.alarm(timeout) + + try: + yield + except TimeOutException: + print(f"Timeout after {timeout} seconds") + raise + finally: + signal.alarm(0) + + # Members in a security group rule that cannot be copied. 
RULE_MEMBERS_IGNORE = [ "id", @@ -27,6 +53,10 @@ RULE_MEMBERS_IGNORE = [ "normalized_cidr", ] +# keystoneauth1.exceptions.http.InternalServerError +NOVA_TIMEOUT = 600 +NOVA_RETRY_SLEEP_DURATION = 30 + def main(): parser = argparse.ArgumentParser(description="Copy security groups") @@ -68,43 +98,87 @@ def main(): region_name=os.environ["OS_REGION_NAME"], ) - # Find the source group - crashes if it does not exists - source = [ - g - for g in neutron.list_security_groups()["security_groups"] - if g["name"] == args.source - ][0] + security_groups = None + with raise_timeout(NOVA_TIMEOUT): + while not security_groups: + try: + security_groups = neutron.list_security_groups()[ + "security_groups" + ] + except InternalServerError as e: + print(f"Listing security groups failed with {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) + + source = [g for g in security_groups if g["name"] == args.source][0] description = "copy {} of {} ({})".format( args.name, args.source, source["description"] ) # Delete any existing group with the same name - existing_groups = [ - g - for g in neutron.list_security_groups()["security_groups"] - if g["name"] == args.name - ] - existing_ports = neutron.list_ports()["ports"] + existing_groups = [g for g in security_groups if g["name"] == args.name] + + existing_ports = None + with raise_timeout(NOVA_TIMEOUT): + while not existing_ports: + try: + existing_ports = neutron.list_ports()["ports"] + except InternalServerError as e: + print(f"Listing ports failed with {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) + for target in existing_groups: print("Deleting existing group", target) for port in existing_ports: if target["id"] in port["security_groups"]: print("Deleting port in group:", target["id"]) + deleted = False + with raise_timeout(NOVA_TIMEOUT): + while not deleted: + try: + neutron.delete_port(port["id"]) + deleted = True + except Exception as e: + print(f"Could not delete port: {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) + + with 
raise_timeout(NOVA_TIMEOUT): + deleted = False + while not deleted: try: - neutron.delete_port(port["id"]) + neutron.delete_security_group(target["id"]) except Exception as e: - print("Could not delete port:", e) - neutron.delete_security_group(target["id"]) + print(f"Could not delete security group: {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) if not args.delete_only: print("Creating", description) - target = neutron.create_security_group( - {"security_group": {"name": args.name, "description": description}} - )["security_group"] + with raise_timeout(NOVA_TIMEOUT): + target = None + while not target: + try: + target = neutron.create_security_group( + { + "security_group": { + "name": args.name, + "description": description, + } + } + )["security_group"] + except Exception as e: + print(f"Failed to create security group: {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) for rule in target["security_group_rules"]: - neutron.delete_security_group_rule(rule["id"]) + with raise_timeout(NOVA_TIMEOUT): + deleted = False + while not deleted: + try: + neutron.delete_security_group_rule(rule["id"]) + deleted = True + except Exception as e: + print(f"Failed to delete security group with: {e}") + time.sleep(NOVA_RETRY_SLEEP_DURATION) for rule in source["security_group_rules"]: rule = { @@ -116,7 +190,19 @@ def main(): rule["security_group_id"] = target["id"] print("Copying rule", rule) - neutron.create_security_group_rule({"security_group_rule": rule}) + with raise_timeout(NOVA_TIMEOUT): + created = False + while not created: + try: + neutron.create_security_group_rule( + {"security_group_rule": rule} + ) + created = True + except Exception as e: + print( + f"Failed to create security group rule with: {e}" + ) + time.sleep(NOVA_RETRY_SLEEP_DURATION) if __name__ == "__main__":
-- Mailing list: https://launchpad.net/~canonical-ubuntu-qa Post to : canonical-ubuntu-qa@lists.launchpad.net Unsubscribe : https://launchpad.net/~canonical-ubuntu-qa More help : https://help.launchpad.net/ListHelp