Tim Andersson has proposed merging ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master.
Requested reviews:
  Canonical's Ubuntu QA (canonical-ubuntu-qa)

For more details, see:
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/461654
--
Your team Canonical's Ubuntu QA is requested to review the proposed merge of ~andersson123/autopkgtest-cloud:stop-tests-from-webpage into autopkgtest-cloud:master.
diff --git a/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer new file mode 100755 index 0000000..bfec858 --- /dev/null +++ b/charms/focal/autopkgtest-cloud-worker/autopkgtest-cloud/tools/test-killer @@ -0,0 +1,294 @@ +#!/usr/bin/python3 +"""Kills running tests.""" + +import configparser +import json +import logging +import pathlib +import socket +import subprocess +import time +from typing import List + +import amqplib.client_0_8 as amqp +import requests + +WRITER_EXCHANGE_NAME = "stop-running.fanout" +RABBIT_CREDS = "/home/ubuntu/rabbitmq.cred" +MSG_ONLY_KEYS = [ + "uuid", + "not-running-on", +] + +RABBIT_CFG = configparser.ConfigParser() +with open(RABBIT_CREDS, "r") as f: + RABBIT_CFG.read_string("[rabbit]\n" + f.read().replace('"', "")) + + +def amqp_connect() -> amqp.Connection: + """ + Creates an amqp.Connection object from the relevant creds + """ + amqp_con = amqp.Connection( + RABBIT_CFG["rabbit"]["RABBIT_HOST"], + userid=RABBIT_CFG["rabbit"]["RABBIT_USER"], + password=RABBIT_CFG["rabbit"]["RABBIT_PASSWORD"], + confirm_publish=True, + ) + return amqp_con + + +def check_message(msg: dict) -> bool: + """ + Checks the "kill-request" message sent has only the desired keys + + :param msg: the amqp message converted from bytes to dictionary + """ + return list(msg.keys()) == MSG_ONLY_KEYS + + +def get_test_pid(uuid: str) -> int: + """ + Parses the output of ps aux and finds the pid of a running test + with a given uuid + + :param uuid: The given test uuid that is desired to be killed + """ + try: + # get list of running processes + ps_aux_run = subprocess.run( + ["ps", "aux"], + stdout=subprocess.PIPE, + check=True, + ) + # Filter the list for only 'runner' processes + runner_run = subprocess.run( + ["grep", "runner"], + input=ps_aux_run.stdout, + stdout=subprocess.PIPE, + check=True, + ) + # Check all runner processes for the given uuid + # If this one fails, 
the test isn't running on this worker + uuid_run = subprocess.run( + ["grep", uuid], + input=runner_run.stdout, + capture_output=True, + check=True, + ) + except subprocess.CalledProcessError as _: + # We hit this exception if the test with the given uuid + # isn't running on this cloud worker + return None + search_for_test_output = uuid_run.stdout + search_me = search_for_test_output.splitlines() + # We have to assert the length is 1 otherwise we'll only kill + # the first one in the list - which may be the incorrect one + # if there's two processes with same uuid - something is wrong! + assert len(search_me) == 1 + line = search_me[0].decode("utf-8") + if uuid in line: + line = line.split(" ") + line = [x for x in line if x] + pid = line[1] + return int(pid) + + +def place_message_in_queue(info: dict, amqp_con: amqp.Connection): + """ + Places a given dictionary into amqp as an amqp.Message object + into the queue with the WRITER_EXCHANGE_NAME exchange + + :param info: dictionary that'll be converted to an amqp message + :param amqp_con: the amqp connection that test-killer is using + """ + complete_amqp = amqp_con.channel() + complete_amqp.access_request( + "/complete", active=True, read=False, write=True + ) + complete_amqp.exchange_declare( + WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False + ) + complete_amqp.basic_publish( + amqp.Message(json.dumps(info), delivery_mode=2), + WRITER_EXCHANGE_NAME, + "", + ) + + +def kill_process(pid: int, uuid: str) -> bool: + """ + Sends SIGUSR1 to worker. + This causes the worker to go into the fallback failure mode, + in which the worker then exits the test and kills the + openstack server. 
The worker goes on to the next test in the + queue + + :param pid: pid of autopkgtest process to kill + :param uuid: The given test uuid that is desired to be killed + """ + kill_cmd = "kill -USR1 %i" % pid + try: + _ = subprocess.run( + kill_cmd.split(" "), + check=True, + ) + while get_test_pid(uuid) is not None: + time.sleep(1) + return True + except subprocess.CalledProcessError as _: + return False + + +def test_is_queued(uuid: str) -> bool: + """ + Checks autopkgtest-web's queued.json file for the presence of a test + with the given uuid. If the uuid is found, we know the test is currently + queued, and not running on any unit. + + :param uuid: The given test uuid that is desired to be killed + """ + net_name_path = pathlib.Path("/home/ubuntu/net-name.rc") + if not net_name_path.exists(): + logging.warning( + "No net-name.rc! Cannot check queues. Presuming test is not queued." + ) + return False + if "net_prod-proposed-migration" in net_name_path.read_text(): + autopkgtest_url = "https://autopkgtest.ubuntu.com" + elif "net_stg-proposed-migration" in net_name_path.read_text(): + autopkgtest_url = "https://autopkgtest.staging.ubuntu.com" + else: + logging.warning( + "Couldn't check mojo stage, presuming test isn't queued." 
+ ) + return False + queue_req = requests.get(autopkgtest_url + "/queued.json") + if uuid in queue_req.content.decode("utf-8"): + return True + return False + + +def already_checked_this_host(hostnames: List[str]) -> bool: + """ + Checks if the hostname of the worker this is running on is in + the list of hostnames provided + + :param hostnames: List of hostnames that have already checked for the test to be killed + """ + return socket.getfqdn() in hostnames + + +def get_num_workers() -> int: + """ + Retrieves the value for the juju config autopkgtest-cloud-worker option "num_workers" + Falls back to a safe default if parsing fails + """ + worker_conf_files = [ + "/home/ubuntu/worker-lxd-armhf.conf", + "/home/ubuntu/worker-lcy02.conf", + ] + worker_conf = configparser.ConfigParser() + for file in worker_conf_files: + if pathlib.Path(file).exists(): + worker_conf.read_file(file) + try: + return int(worker_conf["autopkgtest"]["num_workers"]) + except Exception as _: + logging.info( + "Parsing worker conf for num_workers failed, falling back to default of 3" + ) + return 3 + + +def process_message(msg: amqp.Message, amqp_con: amqp.Connection): + """ + Callback function processing the amqp message + + :param msg: The amqp message with the values required to kill a specified test + Should look like this when coming directly from the test_manager app: + {"uuid": "uuid-of-test", "not-running-on": []} + And will look like this after two units have failed to find the test + with the specified uuid: + {"uuid": "uuid-of-test", "not-running-on": [ + "hostname1", + "hostname2", + ] + } + :param amqp_con: the amqp connection that test-killer will be using + """ + body = msg.body + if isinstance(body, bytes): + body = body.decode("UTF-8", errors="replace") + info = json.loads(body) + num_workers = get_num_workers() + logging.info("Received request to kill test: %s" % json.dumps(info)) + if not check_message(info): + logging.error( + "Message %s is invalid. 
Ignoring.", json.dumps(info, indent=2) + ) + # Remove the message from the queue + msg.channel.basic_ack(msg.delivery_tag) + return + if len(info["not-running-on"]) == num_workers: + # If the test hasn't been found on any of the workers, we reach this + # Check if the test is currently queued - this could happen in the case + # of infinite looping. + if test_is_queued(info["uuid"]): + msg.channel.basic_ack(msg.delivery_tag) + info["not-running-on"] = [] + place_message_in_queue(info, amqp_con) + else: + msg.channel.basic_ack(msg.delivery_tag) + return + + if already_checked_this_host(info["not-running-on"]): + # We check to see if we've already checked for the job on this cloud worker unit. + msg.channel.basic_ack(msg.delivery_tag) + logging.info( + "Test already found to not be running on this host, placing back into queue." + ) + place_message_in_queue(info, amqp_con) + return + # get the test pid + pid = get_test_pid(info["uuid"]) + if pid is None: + # The test isn't running on this unit + # append this hostname to not-running-on + msg.channel.basic_ack(msg.delivery_tag) + if len(info["not-running-on"]) == (num_workers - 1): + logging.info( + "Job %s not found on any workers, not re-queueing." + % json.dumps(info) + ) + return + info["not-running-on"].append(socket.getfqdn()) + place_message_in_queue(info, amqp_con) + return + # Kill the process + if kill_process(pid, info["uuid"]): + logging.info("Job %s has been killed." % json.dumps(info)) + else: + logging.error( + "Job %s couldn't be killed! Ignoring." 
% json.dumps(info) + ) + msg.channel.basic_ack(msg.delivery_tag) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + amqp_con = amqp_connect() + status_ch = amqp_con.channel() + status_ch.access_request("/complete", active=True, read=True, write=True) + status_ch.exchange_declare( + WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False + ) + queue_name = "tests-to-kill" + status_ch.queue_declare(queue_name, durable=True, auto_delete=False) + status_ch.queue_bind(queue_name, WRITER_EXCHANGE_NAME, queue_name) + logging.info("Listening to requests on %s", queue_name) + status_ch.basic_consume( + "", callback=lambda msg: process_message(msg, amqp_con) + ) + while status_ch.callbacks: + status_ch.wait() diff --git a/charms/focal/autopkgtest-cloud-worker/config.yaml b/charms/focal/autopkgtest-cloud-worker/config.yaml index 3719fad..f374d48 100644 --- a/charms/focal/autopkgtest-cloud-worker/config.yaml +++ b/charms/focal/autopkgtest-cloud-worker/config.yaml @@ -119,3 +119,7 @@ options: description: Percentage of workers that'll accept upstream tests. This is useful to prioritise certain tests. 
type: int + num-workers: + default: 3 + description: The combined total of the number of cloud and lxd workers + type: int diff --git a/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py b/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py index c8874c3..4011346 100644 --- a/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py +++ b/charms/focal/autopkgtest-cloud-worker/reactive/autopkgtest_cloud_worker.py @@ -506,6 +506,7 @@ def write_swift_config(): "config.changed.mirror", "config.changed.net-name", "config.changed.worker-upstream-percentage", + "config.changed.num-workers", ) @when_any("config.set.nova-rcs", "config.set.lxd-remotes") def write_worker_config(): @@ -533,6 +534,7 @@ def write_worker_config(): "worker_upstream_percentage": config().get( "worker-upstream-percentage" ), + "num_workers": config().get("num-workers"), }, "virt": { "package_size_default": config().get("worker-default-flavor"), diff --git a/charms/focal/autopkgtest-cloud-worker/units/test-killer.service b/charms/focal/autopkgtest-cloud-worker/units/test-killer.service new file mode 100644 index 0000000..dcdf846 --- /dev/null +++ b/charms/focal/autopkgtest-cloud-worker/units/test-killer.service @@ -0,0 +1,13 @@ +[Unit] +Description=Test killer +StartLimitIntervalSec=60s +StartLimitBurst=10 + +[Service] +User=ubuntu +ExecStart=/home/ubuntu/autopkgtest-cloud/tools/test-killer +Restart=on-failure +RestartSec=1s + +[Install] +WantedBy=autopkgtest.target diff --git a/charms/focal/autopkgtest-web/config.yaml b/charms/focal/autopkgtest-web/config.yaml index a60637a..ced70a9 100644 --- a/charms/focal/autopkgtest-web/config.yaml +++ b/charms/focal/autopkgtest-web/config.yaml @@ -57,3 +57,7 @@ options: type: string default: ~ description: "List of teams that are allowed to request autopkgtest tests." + admin-nicks: + type: string + default: + description: "Comma separated list of admin nicknames." 
diff --git a/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py b/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py index 1cc391d..fbe82da 100644 --- a/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py +++ b/charms/focal/autopkgtest-web/reactive/autopkgtest_web.py @@ -31,6 +31,7 @@ CONFIG_DIR = pathlib.Path("/home/ubuntu/.config/autopkgtest-web/") for parent in reversed(CONFIG_DIR.parents): parent.mkdir(mode=0o770, exist_ok=True) ALLOWED_REQUESTOR_TEAMS_PATH = CONFIG_DIR / "allowed-requestor-teams" +ADMIN_NICKS_PATH = CONFIG_DIR / "admin-nicks" PUBLIC_SWIFT_CREDS_PATH = os.path.expanduser("~ubuntu/public-swift-creds") @@ -256,6 +257,7 @@ def set_up_web_config(apache): # webcontrol CGI scripts ScriptAlias /request.cgi {webcontrol_dir}/request.cgi/ + ScriptAlias /test-manager.cgi {webcontrol_dir}/test-manager.cgi/ ScriptAlias /login {webcontrol_dir}/request.cgi/login ScriptAlias /logout {webcontrol_dir}/request.cgi/logout ScriptAlias /private-results {webcontrol_dir}/private-results.cgi/ @@ -284,6 +286,15 @@ def write_allowed_teams(): allowed_teams_path.write_text(allowed_requestor_teams, encoding="utf-8") +@when_all( + "config.changed.admin-nicks", + "config.set.admin-nicks", +) +def write_admin_nicks(): + admin_nicks = config().get("admin-nicks") + ADMIN_NICKS_PATH.write_text(admin_nicks, encoding="utf-8") + + @when_all("config.changed.github-secrets", "config.set.github-secrets") def write_github_secrets(): status.maintenance("Writing github secrets") diff --git a/charms/focal/autopkgtest-web/webcontrol/browse.cgi b/charms/focal/autopkgtest-web/webcontrol/browse.cgi index 309fb82..a1c88c3 100755 --- a/charms/focal/autopkgtest-web/webcontrol/browse.cgi +++ b/charms/focal/autopkgtest-web/webcontrol/browse.cgi @@ -13,27 +13,17 @@ import flask from helpers.admin import select_abnormally_long_jobs from helpers.exceptions import RunningJSONNotFound from helpers.utils import ( + get_admin_nicks, get_all_releases, get_autopkgtest_cloud_conf, 
get_supported_releases, - setup_key, + initialise_app, ) -from werkzeug.middleware.proxy_fix import ProxyFix # Initialize app -PATH = os.path.join( - os.path.sep, os.getenv("XDG_RUNTIME_DIR", "/run"), "autopkgtest_webcontrol" -) -os.makedirs(PATH, exist_ok=True) -app = flask.Flask("browse") -# we don't want a long cache, as we only serve files that are regularly updated +PATH, app, secret_path, _ = initialise_app("browse") app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 60 -app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1) - -secret_path = os.path.join(PATH, "secret_key") -setup_key(app, secret_path) - db_con = None swift_container_url = None @@ -326,6 +316,7 @@ def package_overview(package, _=None): } } } + show_stop = flask.session.get("nickname", "") in get_admin_nicks() return render( "browse-package.html", @@ -341,6 +332,7 @@ def package_overview(package, _=None): title_suffix="- %s" % package, running=running_info, queues_info=queues_info, + show_stop=show_stop, ) @@ -491,6 +483,7 @@ def package_release_arch(package, release, arch, _=None): ), ) + show_stop = flask.session.get("nickname", "") in get_admin_nicks() return render( "browse-results.html", package=package, @@ -498,6 +491,7 @@ def package_release_arch(package, release, arch, _=None): arch=arch, package_results=results, title_suffix="- %s/%s/%s" % (package, release, arch), + show_stop=show_stop, ) @@ -573,6 +567,7 @@ def running(): running_count = 0 for pkg in packages: running_count += len(running_info[pkg].keys()) + show_stop = flask.session.get("nickname", "") in get_admin_nicks() return render( "browse-running.html", @@ -582,6 +577,7 @@ def running(): queues_lengths=queues_lengths, running=running_info, running_count=running_count, + show_stop=show_stop, ) @@ -591,9 +587,11 @@ def admin(): pruned_running_info = select_abnormally_long_jobs( running_info, get_test_id=get_test_id, db_con=db_con ) + show_stop = flask.session.get("nickname", "") in get_admin_nicks() return render( "browse-admin.html", 
running=pruned_running_info, + show_stop=show_stop, ) diff --git a/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py b/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py index 4e26eb8..aed7730 100644 --- a/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py +++ b/charms/focal/autopkgtest-web/webcontrol/helpers/utils.py @@ -14,12 +14,45 @@ import typing # introduced in python3.7, we use 3.8 from dataclasses import dataclass +from html import escape as _escape import distro_info +from flask import Flask +from flask_openid import OpenID +from werkzeug.middleware.proxy_fix import ProxyFix sqlite3.paramstyle = "named" +def get_admin_nicks() -> typing.List[str]: + admin_nicks_path = "/home/ubuntu/.config/autopkgtest-web/admin-nicks" + try: + return pathlib.Path(admin_nicks_path).read_text().split(",") + except FileNotFoundError as _: + return [] + + +def initialise_app(app_name: str) -> tuple[str, Flask, str, OpenID]: + PATH = os.path.join( + os.path.sep, + os.getenv("XDG_RUNTIME_DIR", "/run"), + "autopkgtest_webcontrol", + ) + os.makedirs(PATH, exist_ok=True) + app = Flask(app_name) + app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1) + # keep secret persistent between CGI invocations + secret_path = os.path.join(PATH, "secret_key") + setup_key(app, secret_path) + oid = OpenID(app, os.path.join(PATH, "openid"), safe_roots=[]) + return PATH, app, secret_path, oid + + +def maybe_escape(value: str) -> str: + """Escape the value if it is True-ish""" + return _escape(value) if value else value + + @dataclass class SqliteWriterConfig: writer_exchange_name = "sqlite-write-me.fanout" @@ -220,3 +253,16 @@ def get_test_id(db_con, release, arch, src): get_test_id._cache = {} + +HTML = """ +<!doctype html> +<html> +<head> +<meta charset="utf-8"> +<title>Autopkgtest Test Request</title> +</head> +<body> +{} +</body> +</html> +""" diff --git a/charms/focal/autopkgtest-web/webcontrol/request/app.py b/charms/focal/autopkgtest-web/webcontrol/request/app.py index 
4fca679..8ee33d4 100644 --- a/charms/focal/autopkgtest-web/webcontrol/request/app.py +++ b/charms/focal/autopkgtest-web/webcontrol/request/app.py @@ -5,33 +5,17 @@ import logging import os import pathlib from collections import ChainMap -from html import escape as _escape -from flask import Flask, redirect, request, session -from flask_openid import OpenID +from flask import redirect, request, session from helpers.exceptions import WebControlException -from helpers.utils import setup_key +from helpers.utils import HTML, initialise_app, maybe_escape from request.submit import Submit -from werkzeug.middleware.proxy_fix import ProxyFix # map multiple GET vars to AMQP JSON request parameter list MULTI_ARGS = {"trigger": "triggers", "ppa": "ppas", "env": "env"} EMPTY = "" -HTML = """ -<!doctype html> -<html> -<head> -<meta charset="utf-8"> -<title>Autopkgtest Test Request</title> -</head> -<body> -{} -</body> -</html> -""" - LOGIN = """ <form action="/login" method="post"> <input type="submit" value="Log in with Ubuntu SSO"> @@ -106,11 +90,6 @@ def invalid(inv_exception, code=400): return HTML.format(html), code -def maybe_escape(value): - """Escape the value if it is True-ish""" - return _escape(value) if value else value - - def get_api_keys(): """ API keys is a json file like this: @@ -132,17 +111,7 @@ def get_api_keys(): # Initialize app -PATH = os.path.join( - os.path.sep, os.getenv("XDG_RUNTIME_DIR", "/run"), "autopkgtest_webcontrol" -) -os.makedirs(PATH, exist_ok=True) -app = Flask("request") -app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1) -# keep secret persistent between CGI invocations -secret_path = os.path.join(PATH, "secret_key") -setup_key(app, secret_path) -oid = OpenID(app, os.path.join(PATH, "openid"), safe_roots=[]) - +PATH, app, secret_path, oid = initialise_app("request") # # Flask routes diff --git a/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py b/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py index 
6fbcef5..aa0c407 100644 --- a/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py +++ b/charms/focal/autopkgtest-web/webcontrol/request/tests/test_app.py @@ -7,6 +7,7 @@ from unittest.mock import mock_open, patch import request.app from helpers.exceptions import WebControlException +from helpers.utils import setup_key from request.submit import Submit @@ -33,7 +34,7 @@ class DistroRequestTests(AppTestBase): """Secret key gets saved and loaded between app restarts.""" orig_key = request.app.app.secret_key - request.app.setup_key(request.app, request.app.secret_path) + setup_key(request.app, request.app.secret_path) self.assertEqual(request.app.app.secret_key, orig_key) @patch("request.app.Submit") diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html index 72d5d5b..07c5ff2 100644 --- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html +++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-admin.html @@ -9,7 +9,7 @@ <!-- Running tests --> {% for p, info in running.items()|sort %} <h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2> - {{ macros.display_running_job(p, info) }} + {{ macros.display_running_job(p, info, show_stop) }} {% endfor %} {% endblock %} diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html index 165cfd8..eff4667 100644 --- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html +++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-package.html @@ -32,7 +32,7 @@ <h3>Running tests</h3> {% for p, info in running.items()|sort %} - {{ macros.display_running_job(p, info) }} + {{ macros.display_running_job(p, info, show_stop) }} {% endfor %} <h3>Queued tests</h3> diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html 
b/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html index fadff6d..57af00f 100644 --- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html +++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-results.html @@ -58,6 +58,11 @@ {% endif %} {% endif %} </td> + <td class="nowrap"> + {% if show_stop and row[6] == "running" and row[10] not in ["-", ""] %} + <a href="{{ base_url }}test-manager.cgi?uuid={{ row[10] }}">☠</a> <!-- Displays skull and crossbones--> + {% endif %} + </td> </tr> {% endfor %} </table> diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html b/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html index 48080ce..091b52a 100644 --- a/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html +++ b/charms/focal/autopkgtest-web/webcontrol/templates/browse-running.html @@ -45,7 +45,7 @@ <!-- Running tests --> {% for p, info in running.items()|sort %} <h2 id="pkg-{{ p }}"><a href="/packages/{{ p }}">{{ p }}</a></h2> - {{ macros.display_running_job(p, info) }} + {{ macros.display_running_job(p, info, show_stop) }} {% endfor %} <!-- queue contents --> diff --git a/charms/focal/autopkgtest-web/webcontrol/templates/macros.html b/charms/focal/autopkgtest-web/webcontrol/templates/macros.html index 941dc77..f75ebcb 100644 --- a/charms/focal/autopkgtest-web/webcontrol/templates/macros.html +++ b/charms/focal/autopkgtest-web/webcontrol/templates/macros.html @@ -1,4 +1,4 @@ -{% macro display_running_job(package, info) -%} +{% macro display_running_job(package, info, show_stop) -%} {% for runhash, relinfo in info.items() %} {% for release, archinfo in relinfo.items() %} {% for arch, (params, duration, logtail) in archinfo.items() %} @@ -15,6 +15,11 @@ {% endif %} {% endfor %} <tr><th>Running for:</th><td>{{ duration//3600 }}h {{ duration % 3600//60 }}m {{ duration % 60 }}s ({{ duration }}s)</td></tr> + {% if show_stop %} + {% if "uuid" in params.keys() %} + 
<tr><td><a href="{{ base_url }}test-manager.cgi?uuid={{ params.get("uuid") }}">Stop this test</a></td></tr> + {% endif %} + {% endif %} </table> <pre> {{ logtail }} diff --git a/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi b/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi new file mode 100755 index 0000000..9df3db3 --- /dev/null +++ b/charms/focal/autopkgtest-web/webcontrol/test-manager.cgi @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 + +"""Run test-manager app as CGI script """ + +from wsgiref.handlers import CGIHandler + +from test_manager.app import app + +if __name__ == "__main__": + app.config["DEBUG"] = True + CGIHandler().run(app) diff --git a/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py b/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/charms/focal/autopkgtest-web/webcontrol/test_manager/__init__.py diff --git a/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py b/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py new file mode 100644 index 0000000..8a62aae --- /dev/null +++ b/charms/focal/autopkgtest-web/webcontrol/test_manager/app.py @@ -0,0 +1,135 @@ +""" +test-manager is an app for autopkgtest-web which sends kill requests to +the worker units, detailing the test uuid. + +The worker units then kill the test with the matching uuid. + +On the running page, admins will have a hyperlink under running jobs which, when clicked, +will send the kill request. + +Before sending the kill request, test_manager checks that the uuid is indeed in running.json + +After sending the kill request, the request is picked up by a systemd unit named "test-killer" +on the cloud worker units. + +This unit, through all the cloud worker units, will find which unit the test is running on, +and kill the test, removing the test request from the queue and making the worker unit move +on to the next test in the queue. 
+""" + + +import configparser +import json +import logging +import os +import pathlib +import urllib + +import amqplib.client_0_8 as amqp +from flask import request, session +from helpers.exceptions import RunningJSONNotFound +from helpers.utils import ( + HTML, + get_admin_nicks, + get_all_releases, + initialise_app, + maybe_escape, +) + +ALL_UBUNTU_RELEASES = get_all_releases() + +RUNNING_FP = "/run/amqp-status-collector/running.json" +RUNNING_FILE = pathlib.Path("/run/amqp-status-collector/running.json") +WRITER_EXCHANGE_NAME = "stop-running.fanout" + + +def submit_to_queue(message: dict): + """ + Submits a dictionary as an amqp message to the WRITER_EXCHANGE_NAME exchange + + :param message: Dictionary to be converted to an amqp.Message and placed into the queue + """ + amqp_con = amqp_connect() + complete_amqp = amqp_con.channel() + complete_amqp.access_request( + "/complete", active=True, read=False, write=True + ) + complete_amqp.exchange_declare( + WRITER_EXCHANGE_NAME, "fanout", durable=True, auto_delete=False + ) + complete_amqp.basic_publish( + amqp.Message(json.dumps(message), delivery_mode=2), + WRITER_EXCHANGE_NAME, + "", + ) + + +# THIS CAN BE REFACTORED AFTER THE AUTO-QUEUE-CLEANUP MP IS MERGED!!! 
+# AMQP_CONNECT IS NOW SHARED FUNCTION IN HELPERS/UTILS.PY +def amqp_connect() -> amqp.Connection: + """Connect to AMQP server""" + cp = configparser.ConfigParser() + cp.read(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf")) + amqp_uri = cp["amqp"]["uri"] + parts = urllib.parse.urlsplit(amqp_uri, allow_fragments=False) + amqp_con = amqp.Connection( + parts.hostname, userid=parts.username, password=parts.password + ) + logging.info( + "Connected to AMQP server at %s@%s" % (parts.username, parts.hostname) + ) + return amqp_con + + +PATH, app, secret_path, oid = initialise_app("test_manager") + + +@app.route("/", methods=["GET", "POST"]) +def index_root(): + """Handle stop test requests""" + session.permanent = True + nick = maybe_escape(session.get("nickname")) + if nick not in get_admin_nicks(): + return ( + HTML.format( + ( + "<p>You are not an admin. You are not " + "allowed to use this endpoint.</p>" + ) + ), + 200, + ) + params = { + maybe_escape(k): maybe_escape(v) for k, v in request.args.items() + } + if list(params.keys()) != ["uuid"]: + return ( + HTML.format( + "<p>You have passed %s, please only pass the uuid</p>" + % ",".join(params.keys()) + ), + 200, + ) + if not RUNNING_FILE.is_file(): + raise RunningJSONNotFound + running_data = json.loads(RUNNING_FILE.read_text()) + if params["uuid"] not in json.dumps(running_data): + return ( + HTML.format( + "<p>uuid %s not found in running jobs</p>" % params["uuid"] + ), + 200, + ) + queue_message = { + "uuid": params["uuid"], + "not-running-on": [], + } + submit_to_queue(queue_message) + while params["uuid"] in RUNNING_FILE.read_text(): + pass + return ( + HTML.format( + "<p>Test with uuid %s has been killed.</p>" % params["uuid"] + ), + 200, + ) diff --git a/mojo/service-bundle b/mojo/service-bundle index 6da0b03..02c95e7 100644 --- a/mojo/service-bundle +++ b/mojo/service-bundle @@ -1,11 +1,15 @@ {%- if stage_name == "production" %} {%- set releases = "trusty xenial bionic focal jammy mantic noble" %} {%- 
set channel = "latest/stable" %} + {%- set num_cloud = 2 %} {%- elif stage_name == "staging" or stage_name == "devel" %} {%- set releases = "focal jammy mantic noble" %} {%- set channel = "latest/edge" %} + {%- set num_cloud = 1 %} {%- endif %} +{%- set num_lxd = 1 %} + {%- if stage_name == "production" %} {%- set hostname = "autopkgtest.ubuntu.com" %} {%- elif stage_name == "staging" %} @@ -24,17 +28,14 @@ applications: autopkgtest-cloud-worker: charm: ubuntu-release-autopkgtest-cloud-worker channel: {{ channel }} -{%- if stage_name == "production" or stage_name == "staging" %} - num_units: 2 -{%- else %} - num_units: 1 -{%- endif %} + num_units: {{ num_cloud }} constraints: mem=16G cores=8 root-disk=40G {%- if stage_name == "production" or stage_name == "staging" %} storage: tmp: 350G {%- endif %} options: &common-options + num-workers: {{ num_cloud + num_lxd }} swift-password: include-file://{{local_dir}}/swift_password releases: {{ releases }} influxdb-hostname: include-file://{{ local_dir }}/influx-hostname.txt @@ -132,7 +133,7 @@ applications: autopkgtest-lxd-worker: charm: ubuntu-release-autopkgtest-cloud-worker channel: {{ channel }} - num_units: 1 + num_units: {{ num_lxd }} constraints: mem=16G cores=8 root-disk=40G {%- if stage_name == "production" or stage_name == "staging" %} storage: @@ -141,6 +142,7 @@ applications: options: <<: *common-options worker-args: lxd -r $LXD_REMOTE $LXD_REMOTE:autopkgtest/ubuntu/$RELEASE/$ARCHITECTURE + num-workers: {{ num_cloud + num_lxd }} {%- if stage_name == "production" or stage_name == "staging" %} worker-setup-command2: ln -s /dev/null /etc/systemd/system/bluetooth.service; printf "http_proxy={{ http_proxy }}\nhttps_proxy={{ https_proxy }}\nno_proxy={{ no_proxy }}\n" >> /etc/environment {%- endif %} @@ -206,6 +208,7 @@ applications: canonical-security canonical-server canonical-ubuntu-qa + admin-nicks: andersson123,brian-murray,hyask,paride,sil2000,vorlon {%- if stage_name == "production" %} {%- set 
storage_host_internal = "objectstorage.prodstack5.canonical.com:443" %} {%- set storage_path_internal = "/swift/v1/AUTH_0f9aae918d5b4744bf7b827671c86842" %}
--
Mailing list: https://launchpad.net/~canonical-ubuntu-qa
Post to     : canonical-ubuntu-qa@lists.launchpad.net
Unsubscribe : https://launchpad.net/~canonical-ubuntu-qa
More help   : https://help.launchpad.net/ListHelp