Review: Needs Fixing

Added a bunch of notes, but it's mostly trivial stuff (unnecessary globals). 
There are a few bits that warrant closer attention, though (and/or an 
explanation of why they're actually correct). Looking much better overall, 
thanks!

Diff comments:

> diff --git a/charms/focal/autopkgtest-web/webcontrol/db-backup 
> b/charms/focal/autopkgtest-web/webcontrol/db-backup
> new file mode 100755
> index 0000000..273e153
> --- /dev/null
> +++ b/charms/focal/autopkgtest-web/webcontrol/db-backup
> @@ -0,0 +1,201 @@
> +#!/usr/bin/python3
> +"""
> +This script periodically backs up the sqlite3 db to swift storage
> +and clears up old backups
> +"""
> +
> +import atexit
> +import configparser
> +import datetime
> +import gzip
> +import logging
> +import os
> +import sqlite3
> +import sys
> +from pathlib import Path
> +
> +import swiftclient
> +from helpers.utils import init_db
> +
> +DB_PATH = ""
> +DB_COPY_LOCATION = ""
> +DB_NAME = ""
> +CONTAINER_NAME = "db-backups"
> +MAX_DAYS = 7
> +SQLITE_DUMP_CMD = (
> +    'sqlite3 /home/ubuntu/autopkgtest.db ".backup autopkgtest.db.bak"'
> +)
> +DB_BACKUP_NAME = ""
> +DB_BACKUP_PATH = ""
> +
> +
> +def db_connect() -> sqlite3.Connection:
> +    """
> +    Establish connection to sqlite3 db
> +    """
> +    global DB_PATH
> +    global DB_NAME
> +    global DB_BACKUP_NAME
> +    global DB_BACKUP_PATH
> +    cp = configparser.ConfigParser()
> +    cp.read(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf"))

You may want to check the return value of ConfigParser.read here. If it can't 
read the specified file, it won't complain (because that method is meant to be 
used with multiple files, some of which may not exist). If you *definitely* 
want to read that file, you probably want ConfigParser.read_file instead.
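Something like this (untested, but read_file raises FileNotFoundError instead 
of silently carrying on with an empty config):

    with open(os.path.expanduser("~ubuntu/autopkgtest-cloud.conf")) as f:
        cp.read_file(f)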

> +    DB_PATH = Path(cp["web"]["database"])
> +    DB_NAME = DB_PATH.name
> +    DB_BACKUP_NAME = "%s.bak" % DB_NAME
> +    DB_BACKUP_PATH = Path("/tmp") / (DB_PATH.name + ".bak")
> +
> +    db_con = init_db(cp["web"]["database"])
> +
> +    return db_con
> +
> +
> +def backup_db(db_con: sqlite3.Connection):
> +    global DB_BACKUP_PATH

No need to declare this global; it's not being modified by the function
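For reference, global is only needed when a function assigns to the name; 
plain reads find the module-level binding anyway:

    X = 1

    def reader():
        print(X)  # fine, no declaration needed

    def writer():
        global X  # needed only because we rebind the name
        X = 2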

> +    db_backup_con = sqlite3.connect(DB_BACKUP_PATH)
> +    with db_backup_con:
> +        db_con.backup(db_backup_con, pages=1)
> +    db_backup_con.close()
> +
> +
> +def compress_db():
> +    """
> +    use gzip to compress database
> +    """
> +    global DB_BACKUP_PATH

Likewise, no need for global here

> +    with open(DB_BACKUP_PATH, "rb") as f_in, gzip.open(
> +        "%s.gz" % DB_BACKUP_PATH, "wb"
> +    ) as f_out:
> +        f_out.writelines(f_in)

I'm not sure writelines is the correct method here. It'll try to iterate 
through the "lines" of f_in, but f_in isn't a text file (it does work, but the 
"lines" aren't really lines, just blocks delimited by coincidental, 
essentially random \n's in the binary data).

I think what you probably want here is shutil.copyfileobj(f_in, f_out) which'll 
ensure a fixed-size (minimal) buffer is used for the copy.
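i.e. something like this (with an import shutil added up top):

    with open(DB_BACKUP_PATH, "rb") as f_in, gzip.open(
        "%s.gz" % DB_BACKUP_PATH, "wb"
    ) as f_out:
        # copies in fixed-size chunks, regardless of content
        shutil.copyfileobj(f_in, f_out)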

> +
> +
> +def init_swift_con() -> swiftclient.Connection:
> +    """
> +    Establish connection to swift storage
> +    """
> +    swift_creds = {
> +        "authurl": os.environ["OS_AUTH_URL"],
> +        "user": os.environ["OS_USERNAME"],
> +        "key": os.environ["OS_PASSWORD"],
> +        "os_options": {
> +            "region_name": os.environ["OS_REGION_NAME"],
> +            "project_domain_name": os.environ["OS_PROJECT_DOMAIN_NAME"],
> +            "project_name": os.environ["OS_PROJECT_NAME"],
> +            "user_domain_name": os.environ["OS_USER_DOMAIN_NAME"],
> +        },
> +        "auth_version": 3,
> +    }
> +    swift_conn = swiftclient.Connection(**swift_creds)
> +    return swift_conn
> +
> +
> +def create_container_if_it_doesnt_exist(swift_conn: swiftclient.Connection):
> +    """
> +    create db-backups container if it doesn't already exist
> +    """
> +    global CONTAINER_NAME

Again, no need for global here

> +    try:
> +        swift_conn.get_container(CONTAINER_NAME)
> +    except swiftclient.exceptions.ClientException:
> +        swift_conn.put_container(
> +            CONTAINER_NAME,
> +        )
> +
> +
> +def upload_backup_to_db(
> +    swift_conn: swiftclient.Connection,
> +) -> swiftclient.Connection:
> +    """
> +    Upload compressed database to swift storage under container db-backups
> +    """
> +    global DB_PATH

Same for these three; they're all used read-only in this routine

> +    global DB_BACKUP_PATH
> +    global CONTAINER_NAME
> +    now = datetime.datetime.now().strftime("%Y/%m/%d/%H_%M_%S")

Do we really want now() here and not utcnow()? If the host's local timezone 
uses DST, there's the potential for confusion in the ordering of backups 
(though it rather depends how often backups are taken; if it's only daily then 
this is a nitpick).
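i.e.:

    now = datetime.datetime.utcnow().strftime("%Y/%m/%d/%H_%M_%S")

(or datetime.datetime.now(datetime.timezone.utc) on newer Pythons, where 
utcnow() is deprecated).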

> +    object_path = "%s/%s" % (now, DB_PATH.name + ".gz")
> +    for _ in range(5):

Wow, that's ... disturbing. Is swift sufficiently unreliable that we have to 
throw 5 retries, with a re-auth on connection failure, around every single 
swift transaction? Now I understand why the main bit down the bottom keeps 
re-assigning swift_conn.

Still, is this really necessary? I've no experience here, so maybe it is, but 
my gut instinct is "urgh!". Note too that if all five attempts fail, the loop 
just falls through silently and the script carries on as though the upload had 
succeeded.
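If the retries genuinely have to stay, could we at least pull them into one 
helper instead of duplicating the loop? Rough, untested sketch (retry_swift is 
my name for it, adjust to taste); note it also raises once the attempts are 
exhausted rather than failing silently:

    def retry_swift(swift_conn, method, *args, attempts=5, **kwargs):
        # Call a swiftclient.Connection method, re-authenticating and
        # retrying on ClientException.  Returns (result, swift_conn) so
        # the caller picks up any re-established connection.
        for _ in range(attempts):
            try:
                return getattr(swift_conn, method)(*args, **kwargs), swift_conn
            except swiftclient.exceptions.ClientException as e:
                logging.info("swift %s failed: %s", method, e)
                swift_conn = init_swift_con()
        raise RuntimeError("swift %s still failing after %d tries" % (method, attempts))

    # e.g. in upload_backup_to_db:
    # _, swift_conn = retry_swift(
    #     swift_conn, "put_object", CONTAINER_NAME, object_path,
    #     "%s.gz" % DB_BACKUP_PATH,
    #     content_type="text/plain; charset=UTF-8",
    #     headers={"Content-Encoding": "gzip"},
    # )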

> +        try:
> +            swift_conn.put_object(
> +                CONTAINER_NAME,
> +                object_path,
> +                "%s.gz" % DB_BACKUP_PATH,
> +                content_type="text/plain; charset=UTF-8",
> +                headers={"Content-Encoding": "gzip"},
> +            )
> +            break
> +        except swiftclient.exceptions.ClientException as e:
> +            logging.info("exception: %s" % str(e))
> +            swift_conn = init_swift_con()
> +    return swift_conn
> +
> +
> +def delete_old_backups(
> +    swift_conn: swiftclient.Connection,
> +) -> swiftclient.Connection:
> +    """
> +    Delete objects in db-backups container that are older than 7 days
> +    """
> +    global CONTAINER_NAME
> +    global MAX_DAYS

Same again, no need for global

> +    logging.info("Removing old db backups...")
> +    _, objects = swift_conn.get_container(CONTAINER_NAME)
> +    now = datetime.datetime.now()
> +
> +    for obj in objects:
> +        last_modified = obj["last_modified"].split(".")[0]

Is this just stripping off the microseconds? You could parse them straight 
into the timestamp below with .%f in the format string.
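i.e.:

    timestamp = datetime.datetime.strptime(
        obj["last_modified"], "%Y-%m-%dT%H:%M:%S.%f"
    )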

> +        timestamp = datetime.datetime.strptime(
> +            last_modified, "%Y-%m-%dT%H:%M:%S"
> +        )
> +        diff = now - timestamp
> +        if diff > datetime.timedelta(days=MAX_DAYS):
> +            logging.info("Deleting %s" % obj["name"])
> +            for _ in range(5):

More swifty-retry shenanigans; again, if it's genuinely necessary then okay, 
but I'd rather not duplicate the loop (the helper sketched above would at 
least keep it in one place). Also, the "as _" here discards the exception 
without even logging it, unlike the upload path.

> +                try:
> +                    swift_conn.delete_object(CONTAINER_NAME, obj["name"])
> +                    break
> +                except swiftclient.exceptions.ClientException as _:
> +                    swift_conn = init_swift_con()
> +    return swift_conn
> +
> +
> +def cleanup():
> +    """
> +    Delete db and compressed db under /tmp
> +    """
> +    global DB_COPY_LOCATION

Another global you can scrub :). More importantly, though: as far as I can see 
nothing in this file ever assigns DB_COPY_LOCATION (db_connect fills in 
DB_BACKUP_PATH instead), so it's always "" here and cleanup() never actually 
removes anything. Should these be DB_BACKUP_PATH?

> +    if os.path.isfile(DB_COPY_LOCATION):
> +        os.remove(DB_COPY_LOCATION)

TOCTOU; don't bother with isfile; just os.remove and catch 
OSError/FileNotFoundError (or use Path.unlink(missing_ok=True)).

> +    if os.path.isfile("%s.gz" % DB_COPY_LOCATION):
> +        os.remove("%s.gz" % DB_COPY_LOCATION)

Same; just remove and catch the error.
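i.e. something like this (assuming Python >= 3.8 for missing_ok; and see the 
DB_COPY_LOCATION note above):

    def cleanup():
        # Delete the db copy and the compressed copy under /tmp,
        # ignoring anything that was never created
        Path(DB_COPY_LOCATION).unlink(missing_ok=True)
        Path("%s.gz" % DB_COPY_LOCATION).unlink(missing_ok=True)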

> +
> +
> +if __name__ == "__main__":
> +    # Only bother running script if this unit is the leader.
> +    logging.basicConfig(level="INFO")
> +    if not os.path.isfile("/run/autopkgtest-web-is-leader"):
> +        logging.info("unit is not leader, exiting...")
> +        sys.exit(0)
> +    # connect to db
> +    logging.info("Connecting to db")
> +    db_con = db_connect()
> +    # backup the db
> +    logging.info("Creating a backup of the db...")
> +    backup_db(db_con)
> +    # compress it
> +    logging.info("Compressing db")
> +    compress_db()
> +    # register cleanup if anything fails
> +    logging.info("Registering cleanup function")
> +    atexit.register(cleanup)
> +    # initialise swift conn
> +    logging.info("Setting up swift connection")
> +    swift_conn = init_swift_con()
> +    # create container if it doesn't exist
> +    create_container_if_it_doesnt_exist(swift_conn)
> +    # upload to swift container
> +    logging.info("Uploading db to swift!")
> +    swift_conn = upload_backup_to_db(swift_conn)
> +    # Remove old results
> +    logging.info("Pruning old database backups")
> +    swift_conn = delete_old_backups(swift_conn)
> +    # run cleanup
> +    cleanup()


-- 
https://code.launchpad.net/~andersson123/autopkgtest-cloud/+git/autopkgtest-cloud/+merge/460043
Your team Canonical's Ubuntu QA is requested to review the proposed merge of 
~andersson123/autopkgtest-cloud:sqlite-db-backup into autopkgtest-cloud:master.

