summaryrefslogtreecommitdiffstats
path: root/roles/copr/backend/files/cleanup_vm_nova.py
blob: e1562b82e5e226d8b1b65af351d41534ca3a4fb4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/python
# coding: utf-8

import os
import sys
import time
import logging

from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from dateutil.parser import parse as dt_parse

import psutil
import yaml
from novaclient.client import Client

sys.path.append("/usr/share/copr/")

from backend.helpers import BackendConfigReader
from backend.helpers import utc_now

try:
    from backend.vm_manage.manager import VmManager
except ImportError:
    VmManager = None

# Keep the "requests" library quiet: only its errors reach our log.
logging.getLogger("requests").setLevel(logging.ERROR)

# Module-level logger, so the functions in this file can log even when the
# module is imported rather than executed as a script. Handlers are set up
# by logging.basicConfig() in the __main__ section.
log = logging.getLogger(__name__)


# Location of the YAML file with the OpenStack credentials; can be overridden
# through the NOVA_CLOUD_VARS environment variable.
nova_cloud_vars_path = os.environ.get("NOVA_CLOUD_VARS", "/home/copr/provision/nova_cloud_vars.yml")


def read_config():
    """
    Load the nova cloud variables from the YAML file at `nova_cloud_vars_path`.

    :return: the parsed YAML document; get_client() indexes it by the
        OS_USERNAME / OS_PASSWORD / OS_TENANT_NAME / OS_AUTH_URL keys
    :raises IOError: (OSError on Python 3) when the file cannot be opened
    """
    with open(nova_cloud_vars_path) as handle:
        # safe_load instead of load: calling yaml.load() without an explicit
        # Loader is deprecated since PyYAML 5.1 and rejected by PyYAML 6, and
        # the full loader can instantiate arbitrary Python objects.
        # NOTE(review): assumes the vars file contains only plain YAML data
        # (no python-specific tags) -- confirm.
        return yaml.safe_load(handle)


def get_client(conf):
    """
    Build a nova client (API version '2') from the credentials in `conf`.

    :param conf: mapping providing OS_USERNAME, OS_PASSWORD, OS_TENANT_NAME
        and OS_AUTH_URL
    :return: whatever novaclient's ``Client`` factory returns
    :raises KeyError: when one of the four keys is missing from `conf`
    """
    # Order matters: the values are passed positionally to Client.
    credential_keys = ("OS_USERNAME", "OS_PASSWORD", "OS_TENANT_NAME", "OS_AUTH_URL")
    credentials = [conf[key] for key in credential_keys]
    return Client('2', *credentials)


def get_managed_vms_names():
    """
    Collect the lower-cased names of all VMs reported by the VmManager.

    :return: list of lower-cased VM names; an empty list when VmManager is
        unavailable (it is set to None if its import failed)
    """
    if not VmManager:
        return []

    backend_opts = BackendConfigReader().read()
    manager = VmManager(backend_opts, log)
    return [descriptor.vm_name.lower() for descriptor in manager.get_all_vm()]


class Cleaner(object):
    """
    Terminates nova servers that are either in the "error" state or are old
    enough and unknown to the backend (their name is not among the names
    returned by get_managed_vms_names()).
    """

    # Servers created less than this many seconds ago are never terminated
    # for being unknown to the backend.
    MIN_AGE_SECONDS = 60 * 5  # 5 minutes

    def __init__(self, conf):
        """
        :param conf: credentials mapping handed to get_client() by main()
        """
        self.conf = conf
        self.nt = None  # nova client; created in main()

    @staticmethod
    def terminate(srv):
        """
        Request deletion of the server `srv`.

        Best effort: any exception raised by the delete request is logged
        (with traceback) and swallowed, never propagated to the caller.
        """
        try:
            srv.delete()
            log.info("delete invoked for: {}".format(srv))
        except Exception as err:
            log.exception("failed to request VM termination: {}".format(err))

    @staticmethod
    def old_enough(srv, min_age_seconds=MIN_AGE_SECONDS):
        """
        Tell whether `srv` was created more than `min_age_seconds` ago.

        :param srv: server object whose ``created`` attribute is a timestamp
            string parseable by dateutil
        :param min_age_seconds: age threshold in seconds, defaults to
            MIN_AGE_SECONDS (5 minutes)
        :return: True when the server age is strictly above the threshold
        """
        dt_created = dt_parse(srv.created)
        # NOTE(review): the subtraction requires utc_now() and the parsed
        # timestamp to be both timezone-aware or both naive -- confirm.
        delta = (utc_now() - dt_created).total_seconds()
        return delta > min_age_seconds

    def check_one(self, srv_id, vms_names):
        """
        Inspect one server and terminate it when it should not exist.

        :param srv_id: id of the server to fetch through ``self.nt``
        :param vms_names: container of lower-cased names of the VMs known to
            the backend; only membership tests are performed on it
        """
        srv = self.nt.servers.get(srv_id)
        log.info("checking vm: {}".format(srv))
        # NOTE(review): servers.get() presumably already returns up-to-date
        # details, which would make this refresh redundant -- confirm before
        # removing.
        srv.get()
        if srv.status.lower().strip() == "error":
            log.info("server {} got into the error state, terminating".format(srv))
            self.terminate(srv)
        elif self.old_enough(srv) and srv.human_id.lower() not in vms_names:
            log.info("server {} not placed in our db, terminating".format(srv))
            self.terminate(srv)

    def main(self):
        """
        Terminate erred VMs and VMs older than MIN_AGE_SECONDS (5 minutes)
        whose name is not among the names of the VMs managed by the backend.

        Servers are checked concurrently; a failure while checking one server
        is logged together with its id and does not stop the other checks.
        """
        start = time.time()
        log.info("Cleanup start")

        self.nt = get_client(self.conf)
        srv_list = self.nt.servers.list(detailed=False)
        # A set gives constant-time membership tests; the lookup is performed
        # once per server inside the worker threads.
        vms_names = frozenset(get_managed_vms_names())
        with ThreadPoolExecutor(max_workers=20) as executor:
            # future -> server id, so a failed check can be attributed
            future_check = {executor.submit(self.check_one, srv.id, vms_names): srv.id for srv in srv_list}
            for future in as_completed(future_check):
                try:
                    future.result()
                except Exception:
                    log.exception("check of server {} failed".format(future_check[future]))

        log.info("cleanup consumed: {} seconds".format(time.time() - start))

if __name__ == "__main__":
    # Script entry point: configure file logging, then run one cleanup pass.
    # Alternative log destinations used for local debugging:
    #   filename="/tmp/cleanup_vms.log"
    #   stream=sys.stdout
    logging_settings = {
        "filename": "/var/log/copr-backend/cleanup_vms.log",
        "format": '[%(asctime)s][%(thread)s][%(levelname)6s]: %(message)s',
        "level": logging.INFO,
    }
    logging.basicConfig(**logging_settings)

    # Module-global logger used by the functions and methods of this file.
    log = logging.getLogger(__name__)

    nova_conf = read_config()
    cleaner = Cleaner(nova_conf)
    cleaner.main()