summaryrefslogtreecommitdiffstats
path: root/nova/servicegroup/drivers/db.py
blob: aa29596db053be6290f60f8da39e042c621775b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Copyright 2012 IBM Corp.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from oslo.config import cfg

from nova import conductor
from nova import context
from nova.openstack.common import log as logging
from nova.openstack.common import timeutils
from nova.servicegroup import api
from nova import utils


CONF = cfg.CONF
CONF.import_opt('service_down_time', 'nova.service')

LOG = logging.getLogger(__name__)


class DbDriver(api.ServiceGroupDriver):

    def __init__(self, *args, **kwargs):
        self.db_allowed = kwargs.get('db_allowed', True)
        self.conductor_api = conductor.API(use_local=self.db_allowed)

    def join(self, member_id, group_id, service=None):
        """Join the given service with it's group."""

        msg = _('DB_Driver: join new ServiceGroup member %(member_id)s to '
                    'the %(group_id)s group, service = %(service)s')
        LOG.debug(msg, locals())
        if service is None:
            raise RuntimeError(_('service is a mandatory argument for DB based'
                                 ' ServiceGroup driver'))
        report_interval = service.report_interval
        if report_interval:
            pulse = utils.FixedIntervalLoopingCall(self._report_state, service)
            pulse.start(interval=report_interval,
                        initial_delay=report_interval)
            return pulse

    def is_up(self, service_ref):
        """Moved from nova.utils
        Check whether a service is up based on last heartbeat.
        """
        last_heartbeat = service_ref['updated_at'] or service_ref['created_at']
        if isinstance(last_heartbeat, basestring):
            # NOTE(russellb) If this service_ref came in over rpc via
            # conductor, then the timestamp will be a string and needs to be
            # converted back to a datetime.
            last_heartbeat = timeutils.parse_strtime(last_heartbeat)
        # Timestamps in DB are UTC.
        elapsed = utils.total_seconds(timeutils.utcnow() - last_heartbeat)
        LOG.debug('DB_Driver.is_up last_heartbeat = %(lhb)s elapsed = %(el)s',
                  {'lhb': str(last_heartbeat), 'el': str(elapsed)})
        return abs(elapsed) <= CONF.service_down_time

    def get_all(self, group_id):
        """
        Returns ALL members of the given group
        """
        LOG.debug(_('DB_Driver: get_all members of the %s group') % group_id)
        rs = []
        ctxt = context.get_admin_context()
        services = self.conductor_api.service_get_all_by_topic(ctxt, group_id)
        for service in services:
            if self.is_up(service):
                rs.append(service['host'])
        return rs

    def _report_state(self, service):
        """Update the state of this service in the datastore."""
        ctxt = context.get_admin_context()
        state_catalog = {}
        try:
            report_count = service.service_ref['report_count'] + 1
            state_catalog['report_count'] = report_count

            service.service_ref = self.conductor_api.service_update(ctxt,
                    service.service_ref, state_catalog)

            # TODO(termie): make this pattern be more elegant.
            if getattr(service, 'model_disconnected', False):
                service.model_disconnected = False
                LOG.error(_('Recovered model server connection!'))

        # TODO(vish): this should probably only catch connection errors
        except Exception:  # pylint: disable=W0702
            if not getattr(service, 'model_disconnected', False):
                service.model_disconnected = True
                LOG.exception(_('model server went away'))