diff options
| author | Justin Santa Barbara <justin@fathomdb.com> | 2011-03-24 09:21:51 +0000 |
|---|---|---|
| committer | Tarmac <> | 2011-03-24 09:21:51 +0000 |
| commit | 08fd7016db5b0e435b8d9728345739afcf3cb152 (patch) | |
| tree | c7d8ac401025037cbc841cbeab47d7b5800b9851 | |
| parent | 86b3cc94bc672fda7925a247c3b7c2f85be2c5b5 (diff) | |
| parent | 40a6ded37544dcfe44ba9d3ef247339122c93b43 (diff) | |
| download | nova-08fd7016db5b0e435b8d9728345739afcf3cb152.tar.gz nova-08fd7016db5b0e435b8d9728345739afcf3cb152.tar.xz nova-08fd7016db5b0e435b8d9728345739afcf3cb152.zip | |
Poll instance states periodically, so that we can detect when something changes 'behind the scenes'.
Beginnings of work on Bug #661214 and Bug #661260.
| -rw-r--r-- | nova/compute/manager.py | 62 | ||||
| -rw-r--r-- | nova/compute/power_state.py | 18 | ||||
| -rw-r--r-- | nova/tests/test_compute.py | 21 | ||||
| -rw-r--r-- | nova/utils.py | 9 | ||||
| -rw-r--r-- | nova/virt/connection.py | 4 | ||||
| -rw-r--r-- | nova/virt/driver.py | 234 | ||||
| -rw-r--r-- | nova/virt/fake.py | 39 | ||||
| -rw-r--r-- | nova/virt/hyperv.py | 19 | ||||
| -rw-r--r-- | nova/virt/libvirt_conn.py | 31 | ||||
| -rw-r--r-- | nova/virt/xenapi/vmops.py | 21 | ||||
| -rw-r--r-- | nova/virt/xenapi_conn.py | 7 |
11 files changed, 442 insertions, 23 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 09af03564..9591a6c19 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2,6 +2,7 @@ # Copyright 2010 United States Government as represented by the # Administrator of the National Aeronautics and Space Administration. +# Copyright 2011 Justin Santa Barbara # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -51,6 +52,7 @@ from nova import manager from nova import rpc from nova import utils from nova.compute import power_state +from nova.virt import driver FLAGS = flags.FLAGS flags.DEFINE_string('instances_path', '$state_path/instances', @@ -120,7 +122,9 @@ class ComputeManager(manager.Manager): compute_driver = FLAGS.compute_driver try: - self.driver = utils.import_object(compute_driver) + self.driver = utils.check_isinstance( + utils.import_object(compute_driver), + driver.ComputeDriver) except ImportError as e: LOG.error(_("Unable to load the virtualization driver: %s") % (e)) sys.exit(1) @@ -1016,3 +1020,59 @@ class ComputeManager(manager.Manager): for volume in instance_ref['volumes']: self.db.volume_update(ctxt, volume['id'], {'status': 'in-use'}) + + def periodic_tasks(self, context=None): + """Tasks to be run at a periodic interval.""" + error_list = super(ComputeManager, self).periodic_tasks(context) + if error_list is None: + error_list = [] + + try: + self._poll_instance_states(context) + except Exception as ex: + LOG.warning(_("Error during instance poll: %s"), + unicode(ex)) + error_list.append(ex) + return error_list + + def _poll_instance_states(self, context): + vm_instances = self.driver.list_instances_detail() + vm_instances = dict((vm.name, vm) for vm in vm_instances) + + # Keep a list of VMs not in the DB, cross them off as we find them + vms_not_found_in_db = list(vm_instances.keys()) + + db_instances = self.db.instance_get_all_by_host(context, self.host) + + for db_instance in db_instances: + name = db_instance['name'] + vm_instance = vm_instances.get(name) + if vm_instance is None: + LOG.info(_("Found instance '%(name)s' in DB but no VM. " + "Setting state to shutoff.") % locals()) + vm_state = power_state.SHUTOFF + else: + vm_state = vm_instance.state + vms_not_found_in_db.remove(name) + + db_state = db_instance['state'] + if vm_state != db_state: + LOG.info(_("DB/VM state mismatch. Changing state from " + "'%(db_state)s' to '%(vm_state)s'") % locals()) + self.db.instance_set_state(context, + db_instance['id'], + vm_state) + + if vm_state == power_state.SHUTOFF: + # TODO(soren): This is what the compute manager does when you + # terminate an instance. At some point I figure we'll have a + # "terminated" state and some sort of cleanup job that runs + # occasionally, cleaning them out. + self.db.instance_destroy(context, db_instance['id']) + + # Are there VMs not in the DB? + for vm_not_found_in_db in vms_not_found_in_db: + name = vm_not_found_in_db + # TODO(justinsb): What to do here? Adopt it? Shut it down? + LOG.warning(_("Found VM not in DB: '%(name)s'. Ignoring") + % locals()) diff --git a/nova/compute/power_state.py b/nova/compute/power_state.py index adfc2dff0..ef013b2ef 100644 --- a/nova/compute/power_state.py +++ b/nova/compute/power_state.py @@ -2,6 +2,7 @@ # Copyright 2010 United States Government as represented by the # Administrator of the National Aeronautics and Space Administration. +# Copyright 2011 Justin Santa Barbara # All Rights Reserved. # Copyright (c) 2010 Citrix Systems, Inc. # @@ -19,6 +20,7 @@ """The various power states that a VM can be in.""" +#NOTE(justinsb): These are the virDomainState values from libvirt NOSTATE = 0x00 RUNNING = 0x01 BLOCKED = 0x02 @@ -29,9 +31,10 @@ CRASHED = 0x06 SUSPENDED = 0x07 FAILED = 0x08 - -def name(code): - d = { +# TODO(justinsb): Power state really needs to be a proper class, +# so that we're not locked into the libvirt status codes and can put mapping +# logic here rather than spread throughout the code +_STATE_MAP = { NOSTATE: 'pending', RUNNING: 'running', BLOCKED: 'blocked', @@ -41,4 +44,11 @@ def name(code): CRASHED: 'crashed', SUSPENDED: 'suspended', FAILED: 'failed to spawn'} - return d[code] + + +def name(code): + return _STATE_MAP[code] + + +def valid_states(): + return _STATE_MAP.keys() diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py index 44d04a12f..14ff8842b 100644 --- a/nova/tests/test_compute.py +++ b/nova/tests/test_compute.py @@ -626,3 +626,24 @@ class ComputeTestCase(test.TestCase): db.instance_destroy(c, instance_id) db.volume_destroy(c, v_ref['id']) db.floating_ip_destroy(c, flo_addr) + + def test_run_kill_vm(self): + """Detect when a vm is terminated behind the scenes""" + instance_id = self._create_instance() + + self.compute.run_instance(self.context, instance_id) + + instances = db.instance_get_all(context.get_admin_context()) + LOG.info(_("Running instances: %s"), instances) + self.assertEqual(len(instances), 1) + + instance_name = instances[0].name + self.compute.driver.test_remove_vm(instance_name) + + # Force the compute manager to do its periodic poll + error_list = self.compute.periodic_tasks(context.get_admin_context()) + self.assertFalse(error_list) + + instances = db.instance_get_all(context.get_admin_context()) + LOG.info(_("After force-killing instances: %s"), instances) + self.assertEqual(len(instances), 0) diff --git a/nova/utils.py b/nova/utils.py index 03a6e8095..e4d8a70eb 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -661,3 +661,12 @@ def get_from_path(items, path): return results else: return get_from_path(results, remainder) + + +def check_isinstance(obj, cls): + """Checks that obj is of type cls, and lets PyLint infer types""" + if isinstance(obj, cls): + return obj + raise Exception(_("Expected object of type: %s") % (str(cls))) + # TODO(justinsb): Can we make this better?? + return cls() # Ugly PyLint hack diff --git a/nova/virt/connection.py b/nova/virt/connection.py index 13181b730..af7001715 100644 --- a/nova/virt/connection.py +++ b/nova/virt/connection.py @@ -23,6 +23,8 @@ import sys from nova import flags from nova import log as logging +from nova import utils +from nova.virt import driver from nova.virt import fake from nova.virt import libvirt_conn from nova.virt import xenapi_conn @@ -72,4 +74,4 @@ def get_connection(read_only=False): if conn is None: LOG.error(_('Failed to open connection to the hypervisor')) sys.exit(1) - return conn + return utils.check_isinstance(conn, driver.ComputeDriver) diff --git a/nova/virt/driver.py b/nova/virt/driver.py new file mode 100644 index 000000000..0e3a4aa3b --- /dev/null +++ b/nova/virt/driver.py @@ -0,0 +1,234 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# Copyright 2011 Justin Santa Barbara +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Driver base-classes: + + (Beginning of) the contract that compute drivers must follow, and shared + types that support that contract +""" + +from nova.compute import power_state + + +class InstanceInfo(object): + def __init__(self, name, state): + self.name = name + assert state in power_state.valid_states(), "Bad state: %s" % state + self.state = state + + +class ComputeDriver(object): + """Base class for compute drivers. + + Lots of documentation is currently on fake.py. + """ + + def init_host(self, host): + """Adopt existing VM's running here""" + raise NotImplementedError() + + def get_info(self, instance_name): + """Get the current status of an instance, by name (not ID!) + + Returns a dict containing: + :state: the running state, one of the power_state codes + :max_mem: (int) the maximum memory in KBytes allowed + :mem: (int) the memory in KBytes used by the domain + :num_cpu: (int) the number of virtual CPUs for the domain + :cpu_time: (int) the CPU time used in nanoseconds + """ + raise NotImplementedError() + + def list_instances(self): + raise NotImplementedError() + + def list_instances_detail(self): + """Return a list of InstanceInfo for all registered VMs""" + raise NotImplementedError() + + def spawn(self, instance): + """Launch a VM for the specified instance""" + raise NotImplementedError() + + def destroy(self, instance, cleanup=True): + """Shutdown specified VM""" + raise NotImplementedError() + + def reboot(self, instance): + """Reboot specified VM""" + raise NotImplementedError() + + def snapshot_instance(self, context, instance_id, image_id): + raise NotImplementedError() + + def get_console_pool_info(self, console_type): + """??? + + Returns a dict containing: + :address: ??? + :username: ??? + :password: ??? + """ + raise NotImplementedError() + + def get_console_output(self, instance): + raise NotImplementedError() + + def get_ajax_console(self, instance): + raise NotImplementedError() + + def get_diagnostics(self, instance): + """Return data about VM diagnostics""" + raise NotImplementedError() + + def get_host_ip_addr(self): + raise NotImplementedError() + + def attach_volume(self, context, instance_id, volume_id, mountpoint): + raise NotImplementedError() + + def detach_volume(self, context, instance_id, volume_id): + raise NotImplementedError() + + def compare_cpu(self, context, cpu_info): + raise NotImplementedError() + + def migrate_disk_and_power_off(self, instance, dest): + """Transfers the VHD of a running instance to another host, then shuts + off the instance copies over the COW disk""" + raise NotImplementedError() + + def snapshot(self, instance, image_id): + """ Create snapshot from a running VM instance """ + raise NotImplementedError() + + def finish_resize(self, instance, disk_info): + """Completes a resize, turning on the migrated instance""" + raise NotImplementedError() + + def revert_resize(self, instance): + """Reverts a resize, powering back on the instance""" + raise NotImplementedError() + + def pause(self, instance, callback): + """Pause VM instance""" + raise NotImplementedError() + + def unpause(self, instance, callback): + """Unpause paused VM instance""" + raise NotImplementedError() + + def suspend(self, instance, callback): + """suspend the specified instance""" + raise NotImplementedError() + + def resume(self, instance, callback): + """resume the specified instance""" + raise NotImplementedError() + + def rescue(self, instance, callback): + """Rescue the specified instance""" + raise NotImplementedError() + + def unrescue(self, instance, callback): + """Unrescue the specified instance""" + raise NotImplementedError() + + def update_available_resource(self, ctxt, host): + """Updates compute manager resource info on ComputeNode table. + + This method is called when nova-compute launches, and + whenever admin executes "nova-manage service update_resource". + + :param ctxt: security context + :param host: hostname that compute manager is currently running + + """ + raise NotImplementedError() + + def live_migration(self, ctxt, instance_ref, dest, + post_method, recover_method): + """Spawning live_migration operation for distributing high-load. + + :params ctxt: security context + :params instance_ref: + nova.db.sqlalchemy.models.Instance object + instance object that is migrated. + :params dest: destination host + :params post_method: + post operation method. + expected nova.compute.manager.post_live_migration. + :params recover_method: + recovery method when any exception occurs. + expected nova.compute.manager.recover_live_migration. + + """ + raise NotImplementedError() + + def refresh_security_group_rules(self, security_group_id): + raise NotImplementedError() + + def refresh_security_group_members(self, security_group_id): + raise NotImplementedError() + + def reset_network(self, instance): + """reset networking for specified instance""" + raise NotImplementedError() + + def ensure_filtering_rules_for_instance(self, instance_ref): + """Setting up filtering rules and waiting for its completion. + + To migrate an instance, filtering rules to hypervisors + and firewalls are inevitable on destination host. + ( Waiting only for filtering rules to hypervisor, + since filtering rules to firewall rules can be set faster). + + Concretely, the below method must be called. + - setup_basic_filtering (for nova-basic, etc.) + - prepare_instance_filter(for nova-instance-instance-xxx, etc.) + + to_xml may have to be called since it defines PROJNET, PROJMASK. + but libvirt migrates those value through migrateToURI(), + so , no need to be called. + + Don't use thread for this method since migration should + not be started when setting-up filtering rules operations + are not completed. + + :params instance_ref: nova.db.sqlalchemy.models.Instance object + + """ + raise NotImplementedError() + + def unfilter_instance(self, instance): + """Stop filtering instance""" + raise NotImplementedError() + + def set_admin_password(self, context, instance_id, new_pass=None): + """Set the root/admin password for an instance on this server.""" + raise NotImplementedError() + + def inject_file(self, instance, b64_path, b64_contents): + """Create a file on the VM instance. The file path and contents + should be base64-encoded. + """ + raise NotImplementedError() + + def inject_network_info(self, instance): + """inject network info for specified instance""" + raise NotImplementedError() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 3a06284a1..5b0fe1877 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -26,7 +26,9 @@ semantics of real hypervisor connections. """ from nova import exception +from nova import utils from nova.compute import power_state +from nova.virt import driver def get_connection(_): @@ -34,7 +36,14 @@ def get_connection(_): return FakeConnection.instance() -class FakeConnection(object): +class FakeInstance(object): + + def __init__(self, name, state): + self.name = name + self.state = state + + +class FakeConnection(driver.ComputeDriver): """ The interface to this class talks in terms of 'instances' (Amazon EC2 and internal Nova terminology), by which we mean 'running virtual machine' @@ -90,6 +99,17 @@ class FakeConnection(object): """ return self.instances.keys() + def _map_to_instance_info(self, instance): + instance = utils.check_isinstance(instance, FakeInstance) + info = driver.InstanceInfo(instance.name, instance.state) + return info + + def list_instances_detail(self): + info_list = [] + for instance in self.instances.values(): + info_list.append(self._map_to_instance_info(instance)) + return info_list + def spawn(self, instance): """ Create a new instance/VM/domain on the virtualization platform. @@ -109,9 +129,10 @@ class FakeConnection(object): that it was before this call began. """ - fake_instance = FakeInstance() - self.instances[instance.name] = fake_instance - fake_instance._state = power_state.RUNNING + name = instance.name + state = power_state.RUNNING + fake_instance = FakeInstance(name, state) + self.instances[name] = fake_instance def snapshot(self, instance, name): """ @@ -270,7 +291,7 @@ class FakeConnection(object): raise exception.NotFound(_("Instance %s Not Found") % instance_name) i = self.instances[instance_name] - return {'state': i._state, + return {'state': i.state, 'max_mem': 0, 'mem': 0, 'num_cpu': 2, @@ -428,8 +449,6 @@ class FakeConnection(object): """This method is supported only by libvirt.""" raise NotImplementedError('This method is supported only by libvirt.') - -class FakeInstance(object): - - def __init__(self): - self._state = power_state.NOSTATE + def test_remove_vm(self, instance_name): + """ Removes the named VM, as if it crashed. For testing""" + self.instances.pop(instance_name) diff --git a/nova/virt/hyperv.py b/nova/virt/hyperv.py index 75fed6d4f..a1ed5ebbf 100644 --- a/nova/virt/hyperv.py +++ b/nova/virt/hyperv.py @@ -68,6 +68,7 @@ from nova import flags from nova import log as logging from nova.auth import manager from nova.compute import power_state +from nova.virt import driver from nova.virt import images wmi = None @@ -108,8 +109,9 @@ def get_connection(_): return HyperVConnection() -class HyperVConnection(object): +class HyperVConnection(driver.ComputeDriver): def __init__(self): + super(HyperVConnection, self).__init__() self._conn = wmi.WMI(moniker='//./root/virtualization') self._cim_conn = wmi.WMI(moniker='//./root/cimv2') @@ -124,6 +126,19 @@ class HyperVConnection(object): for v in self._conn.Msvm_ComputerSystem(['ElementName'])] return vms + def list_instances_detail(self): + # TODO(justinsb): This is a terrible implementation (1+N) + instance_infos = [] + for instance_name in self.list_instances(): + info = self.get_info(instance_name) + + state = info['state'] + + instance_info = driver.InstanceInfo(instance_name, state) + instance_infos.append(instance_info) + + return instance_infos + def spawn(self, instance): """ Create a new VM and start it.""" vm = self._lookup(instance.name) @@ -345,7 +360,7 @@ class HyperVConnection(object): newinst = cl.new() #Copy the properties from the original. for prop in wmi_obj._properties: - newinst.Properties_.Item(prop).Value =\ + newinst.Properties_.Item(prop).Value = \ wmi_obj.Properties_.Item(prop).Value return newinst diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index 67094320e..96f42a8f8 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -62,6 +62,7 @@ from nova.auth import manager from nova.compute import instance_types from nova.compute import power_state from nova.virt import disk +from nova.virt import driver from nova.virt import images libvirt = None @@ -133,8 +134,8 @@ def get_connection(read_only): def _late_load_cheetah(): global Template if Template is None: - t = __import__('Cheetah.Template', globals(), locals(), ['Template'], - -1) + t = __import__('Cheetah.Template', globals(), locals(), + ['Template'], -1) Template = t.Template @@ -153,9 +154,10 @@ def _get_ip_version(cidr): return int(net.version()) -class LibvirtConnection(object): +class LibvirtConnection(driver.ComputeDriver): def __init__(self, read_only): + super(LibvirtConnection, self).__init__() self.libvirt_uri = self.get_uri() self.libvirt_xml = open(FLAGS.libvirt_xml_template).read() @@ -235,6 +237,29 @@ class LibvirtConnection(object): return [self._conn.lookupByID(x).name() for x in self._conn.listDomainsID()] + def _map_to_instance_info(self, domain): + """Gets info from a virsh domain object into an InstanceInfo""" + + # domain.info() returns a list of: + # state: one of the state values (virDomainState) + # maxMemory: the maximum memory used by the domain + # memory: the current amount of memory used by the domain + # nbVirtCPU: the number of virtual CPU + # puTime: the time used by the domain in nanoseconds + + (state, _max_mem, _mem, _num_cpu, _cpu_time) = domain.info() + name = domain.name() + + return driver.InstanceInfo(name, state) + + def list_instances_detail(self): + infos = [] + for domain_id in self._conn.listDomainsID(): + domain = self._conn.lookupByID(domain_id) + info = self._map_to_instance_info(domain) + infos.append(info) + return infos + def destroy(self, instance, cleanup=True): try: virt_dom = self._conn.lookupByName(instance['name']) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index cfc3a1c22..af39a3def 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -36,6 +36,7 @@ from nova import utils from nova.auth.manager import AuthManager from nova.compute import power_state +from nova.virt import driver from nova.virt.xenapi.network_utils import NetworkHelper from nova.virt.xenapi.vm_utils import VMHelper from nova.virt.xenapi.vm_utils import ImageType @@ -57,6 +58,8 @@ class VMOps(object): def list_instances(self): """List VM instances""" + # TODO(justinsb): Should we just always use the details method? + # Seems to be the same number of API calls.. vm_refs = [] for vm_ref in self._session.get_xenapi().VM.get_all(): vm_rec = self._session.get_xenapi().VM.get_record(vm_ref) @@ -64,6 +67,22 @@ class VMOps(object): vm_refs.append(vm_rec["name_label"]) return vm_refs + def list_instances_detail(self): + """List VM instances, returning InstanceInfo objects""" + instance_infos = [] + for vm_ref in self._session.get_xenapi().VM.get_all(): + vm_rec = self._session.get_xenapi().VM.get_record(vm_ref) + if not vm_rec["is_a_template"] and not vm_rec["is_control_domain"]: + name = vm_rec["name_label"] + + # TODO(justinsb): This a roundabout way to map the state + openstack_format = VMHelper.compile_info(vm_rec) + state = openstack_format['state'] + + instance_info = driver.InstanceInfo(name, state) + instance_infos.append(instance_info) + return instance_infos + def revert_resize(self, instance): vm_ref = VMHelper.lookup(self._session, instance.name) self._start(instance, vm_ref) @@ -989,7 +1008,7 @@ class VMOps(object): """ vm_ref = self._get_vm_opaque_ref(instance_or_vm) data = self._session.call_xenapi_request('VM.get_xenstore_data', - (vm_ref, )) + (vm_ref,)) ret = {} if keys is None: keys = data.keys() diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py index 2884687fb..f20fb29d8 100644 --- a/nova/virt/xenapi_conn.py +++ b/nova/virt/xenapi_conn.py @@ -69,6 +69,7 @@ from nova import db from nova import utils from nova import flags from nova import log as logging +from nova.virt import driver from nova.virt.xenapi.vmops import VMOps from nova.virt.xenapi.volumeops import VolumeOps @@ -141,10 +142,11 @@ def get_connection(_): return XenAPIConnection(url, username, password) -class XenAPIConnection(object): +class XenAPIConnection(driver.ComputeDriver): """A connection to XenServer or Xen Cloud Platform""" def __init__(self, url, user, pw): + super(XenAPIConnection, self).__init__() session = XenAPISession(url, user, pw) self._vmops = VMOps(session) self._volumeops = VolumeOps(session) @@ -160,6 +162,9 @@ class XenAPIConnection(object): """List VM instances""" return self._vmops.list_instances() + def list_instances_detail(self): + return self._vmops.list_instances_detail() + def spawn(self, instance): """Create VM instance""" self._vmops.spawn(instance) |
