summaryrefslogtreecommitdiffstats
path: root/nova/compute
diff options
context:
space:
mode:
authorTrey Morris <trey.morris@rackspace.com>2011-03-28 12:13:20 -0500
committerTrey Morris <trey.morris@rackspace.com>2011-03-28 12:13:20 -0500
commit7eedf3f69ca1bbd1f44252fa01fb4f2676735eb2 (patch)
tree33c3983537da1d894caa6b390b5c488511df83c4 /nova/compute
parent57890776d0d7e9172b1fa056076ce28ae4b34b7b (diff)
parented12a2cd2beef77d1c7e9d16771e766aa068530d (diff)
merge with trunk
Diffstat (limited to 'nova/compute')
-rw-r--r--nova/compute/api.py205
-rw-r--r--nova/compute/instance_types.py123
-rw-r--r--nova/compute/manager.py530
-rw-r--r--nova/compute/power_state.py18
4 files changed, 801 insertions, 75 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py
index 1737565aa..e3be86679 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -34,6 +34,7 @@ from nova import rpc
from nova import utils
from nova import volume
from nova.compute import instance_types
+from nova.scheduler import api as scheduler_api
from nova.db import base
FLAGS = flags.FLAGS
@@ -80,17 +81,36 @@ class API(base.Base):
topic,
{"method": "get_network_topic", "args": {'fake': 1}})
+ def _check_injected_file_quota(self, context, injected_files):
+ """
+ Enforce quota limits on injected files
+
+ Raises a QuotaError if any limit is exceeded
+ """
+ if injected_files is None:
+ return
+ limit = quota.allowed_injected_files(context)
+ if len(injected_files) > limit:
+ raise quota.QuotaError(code="OnsetFileLimitExceeded")
+ path_limit = quota.allowed_injected_file_path_bytes(context)
+ content_limit = quota.allowed_injected_file_content_bytes(context)
+ for path, content in injected_files:
+ if len(path) > path_limit:
+ raise quota.QuotaError(code="OnsetFilePathLimitExceeded")
+ if len(content) > content_limit:
+ raise quota.QuotaError(code="OnsetFileContentLimitExceeded")
+
def create(self, context, instance_type,
image_id, kernel_id=None, ramdisk_id=None,
min_count=1, max_count=1,
display_name='', display_description='',
key_name=None, key_data=None, security_group='default',
availability_zone=None, user_data=None, metadata=[],
- onset_files=None):
+ injected_files=None):
"""Create the number of instances requested if quota and
- other arguments check out ok.
- """
- type_data = instance_types.INSTANCE_TYPES[instance_type]
+ other arguments check out ok."""
+
+ type_data = instance_types.get_instance_type(instance_type)
num_instances = quota.allowed_instances(context, max_count, type_data)
if num_instances < min_count:
pid = context.project_id
@@ -124,11 +144,19 @@ class API(base.Base):
LOG.warn(msg)
raise quota.QuotaError(msg, "MetadataLimitExceeded")
+ self._check_injected_file_quota(context, injected_files)
+
image = self.image_service.show(context, image_id)
+
+ os_type = None
+ if 'properties' in image and 'os_type' in image['properties']:
+ os_type = image['properties']['os_type']
+
if kernel_id is None:
- kernel_id = image.get('kernel_id', None)
+ kernel_id = image['properties'].get('kernel_id', None)
if ramdisk_id is None:
- ramdisk_id = image.get('ramdisk_id', None)
+ ramdisk_id = image['properties'].get('ramdisk_id', None)
+ # FIXME(sirp): is there a way we can remove null_kernel?
# No kernel and ramdisk for raw images
if kernel_id == str(FLAGS.null_kernel):
kernel_id = None
@@ -164,6 +192,7 @@ class API(base.Base):
'image_id': image_id,
'kernel_id': kernel_id or '',
'ramdisk_id': ramdisk_id or '',
+ 'state': 0,
'state_description': 'scheduling',
'user_id': context.user_id,
'project_id': context.project_id,
@@ -179,7 +208,8 @@ class API(base.Base):
'key_data': key_data,
'locked': False,
'metadata': metadata,
- 'availability_zone': availability_zone}
+ 'availability_zone': availability_zone,
+ 'os_type': os_type}
elevated = context.elevated()
instances = []
LOG.debug(_("Going to run %s instances..."), num_instances)
@@ -216,13 +246,23 @@ class API(base.Base):
"args": {"topic": FLAGS.compute_topic,
"instance_id": instance_id,
"availability_zone": availability_zone,
- "onset_files": onset_files}})
+ "injected_files": injected_files}})
for group_id in security_groups:
self.trigger_security_group_members_refresh(elevated, group_id)
return [dict(x.iteritems()) for x in instances]
+ def has_finished_migration(self, context, instance_id):
+ """Retrieves whether or not a finished migration exists for
+ an instance"""
+ try:
+ db.migration_get_by_instance_and_status(context, instance_id,
+ 'finished')
+ return True
+ except exception.NotFound:
+ return False
+
def ensure_default_security_group(self, context):
""" Create security group for the security context if it
does not already exist
@@ -312,17 +352,18 @@ class API(base.Base):
rv = self.db.instance_update(context, instance_id, kwargs)
return dict(rv.iteritems())
+ @scheduler_api.reroute_compute("delete")
def delete(self, context, instance_id):
LOG.debug(_("Going to try to terminate %s"), instance_id)
try:
instance = self.get(context, instance_id)
except exception.NotFound:
- LOG.warning(_("Instance %d was not found during terminate"),
+ LOG.warning(_("Instance %s was not found during terminate"),
instance_id)
raise
if (instance['state_description'] == 'terminating'):
- LOG.warning(_("Instance %d is already being terminated"),
+ LOG.warning(_("Instance %s is already being terminated"),
instance_id)
return
@@ -344,24 +385,37 @@ class API(base.Base):
rv = self.db.instance_get(context, instance_id)
return dict(rv.iteritems())
+ @scheduler_api.reroute_compute("get")
+ def routing_get(self, context, instance_id):
+ """Use this method instead of get() if this is the only
+ operation you intend to to. It will route to novaclient.get
+ if the instance is not found."""
+ return self.get(context, instance_id)
+
def get_all(self, context, project_id=None, reservation_id=None,
fixed_ip=None):
"""Get all instances, possibly filtered by one of the
given parameters. If there is no filter and the context is
- an admin, it will retreive all instances in the system."""
+ an admin, it will retreive all instances in the system.
+ """
if reservation_id is not None:
- return self.db.instance_get_all_by_reservation(context,
- reservation_id)
+ return self.db.instance_get_all_by_reservation(
+ context, reservation_id)
+
if fixed_ip is not None:
return self.db.fixed_ip_get_instance(context, fixed_ip)
+
if project_id or not context.is_admin:
if not context.project:
- return self.db.instance_get_all_by_user(context,
- context.user_id)
+ return self.db.instance_get_all_by_user(
+ context, context.user_id)
+
if project_id is None:
project_id = context.project_id
- return self.db.instance_get_all_by_project(context,
- project_id)
+
+ return self.db.instance_get_all_by_project(
+ context, project_id)
+
return self.db.instance_get_all(context)
def _cast_compute_message(self, method, context, instance_id, host=None,
@@ -402,30 +456,105 @@ class API(base.Base):
kwargs = {'method': method, 'args': params}
return rpc.call(context, queue, kwargs)
+ def _cast_scheduler_message(self, context, args):
+ """Generic handler for RPC calls to the scheduler"""
+ rpc.cast(context, FLAGS.scheduler_topic, args)
+
def snapshot(self, context, instance_id, name):
"""Snapshot the given instance.
:retval: A dict containing image metadata
"""
- data = {'name': name, 'is_public': False}
- image_meta = self.image_service.create(context, data)
- params = {'image_id': image_meta['id']}
+ properties = {'instance_id': str(instance_id),
+ 'user_id': str(context.user_id)}
+ sent_meta = {'name': name, 'is_public': False,
+ 'properties': properties}
+ recv_meta = self.image_service.create(context, sent_meta)
+ params = {'image_id': recv_meta['id']}
self._cast_compute_message('snapshot_instance', context, instance_id,
params=params)
- return image_meta
+ return recv_meta
def reboot(self, context, instance_id):
"""Reboot the given instance."""
self._cast_compute_message('reboot_instance', context, instance_id)
+ def revert_resize(self, context, instance_id):
+ """Reverts a resize, deleting the 'new' instance in the process"""
+ context = context.elevated()
+ migration_ref = self.db.migration_get_by_instance_and_status(context,
+ instance_id, 'finished')
+ if not migration_ref:
+ raise exception.NotFound(_("No finished migrations found for "
+ "instance"))
+
+ params = {'migration_id': migration_ref['id']}
+ self._cast_compute_message('revert_resize', context, instance_id,
+ migration_ref['dest_compute'], params=params)
+ self.db.migration_update(context, migration_ref['id'],
+ {'status': 'reverted'})
+
+ def confirm_resize(self, context, instance_id):
+ """Confirms a migration/resize, deleting the 'old' instance in the
+ process."""
+ context = context.elevated()
+ migration_ref = self.db.migration_get_by_instance_and_status(context,
+ instance_id, 'finished')
+ if not migration_ref:
+ raise exception.NotFound(_("No finished migrations found for "
+ "instance"))
+ instance_ref = self.db.instance_get(context, instance_id)
+ params = {'migration_id': migration_ref['id']}
+ self._cast_compute_message('confirm_resize', context, instance_id,
+ migration_ref['source_compute'], params=params)
+
+ self.db.migration_update(context, migration_ref['id'],
+ {'status': 'confirmed'})
+ self.db.instance_update(context, instance_id,
+ {'host': migration_ref['dest_compute'], })
+
+ def resize(self, context, instance_id, flavor_id):
+ """Resize a running instance."""
+ instance = self.db.instance_get(context, instance_id)
+ current_instance_type = self.db.instance_type_get_by_name(
+ context, instance['instance_type'])
+
+ new_instance_type = self.db.instance_type_get_by_flavor_id(
+ context, flavor_id)
+ current_instance_type_name = current_instance_type['name']
+ new_instance_type_name = new_instance_type['name']
+ LOG.debug(_("Old instance type %(current_instance_type_name)s, "
+ " new instance type %(new_instance_type_name)s") % locals())
+ if not new_instance_type:
+ raise exception.ApiError(_("Requested flavor %(flavor_id)d "
+ "does not exist") % locals())
+
+ current_memory_mb = current_instance_type['memory_mb']
+ new_memory_mb = new_instance_type['memory_mb']
+ if current_memory_mb > new_memory_mb:
+ raise exception.ApiError(_("Invalid flavor: cannot downsize"
+ "instances"))
+ if current_memory_mb == new_memory_mb:
+ raise exception.ApiError(_("Invalid flavor: cannot use"
+ "the same flavor. "))
+
+ self._cast_scheduler_message(context,
+ {"method": "prep_resize",
+ "args": {"topic": FLAGS.compute_topic,
+ "instance_id": instance_id,
+ "flavor_id": flavor_id}})
+
+ @scheduler_api.reroute_compute("pause")
def pause(self, context, instance_id):
"""Pause the given instance."""
self._cast_compute_message('pause_instance', context, instance_id)
+ @scheduler_api.reroute_compute("unpause")
def unpause(self, context, instance_id):
"""Unpause the given instance."""
self._cast_compute_message('unpause_instance', context, instance_id)
+ @scheduler_api.reroute_compute("diagnostics")
def get_diagnostics(self, context, instance_id):
"""Retrieve diagnostics for the given instance."""
return self._call_compute_message(
@@ -437,25 +566,30 @@ class API(base.Base):
"""Retrieve actions for the given instance."""
return self.db.instance_get_actions(context, instance_id)
+ @scheduler_api.reroute_compute("suspend")
def suspend(self, context, instance_id):
"""suspend the instance with instance_id"""
self._cast_compute_message('suspend_instance', context, instance_id)
+ @scheduler_api.reroute_compute("resume")
def resume(self, context, instance_id):
"""resume the instance with instance_id"""
self._cast_compute_message('resume_instance', context, instance_id)
+ @scheduler_api.reroute_compute("rescue")
def rescue(self, context, instance_id):
"""Rescue the given instance."""
self._cast_compute_message('rescue_instance', context, instance_id)
+ @scheduler_api.reroute_compute("unrescue")
def unrescue(self, context, instance_id):
"""Unrescue the given instance."""
self._cast_compute_message('unrescue_instance', context, instance_id)
- def set_admin_password(self, context, instance_id):
+ def set_admin_password(self, context, instance_id, password=None):
"""Set the root/admin password for the given instance."""
- self._cast_compute_message('set_admin_password', context, instance_id)
+ self._cast_compute_message('set_admin_password', context, instance_id,
+ password)
def inject_file(self, context, instance_id):
"""Write a file to the given instance."""
@@ -463,7 +597,6 @@ class API(base.Base):
def get_ajax_console(self, context, instance_id):
"""Get a url to an AJAX Console"""
- instance = self.get(context, instance_id)
output = self._call_compute_message('get_ajax_console',
context,
instance_id)
@@ -511,7 +644,7 @@ class API(base.Base):
if not re.match("^/dev/[a-z]d[a-z]+$", device):
raise exception.ApiError(_("Invalid device specified: %s. "
"Example device: /dev/vdb") % device)
- self.volume_api.check_attach(context, volume_id)
+ self.volume_api.check_attach(context, volume_id=volume_id)
instance = self.get(context, instance_id)
host = instance['host']
rpc.cast(context,
@@ -525,7 +658,7 @@ class API(base.Base):
instance = self.db.volume_get_instance(context.elevated(), volume_id)
if not instance:
raise exception.ApiError(_("Volume isn't attached to anything!"))
- self.volume_api.check_detach(context, volume_id)
+ self.volume_api.check_detach(context, volume_id=volume_id)
host = instance['host']
rpc.cast(context,
self.db.queue_get_for(context, FLAGS.compute_topic, host),
@@ -536,5 +669,21 @@ class API(base.Base):
def associate_floating_ip(self, context, instance_id, address):
instance = self.get(context, instance_id)
- self.network_api.associate_floating_ip(context, address,
- instance['fixed_ip'])
+ self.network_api.associate_floating_ip(context,
+ floating_ip=address,
+ fixed_ip=instance['fixed_ip'])
+
+ def get_instance_metadata(self, context, instance_id):
+ """Get all metadata associated with an instance."""
+ rv = self.db.instance_metadata_get(context, instance_id)
+ return dict(rv.iteritems())
+
+ def delete_instance_metadata(self, context, instance_id, key):
+ """Delete the given metadata item"""
+ self.db.instance_metadata_delete(context, instance_id, key)
+
+ def update_or_create_instance_metadata(self, context, instance_id,
+ metadata):
+ """Updates or creates instance metadata"""
+ self.db.instance_metadata_update_or_create(context, instance_id,
+ metadata)
diff --git a/nova/compute/instance_types.py b/nova/compute/instance_types.py
index 309313fd0..fa02a5dfa 100644
--- a/nova/compute/instance_types.py
+++ b/nova/compute/instance_types.py
@@ -4,6 +4,7 @@
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
+# Copyright 2011 Ken Pepple
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -21,30 +22,120 @@
The built-in instance properties.
"""
-from nova import flags
+from nova import context
+from nova import db
from nova import exception
+from nova import flags
+from nova import log as logging
FLAGS = flags.FLAGS
-INSTANCE_TYPES = {
- 'm1.tiny': dict(memory_mb=512, vcpus=1, local_gb=0, flavorid=1),
- 'm1.small': dict(memory_mb=2048, vcpus=1, local_gb=20, flavorid=2),
- 'm1.medium': dict(memory_mb=4096, vcpus=2, local_gb=40, flavorid=3),
- 'm1.large': dict(memory_mb=8192, vcpus=4, local_gb=80, flavorid=4),
- 'm1.xlarge': dict(memory_mb=16384, vcpus=8, local_gb=160, flavorid=5)}
+LOG = logging.getLogger('nova.instance_types')
+
+
+def create(name, memory, vcpus, local_gb, flavorid, swap=0,
+ rxtx_quota=0, rxtx_cap=0):
+ """Creates instance types / flavors
+ arguments: name memory vcpus local_gb flavorid swap rxtx_quota rxtx_cap
+ """
+ for option in [memory, vcpus, local_gb, flavorid]:
+ try:
+ int(option)
+ except ValueError:
+ raise exception.InvalidInputException(
+ _("create arguments must be positive integers"))
+ if (int(memory) <= 0) or (int(vcpus) <= 0) or (int(local_gb) < 0):
+ raise exception.InvalidInputException(
+ _("create arguments must be positive integers"))
+
+ try:
+ db.instance_type_create(
+ context.get_admin_context(),
+ dict(name=name,
+ memory_mb=memory,
+ vcpus=vcpus,
+ local_gb=local_gb,
+ flavorid=flavorid,
+ swap=swap,
+ rxtx_quota=rxtx_quota,
+ rxtx_cap=rxtx_cap))
+ except exception.DBError, e:
+ LOG.exception(_('DB error: %s' % e))
+ raise exception.ApiError(_("Cannot create instance type: %s" % name))
+
+
+def destroy(name):
+ """Marks instance types / flavors as deleted
+ arguments: name"""
+ if name == None:
+ raise exception.InvalidInputException(_("No instance type specified"))
+ else:
+ try:
+ db.instance_type_destroy(context.get_admin_context(), name)
+ except exception.NotFound:
+ LOG.exception(_('Instance type %s not found for deletion' % name))
+ raise exception.ApiError(_("Unknown instance type: %s" % name))
+
+
+def purge(name):
+ """Removes instance types / flavors from database
+ arguments: name"""
+ if name == None:
+ raise exception.InvalidInputException(_("No instance type specified"))
+ else:
+ try:
+ db.instance_type_purge(context.get_admin_context(), name)
+ except exception.NotFound:
+ LOG.exception(_('Instance type %s not found for purge' % name))
+ raise exception.ApiError(_("Unknown instance type: %s" % name))
+
+
+def get_all_types(inactive=0):
+ """Retrieves non-deleted instance_types.
+ Pass true as argument if you want deleted instance types returned also."""
+ return db.instance_type_get_all(context.get_admin_context(), inactive)
+
+
+def get_all_flavors():
+ """retrieves non-deleted flavors. alias for instance_types.get_all_types().
+ Pass true as argument if you want deleted instance types returned also."""
+ return get_all_types(context.get_admin_context())
+
+
+def get_instance_type(name):
+ """Retrieves single instance type by name"""
+ if name is None:
+ return FLAGS.default_instance_type
+ try:
+ ctxt = context.get_admin_context()
+ inst_type = db.instance_type_get_by_name(ctxt, name)
+ return inst_type
+ except exception.DBError:
+ raise exception.ApiError(_("Unknown instance type: %s" % name))
def get_by_type(instance_type):
- """Build instance data structure and save it to the data store."""
+ """retrieve instance type name"""
if instance_type is None:
return FLAGS.default_instance_type
- if instance_type not in INSTANCE_TYPES:
- raise exception.ApiError(_("Unknown instance type: %s") % \
- instance_type, "Invalid")
- return instance_type
+
+ try:
+ ctxt = context.get_admin_context()
+ inst_type = db.instance_type_get_by_name(ctxt, instance_type)
+ return inst_type['name']
+ except exception.DBError, e:
+ LOG.exception(_('DB error: %s' % e))
+ raise exception.ApiError(_("Unknown instance type: %s" %\
+ instance_type))
def get_by_flavor_id(flavor_id):
- for instance_type, details in INSTANCE_TYPES.iteritems():
- if details['flavorid'] == flavor_id:
- return instance_type
- return FLAGS.default_instance_type
+ """retrieve instance type's name by flavor_id"""
+ if flavor_id is None:
+ return FLAGS.default_instance_type
+ try:
+ ctxt = context.get_admin_context()
+ flavor = db.instance_type_get_by_flavor_id(ctxt, flavor_id)
+ return flavor['name']
+ except exception.DBError, e:
+ LOG.exception(_('DB error: %s' % e))
+ raise exception.ApiError(_("Unknown flavor: %s" % flavor_id))
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 076831c0b..2c5d958e6 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2,6 +2,7 @@
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
+# Copyright 2011 Justin Santa Barbara
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -34,13 +35,17 @@ terminating it.
:func:`nova.utils.import_object`
"""
-import base64
import datetime
+import os
import random
import string
import socket
+import sys
+import tempfile
import functools
+from eventlet import greenthread
+
from nova import exception
from nova import flags
from nova import log as logging
@@ -48,6 +53,7 @@ from nova import manager
from nova import rpc
from nova import utils
from nova.compute import power_state
+from nova.virt import driver
FLAGS = flags.FLAGS
flags.DEFINE_string('instances_path', '$state_path/instances',
@@ -61,6 +67,12 @@ flags.DEFINE_integer('password_length', 12,
flags.DEFINE_string('console_host', socket.gethostname(),
'Console proxy host to use to connect to instances on'
'this host.')
+flags.DEFINE_integer('live_migration_retry_count', 30,
+ "Retry count needed in live_migration."
+ " sleep 1 sec for each count")
+flags.DEFINE_integer("rescue_timeout", 0,
+ "Automatically unrescue an instance after N seconds."
+ " Set to 0 to disable.")
LOG = logging.getLogger('nova.compute.manager')
@@ -99,7 +111,7 @@ def checks_instance_lock(function):
return decorated_function
-class ComputeManager(manager.Manager):
+class ComputeManager(manager.SchedulerDependentManager):
"""Manages the running instances from creation to destruction."""
@@ -109,10 +121,19 @@ class ComputeManager(manager.Manager):
# and redocument the module docstring
if not compute_driver:
compute_driver = FLAGS.compute_driver
- self.driver = utils.import_object(compute_driver)
+
+ try:
+ self.driver = utils.check_isinstance(
+ utils.import_object(compute_driver),
+ driver.ComputeDriver)
+ except ImportError as e:
+ LOG.error(_("Unable to load the virtualization driver: %s") % (e))
+ sys.exit(1)
+
self.network_manager = utils.import_object(FLAGS.network_manager)
self.volume_manager = utils.import_object(FLAGS.volume_manager)
- super(ComputeManager, self).__init__(*args, **kwargs)
+ super(ComputeManager, self).__init__(service_name="compute",
+ *args, **kwargs)
def init_host(self):
"""Do any initialization that needs to be run if this is a
@@ -120,6 +141,12 @@ class ComputeManager(manager.Manager):
"""
self.driver.init_host(host=self.host)
+ def periodic_tasks(self, context=None):
+ """Tasks to be run at a periodic interval."""
+ super(ComputeManager, self).periodic_tasks(context)
+ if FLAGS.rescue_timeout > 0:
+ self.driver.poll_rescued_instances(FLAGS.rescue_timeout)
+
def _update_state(self, context, instance_id):
"""Update the state of an instance from the driver info."""
# FIXME(ja): include other fields from state?
@@ -174,14 +201,14 @@ class ComputeManager(manager.Manager):
"""Launch a new instance with specified options."""
context = context.elevated()
instance_ref = self.db.instance_get(context, instance_id)
- instance_ref.onset_files = kwargs.get('onset_files', [])
+ instance_ref.injected_files = kwargs.get('injected_files', [])
if instance_ref['name'] in self.driver.list_instances():
raise exception.Error(_("Instance has already been created"))
LOG.audit(_("instance %s: starting..."), instance_id,
context=context)
self.db.instance_update(context,
instance_id,
- {'host': self.host})
+ {'host': self.host, 'launched_on': self.host})
self.db.instance_set_state(context,
instance_id,
@@ -221,9 +248,10 @@ class ComputeManager(manager.Manager):
self.db.instance_update(context,
instance_id,
{'launched_at': now})
- except Exception: # pylint: disable-msg=W0702
- LOG.exception(_("instance %s: Failed to spawn"), instance_id,
- context=context)
+ except Exception: # pylint: disable=W0702
+ LOG.exception(_("Instance '%s' failed to spawn. Is virtualization"
+ " enabled in the BIOS?"), instance_id,
+ context=context)
self.db.instance_set_state(context,
instance_id,
power_state.SHUTDOWN)
@@ -359,15 +387,10 @@ class ComputeManager(manager.Manager):
LOG.warn(_('trying to inject a file into a non-running '
'instance: %(instance_id)s (state: %(instance_state)s '
'expected: %(expected_state)s)') % locals())
- # Files/paths *should* be base64-encoded at this point, but
- # double-check to make sure.
- b64_path = utils.ensure_b64_encoding(path)
- b64_contents = utils.ensure_b64_encoding(file_contents)
- plain_path = base64.b64decode(b64_path)
nm = instance_ref['name']
- msg = _('instance %(nm)s: injecting file to %(plain_path)s') % locals()
+ msg = _('instance %(nm)s: injecting file to %(path)s') % locals()
LOG.audit(msg)
- self.driver.inject_file(instance_ref, b64_path, b64_contents)
+ self.driver.inject_file(instance_ref, path, file_contents)
@exception.wrap_exception
@checks_instance_lock
@@ -376,12 +399,19 @@ class ComputeManager(manager.Manager):
context = context.elevated()
instance_ref = self.db.instance_get(context, instance_id)
LOG.audit(_('instance %s: rescuing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'rescuing')
+ self.db.instance_set_state(
+ context,
+ instance_id,
+ power_state.NOSTATE,
+ 'rescuing')
self.network_manager.setup_compute_network(context, instance_id)
- self.driver.rescue(instance_ref)
+ self.driver.rescue(
+ instance_ref,
+ lambda result: self._update_state_callback(
+ self,
+ context,
+ instance_id,
+ result))
self._update_state(context, instance_id)
@exception.wrap_exception
@@ -391,11 +421,18 @@ class ComputeManager(manager.Manager):
context = context.elevated()
instance_ref = self.db.instance_get(context, instance_id)
LOG.audit(_('instance %s: unrescuing'), instance_id, context=context)
- self.db.instance_set_state(context,
- instance_id,
- power_state.NOSTATE,
- 'unrescuing')
- self.driver.unrescue(instance_ref)
+ self.db.instance_set_state(
+ context,
+ instance_id,
+ power_state.NOSTATE,
+ 'unrescuing')
+ self.driver.unrescue(
+ instance_ref,
+ lambda result: self._update_state_callback(
+ self,
+ context,
+ instance_id,
+ result))
self._update_state(context, instance_id)
@staticmethod
@@ -405,6 +442,141 @@ class ComputeManager(manager.Manager):
@exception.wrap_exception
@checks_instance_lock
+ def confirm_resize(self, context, instance_id, migration_id):
+ """Destroys the source instance"""
+ context = context.elevated()
+ instance_ref = self.db.instance_get(context, instance_id)
+ migration_ref = self.db.migration_get(context, migration_id)
+ self.driver.destroy(instance_ref)
+
+ @exception.wrap_exception
+ @checks_instance_lock
+ def revert_resize(self, context, instance_id, migration_id):
+ """Destroys the new instance on the destination machine,
+ reverts the model changes, and powers on the old
+ instance on the source machine"""
+ instance_ref = self.db.instance_get(context, instance_id)
+ migration_ref = self.db.migration_get(context, migration_id)
+
+ self.driver.destroy(instance_ref)
+ topic = self.db.queue_get_for(context, FLAGS.compute_topic,
+ instance_ref['host'])
+ rpc.cast(context, topic,
+ {'method': 'finish_revert_resize',
+ 'args': {
+ 'migration_id': migration_ref['id'],
+ 'instance_id': instance_id, },
+ })
+
+ @exception.wrap_exception
+ @checks_instance_lock
+ def finish_revert_resize(self, context, instance_id, migration_id):
+ """Finishes the second half of reverting a resize, powering back on
+ the source instance and reverting the resized attributes in the
+ database"""
+ instance_ref = self.db.instance_get(context, instance_id)
+ migration_ref = self.db.migration_get(context, migration_id)
+ instance_type = self.db.instance_type_get_by_flavor_id(context,
+ migration_ref['old_flavor_id'])
+
+ # Just roll back the record. There's no need to resize down since
+ # the 'old' VM already has the preferred attributes
+ self.db.instance_update(context, instance_id,
+ dict(memory_mb=instance_type['memory_mb'],
+ vcpus=instance_type['vcpus'],
+ local_gb=instance_type['local_gb']))
+
+ self.driver.revert_resize(instance_ref)
+ self.db.migration_update(context, migration_id,
+ {'status': 'reverted'})
+
+ @exception.wrap_exception
+ @checks_instance_lock
+ def prep_resize(self, context, instance_id, flavor_id):
+ """Initiates the process of moving a running instance to another
+ host, possibly changing the RAM and disk size in the process"""
+ context = context.elevated()
+ instance_ref = self.db.instance_get(context, instance_id)
+ if instance_ref['host'] == FLAGS.host:
+ raise exception.Error(_(
+ 'Migration error: destination same as source!'))
+
+ instance_type = self.db.instance_type_get_by_flavor_id(context,
+ flavor_id)
+ migration_ref = self.db.migration_create(context,
+ {'instance_id': instance_id,
+ 'source_compute': instance_ref['host'],
+ 'dest_compute': FLAGS.host,
+ 'dest_host': self.driver.get_host_ip_addr(),
+ 'old_flavor_id': instance_type['flavorid'],
+ 'new_flavor_id': flavor_id,
+ 'status': 'pre-migrating'})
+
+ LOG.audit(_('instance %s: migrating to '), instance_id,
+ context=context)
+ topic = self.db.queue_get_for(context, FLAGS.compute_topic,
+ instance_ref['host'])
+ rpc.cast(context, topic,
+ {'method': 'resize_instance',
+ 'args': {
+ 'migration_id': migration_ref['id'],
+ 'instance_id': instance_id, },
+ })
+
+ @exception.wrap_exception
+ @checks_instance_lock
+ def resize_instance(self, context, instance_id, migration_id):
+ """Starts the migration of a running instance to another host"""
+ migration_ref = self.db.migration_get(context, migration_id)
+ instance_ref = self.db.instance_get(context, instance_id)
+ self.db.migration_update(context, migration_id,
+ {'status': 'migrating', })
+
+ disk_info = self.driver.migrate_disk_and_power_off(instance_ref,
+ migration_ref['dest_host'])
+ self.db.migration_update(context, migration_id,
+ {'status': 'post-migrating', })
+
+ service = self.db.service_get_by_host_and_topic(context,
+ migration_ref['dest_compute'], FLAGS.compute_topic)
+ topic = self.db.queue_get_for(context, FLAGS.compute_topic,
+ migration_ref['dest_compute'])
+ rpc.cast(context, topic,
+ {'method': 'finish_resize',
+ 'args': {
+ 'migration_id': migration_id,
+ 'instance_id': instance_id,
+ 'disk_info': disk_info, },
+ })
+
+ @exception.wrap_exception
+ @checks_instance_lock
+ def finish_resize(self, context, instance_id, migration_id, disk_info):
+ """Completes the migration process by setting up the newly transferred
+ disk and turning on the instance on its new host machine"""
+ migration_ref = self.db.migration_get(context, migration_id)
+ instance_ref = self.db.instance_get(context,
+ migration_ref['instance_id'])
+ # TODO(mdietz): apply the rest of the instance_type attributes going
+ # after they're supported
+ instance_type = self.db.instance_type_get_by_flavor_id(context,
+ migration_ref['new_flavor_id'])
+ self.db.instance_update(context, instance_id,
+ dict(instance_type=instance_type['name'],
+ memory_mb=instance_type['memory_mb'],
+ vcpus=instance_type['vcpus'],
+ local_gb=instance_type['local_gb']))
+
+ # reload the updated instance ref
+ # FIXME(mdietz): is there reload functionality?
+ instance_ref = self.db.instance_get(context, instance_id)
+ self.driver.finish_resize(instance_ref, disk_info)
+
+ self.db.migration_update(context, migration_id,
+ {'status': 'finished', })
+
+ @exception.wrap_exception
+ @checks_instance_lock
def pause_instance(self, context, instance_id):
"""Pause an instance on this server."""
context = context.elevated()
@@ -580,7 +752,7 @@ class ComputeManager(manager.Manager):
volume_id,
instance_id,
mountpoint)
- except Exception as exc: # pylint: disable-msg=W0702
+ except Exception as exc: # pylint: disable=W0702
# NOTE(vish): The inline callback eats the exception info so we
# log the traceback here and reraise the same
# ecxception below.
@@ -611,3 +783,307 @@ class ComputeManager(manager.Manager):
self.volume_manager.remove_compute_volume(context, volume_id)
self.db.volume_detached(context, volume_id)
return True
+
+ @exception.wrap_exception
+ def compare_cpu(self, context, cpu_info):
+ """Checks the host cpu is compatible to a cpu given by xml.
+
+ :param context: security context
+ :param cpu_info: json string obtained from virConnect.getCapabilities
+ :returns: See driver.compare_cpu
+
+ """
+ return self.driver.compare_cpu(cpu_info)
+
+ @exception.wrap_exception
+ def create_shared_storage_test_file(self, context):
+ """Makes tmpfile under FLAGS.instance_path.
+
+ This method enables compute nodes to recognize that they mounts
+ same shared storage. (create|check|creanup)_shared_storage_test_file()
+ is a pair.
+
+ :param context: security context
+ :returns: tmpfile name(basename)
+
+ """
+
+ dirpath = FLAGS.instances_path
+ fd, tmp_file = tempfile.mkstemp(dir=dirpath)
+ LOG.debug(_("Creating tmpfile %s to notify to other "
+ "compute nodes that they should mount "
+ "the same storage.") % tmp_file)
+ os.close(fd)
+ return os.path.basename(tmp_file)
+
+ @exception.wrap_exception
+ def check_shared_storage_test_file(self, context, filename):
+ """Confirms existence of the tmpfile under FLAGS.instances_path.
+
+ :param context: security context
+ :param filename: confirm existence of FLAGS.instances_path/thisfile
+
+ """
+
+ tmp_file = os.path.join(FLAGS.instances_path, filename)
+ if not os.path.exists(tmp_file):
+ raise exception.NotFound(_('%s not found') % tmp_file)
+
+ @exception.wrap_exception
+ def cleanup_shared_storage_test_file(self, context, filename):
+ """Removes existence of the tmpfile under FLAGS.instances_path.
+
+ :param context: security context
+ :param filename: remove existence of FLAGS.instances_path/thisfile
+
+ """
+
+ tmp_file = os.path.join(FLAGS.instances_path, filename)
+ os.remove(tmp_file)
+
    @exception.wrap_exception
    def update_available_resource(self, context):
        """Refresh this host's available-resource record via the driver.

        :param context: security context
        :returns: See driver.update_available_resource()

        """

        return self.driver.update_available_resource(context, self.host)
+
+ def pre_live_migration(self, context, instance_id, time=None):
+ """Preparations for live migration at dest host.
+
+ :param context: security context
+ :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+
+ """
+
+ if not time:
+ time = greenthread
+
+ # Getting instance info
+ instance_ref = self.db.instance_get(context, instance_id)
+ ec2_id = instance_ref['hostname']
+
+ # Getting fixed ips
+ fixed_ip = self.db.instance_get_fixed_address(context, instance_id)
+ if not fixed_ip:
+ msg = _("%(instance_id)s(%(ec2_id)s) does not have fixed_ip.")
+ raise exception.NotFound(msg % locals())
+
+ # If any volume is mounted, prepare here.
+ if not instance_ref['volumes']:
+ LOG.info(_("%s has no volume."), ec2_id)
+ else:
+ for v in instance_ref['volumes']:
+ self.volume_manager.setup_compute_volume(context, v['id'])
+
+ # Bridge settings.
+ # Call this method prior to ensure_filtering_rules_for_instance,
+ # since bridge is not set up, ensure_filtering_rules_for instance
+ # fails.
+ #
+ # Retry operation is necessary because continuously request comes,
+ # concorrent request occurs to iptables, then it complains.
+ max_retry = FLAGS.live_migration_retry_count
+ for cnt in range(max_retry):
+ try:
+ self.network_manager.setup_compute_network(context,
+ instance_id)
+ break
+ except exception.ProcessExecutionError:
+ if cnt == max_retry - 1:
+ raise
+ else:
+ LOG.warn(_("setup_compute_network() failed %(cnt)d."
+ "Retry up to %(max_retry)d for %(ec2_id)s.")
+ % locals())
+ time.sleep(1)
+
+ # Creating filters to hypervisors and firewalls.
+ # An example is that nova-instance-instance-xxx,
+ # which is written to libvirt.xml(Check "virsh nwfilter-list")
+ # This nwfilter is necessary on the destination host.
+ # In addition, this method is creating filtering rule
+ # onto destination host.
+ self.driver.ensure_filtering_rules_for_instance(instance_ref)
+
    def live_migration(self, context, instance_id, dest):
        """Executing live migration.

        :param context: security context
        :param instance_id: nova.db.sqlalchemy.models.Instance.Id
        :param dest: destination host

        """

        # Get instance for error handling.
        instance_ref = self.db.instance_get(context, instance_id)
        i_name = instance_ref.name

        try:
            # Check the volume node is working correctly when any volumes
            # are attached to the instance.
            if instance_ref['volumes']:
                rpc.call(context,
                         FLAGS.volume_topic,
                         {"method": "check_for_export",
                          "args": {'instance_id': instance_id}})

            # Ask the destination host to prepare for the live migration.
            rpc.call(context,
                self.db.queue_get_for(context, FLAGS.compute_topic, dest),
                     {"method": "pre_live_migration",
                      "args": {'instance_id': instance_id}})

        except Exception:
            # Roll the instance back to 'running' on the source host
            # before re-raising, so it is not left stuck in migration.
            msg = _("Pre live migration for %(i_name)s failed at %(dest)s")
            LOG.error(msg % locals())
            self.recover_live_migration(context, instance_ref)
            raise

        # Executing live migration.
        # live_migration might raise exceptions, but
        # nothing must be recovered in this version.
        self.driver.live_migration(context, instance_ref, dest,
                                   self.post_live_migration,
                                   self.recover_live_migration)
+
+ def post_live_migration(self, ctxt, instance_ref, dest):
+ """Post operations for live migration.
+
+ This method is called from live_migration
+ and mainly updating database record.
+
+ :param ctxt: security context
+ :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+ :param dest: destination host
+
+ """
+
+ LOG.info(_('post_live_migration() is started..'))
+ instance_id = instance_ref['id']
+
+ # Detaching volumes.
+ try:
+ for vol in self.db.volume_get_all_by_instance(ctxt, instance_id):
+ self.volume_manager.remove_compute_volume(ctxt, vol['id'])
+ except exception.NotFound:
+ pass
+
+ # Releasing vlan.
+ # (not necessary in current implementation?)
+
+ # Releasing security group ingress rule.
+ self.driver.unfilter_instance(instance_ref)
+
+ # Database updating.
+ i_name = instance_ref.name
+ try:
+ # Not return if floating_ip is not found, otherwise,
+ # instance never be accessible..
+ floating_ip = self.db.instance_get_floating_address(ctxt,
+ instance_id)
+ if not floating_ip:
+ LOG.info(_('No floating_ip is found for %s.'), i_name)
+ else:
+ floating_ip_ref = self.db.floating_ip_get_by_address(ctxt,
+ floating_ip)
+ self.db.floating_ip_update(ctxt,
+ floating_ip_ref['address'],
+ {'host': dest})
+ except exception.NotFound:
+ LOG.info(_('No floating_ip is found for %s.'), i_name)
+ except:
+ LOG.error(_("Live migration: Unexpected error:"
+ "%s cannot inherit floating ip..") % i_name)
+
+ # Restore instance/volume state
+ self.recover_live_migration(ctxt, instance_ref, dest)
+
+ LOG.info(_('Migrating %(i_name)s to %(dest)s finished successfully.')
+ % locals())
+ LOG.info(_("You may see the error \"libvirt: QEMU error: "
+ "Domain not found: no domain with matching name.\" "
+ "This error can be safely ignored."))
+
+ def recover_live_migration(self, ctxt, instance_ref, host=None):
+ """Recovers Instance/volume state from migrating -> running.
+
+ :param ctxt: security context
+ :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+ :param host:
+ DB column value is updated by this hostname.
+ if none, the host instance currently running is selected.
+
+ """
+
+ if not host:
+ host = instance_ref['host']
+
+ self.db.instance_update(ctxt,
+ instance_ref['id'],
+ {'state_description': 'running',
+ 'state': power_state.RUNNING,
+ 'host': host})
+
+ for volume in instance_ref['volumes']:
+ self.db.volume_update(ctxt, volume['id'], {'status': 'in-use'})
+
+ def periodic_tasks(self, context=None):
+ """Tasks to be run at a periodic interval."""
+ error_list = super(ComputeManager, self).periodic_tasks(context)
+ if error_list is None:
+ error_list = []
+
+ try:
+ self._poll_instance_states(context)
+ except Exception as ex:
+ LOG.warning(_("Error during instance poll: %s"),
+ unicode(ex))
+ error_list.append(ex)
+ return error_list
+
    def _poll_instance_states(self, context):
        """Reconcile DB instance state with what the hypervisor reports.

        Instances present in the DB but unknown to the hypervisor are set
        to SHUTOFF and then destroyed; VMs unknown to the DB are only
        logged and otherwise ignored.
        """
        vm_instances = self.driver.list_instances_detail()
        vm_instances = dict((vm.name, vm) for vm in vm_instances)

        # Keep a list of VMs not in the DB, cross them off as we find them
        vms_not_found_in_db = list(vm_instances.keys())

        db_instances = self.db.instance_get_all_by_host(context, self.host)

        for db_instance in db_instances:
            name = db_instance['name']
            vm_instance = vm_instances.get(name)
            if vm_instance is None:
                # DB knows the instance but the hypervisor does not.
                LOG.info(_("Found instance '%(name)s' in DB but no VM. "
                           "Setting state to shutoff.") % locals())
                vm_state = power_state.SHUTOFF
            else:
                vm_state = vm_instance.state
                vms_not_found_in_db.remove(name)

            db_state = db_instance['state']
            if vm_state != db_state:
                LOG.info(_("DB/VM state mismatch. Changing state from "
                           "'%(db_state)s' to '%(vm_state)s'") % locals())
                self.db.instance_set_state(context,
                                           db_instance['id'],
                                           vm_state)

            if vm_state == power_state.SHUTOFF:
                # TODO(soren): This is what the compute manager does when you
                # terminate an instance. At some point I figure we'll have a
                # "terminated" state and some sort of cleanup job that runs
                # occasionally, cleaning them out.
                self.db.instance_destroy(context, db_instance['id'])

        # Are there VMs not in the DB?
        for vm_not_found_in_db in vms_not_found_in_db:
            name = vm_not_found_in_db
            # TODO(justinsb): What to do here? Adopt it? Shut it down?
            LOG.warning(_("Found VM not in DB: '%(name)s'. Ignoring")
                        % locals())
diff --git a/nova/compute/power_state.py b/nova/compute/power_state.py
index adfc2dff0..ef013b2ef 100644
--- a/nova/compute/power_state.py
+++ b/nova/compute/power_state.py
@@ -2,6 +2,7 @@
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
+# Copyright 2011 Justin Santa Barbara
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
#
@@ -19,6 +20,7 @@
"""The various power states that a VM can be in."""
+#NOTE(justinsb): These are the virDomainState values from libvirt
NOSTATE = 0x00
RUNNING = 0x01
BLOCKED = 0x02
@@ -29,9 +31,10 @@ CRASHED = 0x06
SUSPENDED = 0x07
FAILED = 0x08
-
-def name(code):
- d = {
+# TODO(justinsb): Power state really needs to be a proper class,
+# so that we're not locked into the libvirt status codes and can put mapping
+# logic here rather than spread throughout the code
+_STATE_MAP = {
NOSTATE: 'pending',
RUNNING: 'running',
BLOCKED: 'blocked',
@@ -41,4 +44,11 @@ def name(code):
CRASHED: 'crashed',
SUSPENDED: 'suspended',
FAILED: 'failed to spawn'}
- return d[code]
+
+
def name(code):
    """Return the human-readable name for a power-state code.

    Raises KeyError if the code is not a known state.
    """
    return _STATE_MAP[code]
+
+
def valid_states():
    """Return all known power-state codes (the keys of _STATE_MAP)."""
    return _STATE_MAP.keys()