diff options
| author | Todd Willey <todd@ansolabs.com> | 2011-04-04 21:12:57 -0400 |
|---|---|---|
| committer | Todd Willey <todd@ansolabs.com> | 2011-04-04 21:12:57 -0400 |
| commit | da346dac037a46582d569180915010f4c4e2cd50 (patch) | |
| tree | 94cbeab7422f22b443868cb21d02406deb35826b /nova/compute | |
| parent | 2ee9070c3824d296bada49fc6637c09f8e18a5eb (diff) | |
| parent | 08417c48c223ad1b698ab1d00686a967b6a2dc0a (diff) | |
Merge trunk.
Diffstat (limited to 'nova/compute')
| -rw-r--r-- | nova/compute/api.py | 306 | ||||
| -rw-r--r-- | nova/compute/instance_types.py | 123 | ||||
| -rw-r--r-- | nova/compute/manager.py | 627 | ||||
| -rw-r--r-- | nova/compute/power_state.py | 18 |
4 files changed, 967 insertions, 107 deletions
diff --git a/nova/compute/api.py b/nova/compute/api.py index e7d2f29ef..bca6863d6 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -34,12 +34,17 @@ from nova import rpc from nova import utils from nova import volume from nova.compute import instance_types +from nova.scheduler import api as scheduler_api from nova.db import base -FLAGS = flags.FLAGS + LOG = logging.getLogger('nova.compute.api') +FLAGS = flags.FLAGS +flags.DECLARE('vncproxy_topic', 'nova.vnc') + + def generate_default_hostname(instance_id): """Default function to generate a hostname given an instance reference.""" return str(instance_id) @@ -80,16 +85,36 @@ class API(base.Base): topic, {"method": "get_network_topic", "args": {'fake': 1}}) + def _check_injected_file_quota(self, context, injected_files): + """ + Enforce quota limits on injected files + + Raises a QuotaError if any limit is exceeded + """ + if injected_files is None: + return + limit = quota.allowed_injected_files(context) + if len(injected_files) > limit: + raise quota.QuotaError(code="OnsetFileLimitExceeded") + path_limit = quota.allowed_injected_file_path_bytes(context) + content_limit = quota.allowed_injected_file_content_bytes(context) + for path, content in injected_files: + if len(path) > path_limit: + raise quota.QuotaError(code="OnsetFilePathLimitExceeded") + if len(content) > content_limit: + raise quota.QuotaError(code="OnsetFileContentLimitExceeded") + def create(self, context, instance_type, image_id, kernel_id=None, ramdisk_id=None, min_count=1, max_count=1, display_name='', display_description='', key_name=None, key_data=None, security_group='default', - availability_zone=None, user_data=None): + availability_zone=None, user_data=None, metadata=[], + injected_files=None): """Create the number of instances requested if quota and other arguments check out ok.""" - type_data = instance_types.INSTANCE_TYPES[instance_type] + type_data = instance_types.get_instance_type(instance_type) num_instances = quota.allowed_instances(context, max_count, type_data) if num_instances < min_count: pid = context.project_id @@ -99,25 +124,55 @@ class API(base.Base): "run %s more instances of this type.") % num_instances, "InstanceLimitExceeded") - is_vpn = image_id == FLAGS.vpn_image_id - if not is_vpn: - image = self.image_service.show(context, image_id) - if kernel_id is None: - kernel_id = image.get('kernel_id', None) - if ramdisk_id is None: - ramdisk_id = image.get('ramdisk_id', None) - # No kernel and ramdisk for raw images - if kernel_id == str(FLAGS.null_kernel): - kernel_id = None - ramdisk_id = None - LOG.debug(_("Creating a raw instance")) - # Make sure we have access to kernel and ramdisk (if not raw) - logging.debug("Using Kernel=%s, Ramdisk=%s" % - (kernel_id, ramdisk_id)) - if kernel_id: - self.image_service.show(context, kernel_id) - if ramdisk_id: - self.image_service.show(context, ramdisk_id) + num_metadata = len(metadata) + quota_metadata = quota.allowed_metadata_items(context, num_metadata) + if quota_metadata < num_metadata: + pid = context.project_id + msg = (_("Quota exceeeded for %(pid)s," + " tried to set %(num_metadata)s metadata properties") + % locals()) + LOG.warn(msg) + raise quota.QuotaError(msg, "MetadataLimitExceeded") + + # Because metadata is stored in the DB, we hard-code the size limits + # In future, we may support more variable length strings, so we act + # as if this is quota-controlled for forwards compatibility + for metadata_item in metadata: + k = metadata_item['key'] + v = metadata_item['value'] + if len(k) > 255 or len(v) > 255: + pid = context.project_id + msg = (_("Quota exceeeded for %(pid)s," + " metadata property key or value too long") + % locals()) + LOG.warn(msg) + raise quota.QuotaError(msg, "MetadataLimitExceeded") + + self._check_injected_file_quota(context, injected_files) + + image = self.image_service.show(context, image_id) + + os_type = None + if 'properties' in image and 'os_type' in image['properties']: + os_type = image['properties']['os_type'] + + if kernel_id is None: + kernel_id = image['properties'].get('kernel_id', None) + if ramdisk_id is None: + ramdisk_id = image['properties'].get('ramdisk_id', None) + # FIXME(sirp): is there a way we can remove null_kernel? + # No kernel and ramdisk for raw images + if kernel_id == str(FLAGS.null_kernel): + kernel_id = None + ramdisk_id = None + LOG.debug(_("Creating a raw instance")) + # Make sure we have access to kernel and ramdisk (if not raw) + logging.debug("Using Kernel=%s, Ramdisk=%s" % + (kernel_id, ramdisk_id)) + if kernel_id: + self.image_service.show(context, kernel_id) + if ramdisk_id: + self.image_service.show(context, ramdisk_id) if security_group is None: security_group = ['default'] @@ -141,6 +196,7 @@ class API(base.Base): 'image_id': image_id, 'kernel_id': kernel_id or '', 'ramdisk_id': ramdisk_id or '', + 'state': 0, 'state_description': 'scheduling', 'user_id': context.user_id, 'project_id': context.project_id, @@ -155,8 +211,9 @@ class API(base.Base): 'key_name': key_name, 'key_data': key_data, 'locked': False, - 'availability_zone': availability_zone} - + 'metadata': metadata, + 'availability_zone': availability_zone, + 'os_type': os_type} elevated = context.elevated() instances = [] LOG.debug(_("Going to run %s instances..."), num_instances) @@ -193,13 +250,24 @@ class API(base.Base): {"method": "run_instance", "args": {"topic": FLAGS.compute_topic, "instance_id": instance_id, - "availability_zone": availability_zone}}) + "availability_zone": availability_zone, + "injected_files": injected_files}}) for group_id in security_groups: self.trigger_security_group_members_refresh(elevated, group_id) return [dict(x.iteritems()) for x in instances] + def has_finished_migration(self, context, instance_id): + """Retrieves whether or not a finished migration exists for + an instance""" + try: + db.migration_get_by_instance_and_status(context, instance_id, + 'finished') + return True + except exception.NotFound: + return False + def ensure_default_security_group(self, context): """ Create security group for the security context if it does not already exist @@ -299,17 +367,18 @@ class API(base.Base): rv = self.db.instance_update(context, instance_id, kwargs) return dict(rv.iteritems()) + @scheduler_api.reroute_compute("delete") def delete(self, context, instance_id): LOG.debug(_("Going to try to terminate %s"), instance_id) try: instance = self.get(context, instance_id) except exception.NotFound: - LOG.warning(_("Instance %d was not found during terminate"), + LOG.warning(_("Instance %s was not found during terminate"), instance_id) raise if (instance['state_description'] == 'terminating'): - LOG.warning(_("Instance %d is already being terminated"), + LOG.warning(_("Instance %s is already being terminated"), instance_id) return @@ -331,24 +400,37 @@ class API(base.Base): rv = self.db.instance_get(context, instance_id) return dict(rv.iteritems()) + @scheduler_api.reroute_compute("get") + def routing_get(self, context, instance_id): + """Use this method instead of get() if this is the only + operation you intend to to. It will route to novaclient.get + if the instance is not found.""" + return self.get(context, instance_id) + def get_all(self, context, project_id=None, reservation_id=None, fixed_ip=None): """Get all instances, possibly filtered by one of the given parameters. If there is no filter and the context is - an admin, it will retreive all instances in the system.""" + an admin, it will retreive all instances in the system. + """ if reservation_id is not None: - return self.db.instance_get_all_by_reservation(context, - reservation_id) + return self.db.instance_get_all_by_reservation( + context, reservation_id) + if fixed_ip is not None: return self.db.fixed_ip_get_instance(context, fixed_ip) + if project_id or not context.is_admin: if not context.project: - return self.db.instance_get_all_by_user(context, - context.user_id) + return self.db.instance_get_all_by_user( + context, context.user_id) + if project_id is None: project_id = context.project_id - return self.db.instance_get_all_by_project(context, - project_id) + + return self.db.instance_get_all_by_project( + context, project_id) + return self.db.instance_get_all(context) def _cast_compute_message(self, method, context, instance_id, host=None, @@ -389,30 +471,105 @@ class API(base.Base): kwargs = {'method': method, 'args': params} return rpc.call(context, queue, kwargs) + def _cast_scheduler_message(self, context, args): + """Generic handler for RPC calls to the scheduler""" + rpc.cast(context, FLAGS.scheduler_topic, args) + def snapshot(self, context, instance_id, name): """Snapshot the given instance. :retval: A dict containing image metadata """ - data = {'name': name, 'is_public': False} - image_meta = self.image_service.create(context, data) - params = {'image_id': image_meta['id']} + properties = {'instance_id': str(instance_id), + 'user_id': str(context.user_id)} + sent_meta = {'name': name, 'is_public': False, + 'properties': properties} + recv_meta = self.image_service.create(context, sent_meta) + params = {'image_id': recv_meta['id']} self._cast_compute_message('snapshot_instance', context, instance_id, params=params) - return image_meta + return recv_meta def reboot(self, context, instance_id): """Reboot the given instance.""" self._cast_compute_message('reboot_instance', context, instance_id) + def revert_resize(self, context, instance_id): + """Reverts a resize, deleting the 'new' instance in the process""" + context = context.elevated() + migration_ref = self.db.migration_get_by_instance_and_status(context, + instance_id, 'finished') + if not migration_ref: + raise exception.NotFound(_("No finished migrations found for " + "instance")) + + params = {'migration_id': migration_ref['id']} + self._cast_compute_message('revert_resize', context, instance_id, + migration_ref['dest_compute'], params=params) + self.db.migration_update(context, migration_ref['id'], + {'status': 'reverted'}) + + def confirm_resize(self, context, instance_id): + """Confirms a migration/resize, deleting the 'old' instance in the + process.""" + context = context.elevated() + migration_ref = self.db.migration_get_by_instance_and_status(context, + instance_id, 'finished') + if not migration_ref: + raise exception.NotFound(_("No finished migrations found for " + "instance")) + instance_ref = self.db.instance_get(context, instance_id) + params = {'migration_id': migration_ref['id']} + self._cast_compute_message('confirm_resize', context, instance_id, + migration_ref['source_compute'], params=params) + + self.db.migration_update(context, migration_ref['id'], + {'status': 'confirmed'}) + self.db.instance_update(context, instance_id, + {'host': migration_ref['dest_compute'], }) + + def resize(self, context, instance_id, flavor_id): + """Resize a running instance.""" + instance = self.db.instance_get(context, instance_id) + current_instance_type = self.db.instance_type_get_by_name( + context, instance['instance_type']) + + new_instance_type = self.db.instance_type_get_by_flavor_id( + context, flavor_id) + current_instance_type_name = current_instance_type['name'] + new_instance_type_name = new_instance_type['name'] + LOG.debug(_("Old instance type %(current_instance_type_name)s, " + " new instance type %(new_instance_type_name)s") % locals()) + if not new_instance_type: + raise exception.ApiError(_("Requested flavor %(flavor_id)d " + "does not exist") % locals()) + + current_memory_mb = current_instance_type['memory_mb'] + new_memory_mb = new_instance_type['memory_mb'] + if current_memory_mb > new_memory_mb: + raise exception.ApiError(_("Invalid flavor: cannot downsize" + "instances")) + if current_memory_mb == new_memory_mb: + raise exception.ApiError(_("Invalid flavor: cannot use" + "the same flavor. ")) + + self._cast_scheduler_message(context, + {"method": "prep_resize", + "args": {"topic": FLAGS.compute_topic, + "instance_id": instance_id, + "flavor_id": flavor_id}}) + + @scheduler_api.reroute_compute("pause") def pause(self, context, instance_id): """Pause the given instance.""" self._cast_compute_message('pause_instance', context, instance_id) + @scheduler_api.reroute_compute("unpause") def unpause(self, context, instance_id): """Unpause the given instance.""" self._cast_compute_message('unpause_instance', context, instance_id) + @scheduler_api.reroute_compute("diagnostics") def get_diagnostics(self, context, instance_id): """Retrieve diagnostics for the given instance.""" return self._call_compute_message( @@ -424,29 +581,37 @@ class API(base.Base): """Retrieve actions for the given instance.""" return self.db.instance_get_actions(context, instance_id) + @scheduler_api.reroute_compute("suspend") def suspend(self, context, instance_id): """suspend the instance with instance_id""" self._cast_compute_message('suspend_instance', context, instance_id) + @scheduler_api.reroute_compute("resume") def resume(self, context, instance_id): """resume the instance with instance_id""" self._cast_compute_message('resume_instance', context, instance_id) + @scheduler_api.reroute_compute("rescue") def rescue(self, context, instance_id): """Rescue the given instance.""" self._cast_compute_message('rescue_instance', context, instance_id) + @scheduler_api.reroute_compute("unrescue") def unrescue(self, context, instance_id): """Unrescue the given instance.""" self._cast_compute_message('unrescue_instance', context, instance_id) - def set_admin_password(self, context, instance_id): + def set_admin_password(self, context, instance_id, password=None): """Set the root/admin password for the given instance.""" - self._cast_compute_message('set_admin_password', context, instance_id) + self._cast_compute_message('set_admin_password', context, instance_id, + password) + + def inject_file(self, context, instance_id): + """Write a file to the given instance.""" + self._cast_compute_message('inject_file', context, instance_id) def get_ajax_console(self, context, instance_id): """Get a url to an AJAX Console""" - instance = self.get(context, instance_id) output = self._call_compute_message('get_ajax_console', context, instance_id) @@ -454,9 +619,28 @@ class API(base.Base): {'method': 'authorize_ajax_console', 'args': {'token': output['token'], 'host': output['host'], 'port': output['port']}}) - return {'url': '%s?token=%s' % (FLAGS.ajax_console_proxy_url, + return {'url': '%s/?token=%s' % (FLAGS.ajax_console_proxy_url, output['token'])} + def get_vnc_console(self, context, instance_id): + """Get a url to a VNC Console.""" + instance = self.get(context, instance_id) + output = self._call_compute_message('get_vnc_console', + context, + instance_id) + rpc.call(context, '%s' % FLAGS.vncproxy_topic, + {'method': 'authorize_vnc_console', + 'args': {'token': output['token'], + 'host': output['host'], + 'port': output['port']}}) + + # hostignore and portignore are compatability params for noVNC + return {'url': '%s/vnc_auto.html?token=%s&host=%s&port=%s' % ( + FLAGS.vncproxy_url, + output['token'], + 'hostignore', + 'portignore')} + def get_console_output(self, context, instance_id): """Get console output for an an instance""" return self._call_compute_message('get_console_output', @@ -476,11 +660,25 @@ class API(base.Base): instance = self.get(context, instance_id) return instance['locked'] + def reset_network(self, context, instance_id): + """ + Reset networking on the instance. + + """ + self._cast_compute_message('reset_network', context, instance_id) + + def inject_network_info(self, context, instance_id): + """ + Inject network info for the instance. + + """ + self._cast_compute_message('inject_network_info', context, instance_id) + def attach_volume(self, context, instance_id, volume_id, device): if not re.match("^/dev/[a-z]d[a-z]+$", device): raise exception.ApiError(_("Invalid device specified: %s. " "Example device: /dev/vdb") % device) - self.volume_api.check_attach(context, volume_id) + self.volume_api.check_attach(context, volume_id=volume_id) instance = self.get(context, instance_id) host = instance['host'] rpc.cast(context, @@ -494,7 +692,7 @@ class API(base.Base): instance = self.db.volume_get_instance(context.elevated(), volume_id) if not instance: raise exception.ApiError(_("Volume isn't attached to anything!")) - self.volume_api.check_detach(context, volume_id) + self.volume_api.check_detach(context, volume_id=volume_id) host = instance['host'] rpc.cast(context, self.db.queue_get_for(context, FLAGS.compute_topic, host), @@ -505,5 +703,21 @@ class API(base.Base): def associate_floating_ip(self, context, instance_id, address): instance = self.get(context, instance_id) - self.network_api.associate_floating_ip(context, address, - instance['fixed_ip']) + self.network_api.associate_floating_ip(context, + floating_ip=address, + fixed_ip=instance['fixed_ip']) + + def get_instance_metadata(self, context, instance_id): + """Get all metadata associated with an instance.""" + rv = self.db.instance_metadata_get(context, instance_id) + return dict(rv.iteritems()) + + def delete_instance_metadata(self, context, instance_id, key): + """Delete the given metadata item""" + self.db.instance_metadata_delete(context, instance_id, key) + + def update_or_create_instance_metadata(self, context, instance_id, + metadata): + """Updates or creates instance metadata""" + self.db.instance_metadata_update_or_create(context, instance_id, + metadata) diff --git a/nova/compute/instance_types.py b/nova/compute/instance_types.py index 309313fd0..fa02a5dfa 100644 --- a/nova/compute/instance_types.py +++ b/nova/compute/instance_types.py @@ -4,6 +4,7 @@ # Administrator of the National Aeronautics and Space Administration. # All Rights Reserved. # Copyright (c) 2010 Citrix Systems, Inc. +# Copyright 2011 Ken Pepple # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,30 +22,120 @@ The built-in instance properties. """ -from nova import flags +from nova import context +from nova import db from nova import exception +from nova import flags +from nova import log as logging FLAGS = flags.FLAGS -INSTANCE_TYPES = { - 'm1.tiny': dict(memory_mb=512, vcpus=1, local_gb=0, flavorid=1), - 'm1.small': dict(memory_mb=2048, vcpus=1, local_gb=20, flavorid=2), - 'm1.medium': dict(memory_mb=4096, vcpus=2, local_gb=40, flavorid=3), - 'm1.large': dict(memory_mb=8192, vcpus=4, local_gb=80, flavorid=4), - 'm1.xlarge': dict(memory_mb=16384, vcpus=8, local_gb=160, flavorid=5)} +LOG = logging.getLogger('nova.instance_types') + + +def create(name, memory, vcpus, local_gb, flavorid, swap=0, + rxtx_quota=0, rxtx_cap=0): + """Creates instance types / flavors + arguments: name memory vcpus local_gb flavorid swap rxtx_quota rxtx_cap + """ + for option in [memory, vcpus, local_gb, flavorid]: + try: + int(option) + except ValueError: + raise exception.InvalidInputException( + _("create arguments must be positive integers")) + if (int(memory) <= 0) or (int(vcpus) <= 0) or (int(local_gb) < 0): + raise exception.InvalidInputException( + _("create arguments must be positive integers")) + + try: + db.instance_type_create( + context.get_admin_context(), + dict(name=name, + memory_mb=memory, + vcpus=vcpus, + local_gb=local_gb, + flavorid=flavorid, + swap=swap, + rxtx_quota=rxtx_quota, + rxtx_cap=rxtx_cap)) + except exception.DBError, e: + LOG.exception(_('DB error: %s' % e)) + raise exception.ApiError(_("Cannot create instance type: %s" % name)) + + +def destroy(name): + """Marks instance types / flavors as deleted + arguments: name""" + if name == None: + raise exception.InvalidInputException(_("No instance type specified")) + else: + try: + db.instance_type_destroy(context.get_admin_context(), name) + except exception.NotFound: + LOG.exception(_('Instance type %s not found for deletion' % name)) + raise exception.ApiError(_("Unknown instance type: %s" % name)) + + +def purge(name): + """Removes instance types / flavors from database + arguments: name""" + if name == None: + raise exception.InvalidInputException(_("No instance type specified")) + else: + try: + db.instance_type_purge(context.get_admin_context(), name) + except exception.NotFound: + LOG.exception(_('Instance type %s not found for purge' % name)) + raise exception.ApiError(_("Unknown instance type: %s" % name)) + + +def get_all_types(inactive=0): + """Retrieves non-deleted instance_types. + Pass true as argument if you want deleted instance types returned also.""" + return db.instance_type_get_all(context.get_admin_context(), inactive) + + +def get_all_flavors(): + """retrieves non-deleted flavors. alias for instance_types.get_all_types(). + Pass true as argument if you want deleted instance types returned also.""" + return get_all_types(context.get_admin_context()) + + +def get_instance_type(name): + """Retrieves single instance type by name""" + if name is None: + return FLAGS.default_instance_type + try: + ctxt = context.get_admin_context() + inst_type = db.instance_type_get_by_name(ctxt, name) + return inst_type + except exception.DBError: + raise exception.ApiError(_("Unknown instance type: %s" % name)) def get_by_type(instance_type): - """Build instance data structure and save it to the data store.""" + """retrieve instance type name""" if instance_type is None: return FLAGS.default_instance_type - if instance_type not in INSTANCE_TYPES: - raise exception.ApiError(_("Unknown instance type: %s") % \ - instance_type, "Invalid") - return instance_type + + try: + ctxt = context.get_admin_context() + inst_type = db.instance_type_get_by_name(ctxt, instance_type) + return inst_type['name'] + except exception.DBError, e: + LOG.exception(_('DB error: %s' % e)) + raise exception.ApiError(_("Unknown instance type: %s" %\ + instance_type)) def get_by_flavor_id(flavor_id): - for instance_type, details in INSTANCE_TYPES.iteritems(): - if details['flavorid'] == flavor_id: - return instance_type - return FLAGS.default_instance_type + """retrieve instance type's name by flavor_id""" + if flavor_id is None: + return FLAGS.default_instance_type + try: + ctxt = context.get_admin_context() + flavor = db.instance_type_get_by_flavor_id(ctxt, flavor_id) + return flavor['name'] + except exception.DBError, e: + LOG.exception(_('DB error: %s' % e)) + raise exception.ApiError(_("Unknown flavor: %s" % flavor_id)) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index d3d8a617e..b0c301925 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2,6 +2,7 @@ # Copyright 2010 United States Government as represented by the # Administrator of the National Aeronautics and Space Administration. +# Copyright 2011 Justin Santa Barbara # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -35,11 +36,16 @@ terminating it. """ import datetime +import os import random import string import socket +import sys +import tempfile import functools +from eventlet import greenthread + from nova import exception from nova import flags from nova import log as logging @@ -47,6 +53,7 @@ from nova import manager from nova import rpc from nova import utils from nova.compute import power_state +from nova.virt import driver FLAGS = flags.FLAGS flags.DEFINE_string('instances_path', '$state_path/instances', @@ -60,6 +67,12 @@ flags.DEFINE_integer('password_length', 12, flags.DEFINE_string('console_host', socket.gethostname(), 'Console proxy host to use to connect to instances on' 'this host.') +flags.DEFINE_integer('live_migration_retry_count', 30, + "Retry count needed in live_migration." + " sleep 1 sec for each count") +flags.DEFINE_integer("rescue_timeout", 0, + "Automatically unrescue an instance after N seconds." + " Set to 0 to disable.") LOG = logging.getLogger('nova.compute.manager') @@ -98,7 +111,7 @@ def checks_instance_lock(function): return decorated_function -class ComputeManager(manager.Manager): +class ComputeManager(manager.SchedulerDependentManager): """Manages the running instances from creation to destruction.""" @@ -108,10 +121,19 @@ class ComputeManager(manager.Manager): # and redocument the module docstring if not compute_driver: compute_driver = FLAGS.compute_driver - self.driver = utils.import_object(compute_driver) + + try: + self.driver = utils.check_isinstance( + utils.import_object(compute_driver), + driver.ComputeDriver) + except ImportError as e: + LOG.error(_("Unable to load the virtualization driver: %s") % (e)) + sys.exit(1) + self.network_manager = utils.import_object(FLAGS.network_manager) self.volume_manager = utils.import_object(FLAGS.volume_manager) - super(ComputeManager, self).__init__(*args, **kwargs) + super(ComputeManager, self).__init__(service_name="compute", + *args, **kwargs) def init_host(self): """Do any initialization that needs to be run if this is a @@ -130,7 +152,7 @@ class ComputeManager(manager.Manager): state = power_state.FAILED self.db.instance_set_state(context, instance_id, state) - def get_console_topic(self, context, **_kwargs): + def get_console_topic(self, context, **kwargs): """Retrieves the console host for a project on this host Currently this is just set in the flags for each compute host.""" @@ -139,7 +161,7 @@ class ComputeManager(manager.Manager): FLAGS.console_topic, FLAGS.console_host) - def get_network_topic(self, context, **_kwargs): + def get_network_topic(self, context, **kwargs): """Retrieves the network host for a project on this host""" # TODO(vish): This method should be memoized. This will make # the call to get_network_host cheaper, so that @@ -158,13 +180,13 @@ class ComputeManager(manager.Manager): @exception.wrap_exception def refresh_security_group_rules(self, context, - security_group_id, **_kwargs): + security_group_id, **kwargs): """This call passes straight through to the virtualization driver.""" return self.driver.refresh_security_group_rules(security_group_id) @exception.wrap_exception def refresh_security_group_members(self, context, - security_group_id, **_kwargs): + security_group_id, **kwargs): """This call passes straight through to the virtualization driver.""" return self.driver.refresh_security_group_members(security_group_id) @@ -173,18 +195,18 @@ class ComputeManager(manager.Manager): """This call passes straight through to the virtualization driver.""" return self.driver.refresh_provider_fw_rules() - @exception.wrap_exception - def run_instance(self, context, instance_id, **_kwargs): + def run_instance(self, context, instance_id, **kwargs): """Launch a new instance with specified options.""" context = context.elevated() instance_ref = self.db.instance_get(context, instance_id) + instance_ref.injected_files = kwargs.get('injected_files', []) if instance_ref['name'] in self.driver.list_instances(): raise exception.Error(_("Instance has already been created")) LOG.audit(_("instance %s: starting..."), instance_id, context=context) self.db.instance_update(context, instance_id, - {'host': self.host}) + {'host': self.host, 'launched_on': self.host}) self.db.instance_set_state(context, instance_id, @@ -218,9 +240,10 @@ class ComputeManager(manager.Manager): self.db.instance_update(context, instance_id, {'launched_at': now}) - except Exception: # pylint: disable-msg=W0702 - LOG.exception(_("instance %s: Failed to spawn"), instance_id, - context=context) + except Exception: # pylint: disable=W0702 + LOG.exception(_("Instance '%s' failed to spawn. Is virtualization" + " enabled in the BIOS?"), instance_id, + context=context) self.db.instance_set_state(context, instance_id, power_state.SHUTDOWN) @@ -328,28 +351,38 @@ class ComputeManager(manager.Manager): """Set the root/admin password for an instance on this server.""" context = context.elevated() instance_ref = self.db.instance_get(context, instance_id) - if instance_ref['state'] != power_state.RUNNING: - logging.warn('trying to reset the password on a non-running ' - 'instance: %s (state: %s expected: %s)', - instance_ref['id'], - instance_ref['state'], - power_state.RUNNING) - - logging.debug('instance %s: setting admin password', + instance_id = instance_ref['id'] + instance_state = instance_ref['state'] + expected_state = power_state.RUNNING + if instance_state != expected_state: + LOG.warn(_('trying to reset the password on a non-running ' + 'instance: %(instance_id)s (state: %(instance_state)s ' + 'expected: %(expected_state)s)') % locals()) + LOG.audit(_('instance %s: setting admin password'), instance_ref['name']) if new_pass is None: # Generate a random password - new_pass = self._generate_password(FLAGS.password_length) - + new_pass = utils.generate_password(FLAGS.password_length) self.driver.set_admin_password(instance_ref, new_pass) self._update_state(context, instance_id) - def _generate_password(self, length=20): - """Generate a random sequence of letters and digits - to be used as a password. - """ - chrs = string.letters + string.digits - return "".join([random.choice(chrs) for i in xrange(length)]) + @exception.wrap_exception + @checks_instance_lock + def inject_file(self, context, instance_id, path, file_contents): + """Write a file to the specified path on an instance on this server""" + context = context.elevated() + instance_ref = self.db.instance_get(context, instance_id) + instance_id = instance_ref['id'] + instance_state = instance_ref['state'] + expected_state = power_state.RUNNING + if instance_state != expected_state: + LOG.warn(_('trying to inject a file into a non-running ' + 'instance: %(instance_id)s (state: %(instance_state)s ' + 'expected: %(expected_state)s)') % locals()) + nm = instance_ref['name'] + msg = _('instance %(nm)s: injecting file to %(path)s') % locals() + LOG.audit(msg) + self.driver.inject_file(instance_ref, path, file_contents) @exception.wrap_exception @checks_instance_lock @@ -358,12 +391,19 @@ class ComputeManager(manager.Manager): context = context.elevated() instance_ref = self.db.instance_get(context, instance_id) LOG.audit(_('instance %s: rescuing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'rescuing') + self.db.instance_set_state( + context, + instance_id, + power_state.NOSTATE, + 'rescuing') self.network_manager.setup_compute_network(context, instance_id) - self.driver.rescue(instance_ref) + self.driver.rescue( + instance_ref, + lambda result: self._update_state_callback( + self, + context, + instance_id, + result)) self._update_state(context, instance_id) @exception.wrap_exception @@ -373,11 +413,18 @@ class ComputeManager(manager.Manager): context = context.elevated() instance_ref = self.db.instance_get(context, instance_id) LOG.audit(_('instance %s: unrescuing'), instance_id, context=context) - self.db.instance_set_state(context, - instance_id, - power_state.NOSTATE, - 'unrescuing') - self.driver.unrescue(instance_ref) + self.db.instance_set_state( + context, + instance_id, + power_state.NOSTATE, + 'unrescuing') + self.driver.unrescue( + instance_ref, + lambda result: self._update_state_callback( + self, + context, + instance_id, + result)) self._update_state(context, instance_id) @staticmethod @@ -387,6 +434,141 @@ class ComputeManager(manager.Manager): @exception.wrap_exception @checks_instance_lock + def confirm_resize(self, context, instance_id, migration_id): + """Destroys the source instance""" + context = context.elevated() + instance_ref = self.db.instance_get(context, instance_id) + migration_ref = self.db.migration_get(context, migration_id) + self.driver.destroy(instance_ref) + + @exception.wrap_exception + @checks_instance_lock + def revert_resize(self, context, instance_id, migration_id): + """Destroys the new instance on the destination machine, + reverts the model changes, and powers on the old + instance on the source machine""" + instance_ref = self.db.instance_get(context, instance_id) + migration_ref = self.db.migration_get(context, migration_id) + + self.driver.destroy(instance_ref) + topic = self.db.queue_get_for(context, FLAGS.compute_topic, + instance_ref['host']) + rpc.cast(context, topic, + {'method': 'finish_revert_resize', + 'args': { + 'migration_id': migration_ref['id'], + 'instance_id': instance_id, }, + }) + + @exception.wrap_exception + @checks_instance_lock + def finish_revert_resize(self, context, instance_id, migration_id): + """Finishes the second half of reverting a resize, powering back on + the source instance and reverting the resized attributes in the + database""" + instance_ref = self.db.instance_get(context, instance_id) + migration_ref = self.db.migration_get(context, migration_id) + instance_type = self.db.instance_type_get_by_flavor_id(context, + migration_ref['old_flavor_id']) + + # Just roll back the record. There's no need to resize down since + # the 'old' VM already has the preferred attributes + self.db.instance_update(context, instance_id, + dict(memory_mb=instance_type['memory_mb'], + vcpus=instance_type['vcpus'], + local_gb=instance_type['local_gb'])) + + self.driver.revert_resize(instance_ref) + self.db.migration_update(context, migration_id, + {'status': 'reverted'}) + + @exception.wrap_exception + @checks_instance_lock + def prep_resize(self, context, instance_id, flavor_id): + """Initiates the process of moving a running instance to another + host, possibly changing the RAM and disk size in the process""" + context = context.elevated() + instance_ref = self.db.instance_get(context, instance_id) + if instance_ref['host'] == FLAGS.host: + raise exception.Error(_( + 'Migration error: destination same as source!')) + + instance_type = self.db.instance_type_get_by_flavor_id(context, + flavor_id) + migration_ref = self.db.migration_create(context, + {'instance_id': instance_id, + 'source_compute': instance_ref['host'], + 'dest_compute': FLAGS.host, + 'dest_host': self.driver.get_host_ip_addr(), + 'old_flavor_id': instance_type['flavorid'], + 'new_flavor_id': flavor_id, + 'status': 'pre-migrating'}) + + LOG.audit(_('instance %s: migrating to '), instance_id, + context=context) + topic = self.db.queue_get_for(context, FLAGS.compute_topic, + instance_ref['host']) + rpc.cast(context, topic, + {'method': 'resize_instance', + 'args': { + 'migration_id': migration_ref['id'], + 'instance_id': instance_id, }, + }) + + @exception.wrap_exception + @checks_instance_lock + def resize_instance(self, context, instance_id, migration_id): + """Starts the migration of a running instance to another host""" + migration_ref = self.db.migration_get(context, migration_id) + instance_ref = self.db.instance_get(context, instance_id) + self.db.migration_update(context, migration_id, + {'status': 'migrating', }) + + disk_info = self.driver.migrate_disk_and_power_off(instance_ref, + migration_ref['dest_host']) + self.db.migration_update(context, migration_id, + {'status': 'post-migrating', }) + + service = self.db.service_get_by_host_and_topic(context, + migration_ref['dest_compute'], FLAGS.compute_topic) + topic = self.db.queue_get_for(context, FLAGS.compute_topic, + migration_ref['dest_compute']) + rpc.cast(context, topic, + {'method': 'finish_resize', + 'args': { + 'migration_id': migration_id, + 'instance_id': instance_id, + 'disk_info': disk_info, }, + }) + + @exception.wrap_exception + @checks_instance_lock + def finish_resize(self, context, instance_id, migration_id, disk_info): + """Completes the migration process by setting up the newly transferred + disk and turning on the instance on its new host machine""" + migration_ref = self.db.migration_get(context, migration_id) + instance_ref = self.db.instance_get(context, + migration_ref['instance_id']) + # TODO(mdietz): apply the rest of the instance_type attributes going + # after they're supported + instance_type = self.db.instance_type_get_by_flavor_id(context, + migration_ref['new_flavor_id']) + self.db.instance_update(context, instance_id, + dict(instance_type=instance_type['name'], + memory_mb=instance_type['memory_mb'], + vcpus=instance_type['vcpus'], + local_gb=instance_type['local_gb'])) + + # reload the updated instance ref + # FIXME(mdietz): is there reload functionality? + instance_ref = self.db.instance_get(context, instance_id) + self.driver.finish_resize(instance_ref, disk_info) + + self.db.migration_update(context, migration_id, + {'status': 'finished', }) + + @exception.wrap_exception + @checks_instance_lock def pause_instance(self, context, instance_id): """Pause an instance on this server.""" context = context.elevated() @@ -503,6 +685,30 @@ class ComputeManager(manager.Manager): instance_ref = self.db.instance_get(context, instance_id) return instance_ref['locked'] + @checks_instance_lock + def reset_network(self, context, instance_id): + """ + Reset networking on the instance. + + """ + context = context.elevated() + instance_ref = self.db.instance_get(context, instance_id) + LOG.debug(_('instance %s: reset network'), instance_id, + context=context) + self.driver.reset_network(instance_ref) + + @checks_instance_lock + def inject_network_info(self, context, instance_id): + """ + Inject network info for the instance. + + """ + context = context.elevated() + instance_ref = self.db.instance_get(context, instance_id) + LOG.debug(_('instance %s: inject network info'), instance_id, + context=context) + self.driver.inject_network_info(instance_ref) + @exception.wrap_exception def get_console_output(self, context, instance_id): """Send the console output for an instance.""" @@ -516,11 +722,20 @@ class ComputeManager(manager.Manager): def get_ajax_console(self, context, instance_id): """Return connection information for an ajax console""" context = context.elevated() - logging.debug(_("instance %s: getting ajax console"), instance_id) + LOG.debug(_("instance %s: getting ajax console"), instance_id) instance_ref = self.db.instance_get(context, instance_id) return self.driver.get_ajax_console(instance_ref) + @exception.wrap_exception + def get_vnc_console(self, context, instance_id): + """Return connection information for an vnc console.""" + context = context.elevated() + LOG.debug(_("instance %s: getting vnc console"), instance_id) + instance_ref = self.db.instance_get(context, instance_id) + + return self.driver.get_vnc_console(instance_ref) + @checks_instance_lock def attach_volume(self, context, instance_id, volume_id, mountpoint): """Attach a volume to an instance.""" @@ -538,7 +753,7 @@ class ComputeManager(manager.Manager): volume_id, instance_id, mountpoint) - except Exception as exc: # pylint: disable-msg=W0702 + except Exception as exc: # pylint: disable=W0702 # NOTE(vish): The inline callback eats the exception info so we # log the traceback here and reraise the same # ecxception below. @@ -569,3 +784,333 @@ class ComputeManager(manager.Manager): self.volume_manager.remove_compute_volume(context, volume_id) self.db.volume_detached(context, volume_id) return True + + @exception.wrap_exception + def compare_cpu(self, context, cpu_info): + """Checks the host cpu is compatible to a cpu given by xml. + + :param context: security context + :param cpu_info: json string obtained from virConnect.getCapabilities + :returns: See driver.compare_cpu + + """ + return self.driver.compare_cpu(cpu_info) + + @exception.wrap_exception + def create_shared_storage_test_file(self, context): + """Makes tmpfile under FLAGS.instance_path. + + This method enables compute nodes to recognize that they mounts + same shared storage. (create|check|creanup)_shared_storage_test_file() + is a pair. + + :param context: security context + :returns: tmpfile name(basename) + + """ + + dirpath = FLAGS.instances_path + fd, tmp_file = tempfile.mkstemp(dir=dirpath) + LOG.debug(_("Creating tmpfile %s to notify to other " + "compute nodes that they should mount " + "the same storage.") % tmp_file) + os.close(fd) + return os.path.basename(tmp_file) + + @exception.wrap_exception + def check_shared_storage_test_file(self, context, filename): + """Confirms existence of the tmpfile under FLAGS.instances_path. + + :param context: security context + :param filename: confirm existence of FLAGS.instances_path/thisfile + + """ + + tmp_file = os.path.join(FLAGS.instances_path, filename) + if not os.path.exists(tmp_file): + raise exception.NotFound(_('%s not found') % tmp_file) + + @exception.wrap_exception + def cleanup_shared_storage_test_file(self, context, filename): + """Removes existence of the tmpfile under FLAGS.instances_path. + + :param context: security context + :param filename: remove existence of FLAGS.instances_path/thisfile + + """ + + tmp_file = os.path.join(FLAGS.instances_path, filename) + os.remove(tmp_file) + + @exception.wrap_exception + def update_available_resource(self, context): + """See comments update_resource_info. + + :param context: security context + :returns: See driver.update_available_resource() + + """ + + return self.driver.update_available_resource(context, self.host) + + def pre_live_migration(self, context, instance_id, time=None): + """Preparations for live migration at dest host. + + :param context: security context + :param instance_id: nova.db.sqlalchemy.models.Instance.Id + + """ + + if not time: + time = greenthread + + # Getting instance info + instance_ref = self.db.instance_get(context, instance_id) + ec2_id = instance_ref['hostname'] + + # Getting fixed ips + fixed_ip = self.db.instance_get_fixed_address(context, instance_id) + if not fixed_ip: + msg = _("%(instance_id)s(%(ec2_id)s) does not have fixed_ip.") + raise exception.NotFound(msg % locals()) + + # If any volume is mounted, prepare here. + if not instance_ref['volumes']: + LOG.info(_("%s has no volume."), ec2_id) + else: + for v in instance_ref['volumes']: + self.volume_manager.setup_compute_volume(context, v['id']) + + # Bridge settings. + # Call this method prior to ensure_filtering_rules_for_instance, + # since bridge is not set up, ensure_filtering_rules_for instance + # fails. + # + # Retry operation is necessary because continuously request comes, + # concorrent request occurs to iptables, then it complains. + max_retry = FLAGS.live_migration_retry_count + for cnt in range(max_retry): + try: + self.network_manager.setup_compute_network(context, + instance_id) + break + except exception.ProcessExecutionError: + if cnt == max_retry - 1: + raise + else: + LOG.warn(_("setup_compute_network() failed %(cnt)d." + "Retry up to %(max_retry)d for %(ec2_id)s.") + % locals()) + time.sleep(1) + + # Creating filters to hypervisors and firewalls. + # An example is that nova-instance-instance-xxx, + # which is written to libvirt.xml(Check "virsh nwfilter-list") + # This nwfilter is necessary on the destination host. + # In addition, this method is creating filtering rule + # onto destination host. + self.driver.ensure_filtering_rules_for_instance(instance_ref) + + def live_migration(self, context, instance_id, dest): + """Executing live migration. + + :param context: security context + :param instance_id: nova.db.sqlalchemy.models.Instance.Id + :param dest: destination host + + """ + + # Get instance for error handling. + instance_ref = self.db.instance_get(context, instance_id) + i_name = instance_ref.name + + try: + # Checking volume node is working correctly when any volumes + # are attached to instances. + if instance_ref['volumes']: + rpc.call(context, + FLAGS.volume_topic, + {"method": "check_for_export", + "args": {'instance_id': instance_id}}) + + # Asking dest host to preparing live migration. + rpc.call(context, + self.db.queue_get_for(context, FLAGS.compute_topic, dest), + {"method": "pre_live_migration", + "args": {'instance_id': instance_id}}) + + except Exception: + msg = _("Pre live migration for %(i_name)s failed at %(dest)s") + LOG.error(msg % locals()) + self.recover_live_migration(context, instance_ref) + raise + + # Executing live migration + # live_migration might raises exceptions, but + # nothing must be recovered in this version. + self.driver.live_migration(context, instance_ref, dest, + self.post_live_migration, + self.recover_live_migration) + + def post_live_migration(self, ctxt, instance_ref, dest): + """Post operations for live migration. + + This method is called from live_migration + and mainly updating database record. + + :param ctxt: security context + :param instance_id: nova.db.sqlalchemy.models.Instance.Id + :param dest: destination host + + """ + + LOG.info(_('post_live_migration() is started..')) + instance_id = instance_ref['id'] + + # Detaching volumes. + try: + for vol in self.db.volume_get_all_by_instance(ctxt, instance_id): + self.volume_manager.remove_compute_volume(ctxt, vol['id']) + except exception.NotFound: + pass + + # Releasing vlan. + # (not necessary in current implementation?) + + # Releasing security group ingress rule. + self.driver.unfilter_instance(instance_ref) + + # Database updating. + i_name = instance_ref.name + try: + # Not return if floating_ip is not found, otherwise, + # instance never be accessible.. + floating_ip = self.db.instance_get_floating_address(ctxt, + instance_id) + if not floating_ip: + LOG.info(_('No floating_ip is found for %s.'), i_name) + else: + floating_ip_ref = self.db.floating_ip_get_by_address(ctxt, + floating_ip) + self.db.floating_ip_update(ctxt, + floating_ip_ref['address'], + {'host': dest}) + except exception.NotFound: + LOG.info(_('No floating_ip is found for %s.'), i_name) + except: + LOG.error(_("Live migration: Unexpected error:" + "%s cannot inherit floating ip..") % i_name) + + # Restore instance/volume state + self.recover_live_migration(ctxt, instance_ref, dest) + + LOG.info(_('Migrating %(i_name)s to %(dest)s finished successfully.') + % locals()) + LOG.info(_("You may see the error \"libvirt: QEMU error: " + "Domain not found: no domain with matching name.\" " + "This error can be safely ignored.")) + + def recover_live_migration(self, ctxt, instance_ref, host=None): + """Recovers Instance/volume state from migrating -> running. + + :param ctxt: security context + :param instance_id: nova.db.sqlalchemy.models.Instance.Id + :param host: + DB column value is updated by this hostname. + if none, the host instance currently running is selected. + + """ + + if not host: + host = instance_ref['host'] + + self.db.instance_update(ctxt, + instance_ref['id'], + {'state_description': 'running', + 'state': power_state.RUNNING, + 'host': host}) + + for volume in instance_ref['volumes']: + self.db.volume_update(ctxt, volume['id'], {'status': 'in-use'}) + + def periodic_tasks(self, context=None): + """Tasks to be run at a periodic interval.""" + error_list = super(ComputeManager, self).periodic_tasks(context) + if error_list is None: + error_list = [] + + try: + if FLAGS.rescue_timeout > 0: + self.driver.poll_rescued_instances(FLAGS.rescue_timeout) + except Exception as ex: + LOG.warning(_("Error during poll_rescued_instances: %s"), + unicode(ex)) + error_list.append(ex) + + try: + self._poll_instance_states(context) + except Exception as ex: + LOG.warning(_("Error during instance poll: %s"), + unicode(ex)) + error_list.append(ex) + + return error_list + + def _poll_instance_states(self, context): + vm_instances = self.driver.list_instances_detail() + vm_instances = dict((vm.name, vm) for vm in vm_instances) + + # Keep a list of VMs not in the DB, cross them off as we find them + vms_not_found_in_db = list(vm_instances.keys()) + + db_instances = self.db.instance_get_all_by_host(context, self.host) + + for db_instance in db_instances: + name = db_instance['name'] + db_state = db_instance['state'] + vm_instance = vm_instances.get(name) + + if vm_instance is None: + # NOTE(justinsb): We have to be very careful here, because a + # concurrent operation could be in progress (e.g. a spawn) + if db_state == power_state.NOSTATE: + # Assume that NOSTATE => spawning + # TODO(justinsb): This does mean that if we crash during a + # spawn, the machine will never leave the spawning state, + # but this is just the way nova is; this function isn't + # trying to correct that problem. + # We could have a separate task to correct this error. + # TODO(justinsb): What happens during a live migration? + LOG.info(_("Found instance '%(name)s' in DB but no VM. " + "State=%(db_state)s, so assuming spawn is in " + "progress.") % locals()) + vm_state = db_state + else: + LOG.info(_("Found instance '%(name)s' in DB but no VM. " + "State=%(db_state)s, so setting state to " + "shutoff.") % locals()) + vm_state = power_state.SHUTOFF + else: + vm_state = vm_instance.state + vms_not_found_in_db.remove(name) + + if vm_state != db_state: + LOG.info(_("DB/VM state mismatch. Changing state from " + "'%(db_state)s' to '%(vm_state)s'") % locals()) + self.db.instance_set_state(context, + db_instance['id'], + vm_state) + + if vm_state == power_state.SHUTOFF: + # TODO(soren): This is what the compute manager does when you + # terminate an instance. At some point I figure we'll have a + # "terminated" state and some sort of cleanup job that runs + # occasionally, cleaning them out. + self.db.instance_destroy(context, db_instance['id']) + + # Are there VMs not in the DB? + for vm_not_found_in_db in vms_not_found_in_db: + name = vm_not_found_in_db + # TODO(justinsb): What to do here? Adopt it? Shut it down? + LOG.warning(_("Found VM not in DB: '%(name)s'. Ignoring") + % locals()) diff --git a/nova/compute/power_state.py b/nova/compute/power_state.py index adfc2dff0..ef013b2ef 100644 --- a/nova/compute/power_state.py +++ b/nova/compute/power_state.py @@ -2,6 +2,7 @@ # Copyright 2010 United States Government as represented by the # Administrator of the National Aeronautics and Space Administration. +# Copyright 2011 Justin Santa Barbara # All Rights Reserved. # Copyright (c) 2010 Citrix Systems, Inc. # @@ -19,6 +20,7 @@ """The various power states that a VM can be in.""" +#NOTE(justinsb): These are the virDomainState values from libvirt NOSTATE = 0x00 RUNNING = 0x01 BLOCKED = 0x02 @@ -29,9 +31,10 @@ CRASHED = 0x06 SUSPENDED = 0x07 FAILED = 0x08 - -def name(code): - d = { +# TODO(justinsb): Power state really needs to be a proper class, +# so that we're not locked into the libvirt status codes and can put mapping +# logic here rather than spread throughout the code +_STATE_MAP = { NOSTATE: 'pending', RUNNING: 'running', BLOCKED: 'blocked', @@ -41,4 +44,11 @@ def name(code): CRASHED: 'crashed', SUSPENDED: 'suspended', FAILED: 'failed to spawn'} - return d[code] + + +def name(code): + return _STATE_MAP[code] + + +def valid_states(): + return _STATE_MAP.keys() |
