| author | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
|---|---|---|
| committer | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
| commit | 23efe8d14973a7c94de167562340938ba00d043b (patch) | |
| tree | 4e383662f4d11763684901e454025ec9c9297543 /nova/compute | |
| parent | 609a912fa8a816c1f47140489dcc1131356cd67c (diff) | |
| parent | abc6c82449dfc46a33dcd8190840e51f44b5b930 (diff) | |
refactored out middleware, now it's a decorator on service.api
Diffstat (limited to 'nova/compute')
| -rw-r--r-- | nova/compute/api.py | 61 |
| -rw-r--r-- | nova/compute/manager.py | 269 |
2 files changed, 311 insertions, 19 deletions
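
The headline change, per the commit message, replaces the old zone-routing middleware with a decorator applied directly to compute API methods: `@scheduler_api.reroute_if_not_found("get")` on the new `routing_get` below, plus the commented-out applications on `delete`, `pause`, and friends. Here is a minimal sketch of how such a decorator could be shaped; it is not the actual `nova.scheduler.api` code, and `InstanceNotFound` and `reroute_to_child_zones` are hypothetical stand-ins:

```python
import functools


class InstanceNotFound(Exception):
    """Stand-in for the 'instance is not in this zone's DB' error."""


def reroute_to_child_zones(context, method_name, instance_id):
    """Hypothetical helper: look the instance up in child zones via
    novaclient and invoke method_name there."""
    raise NotImplementedError


def reroute_if_not_found(method_name):
    """Try the local compute API call first; on a local miss, re-route
    the same operation to a child zone instead of failing."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, context, instance_id, *args, **kwargs):
            try:
                return func(self, context, instance_id, *args, **kwargs)
            except InstanceNotFound:
                return reroute_to_child_zones(context, method_name,
                                              instance_id)
        return wrapper
    return decorator
```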
```diff
diff --git a/nova/compute/api.py b/nova/compute/api.py
index f4bfe720c..9fb4c8ae2 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -34,6 +34,7 @@ from nova import rpc
 from nova import utils
 from nova import volume
 from nova.compute import instance_types
+from nova.scheduler import api as scheduler_api
 from nova.db import base
 
 FLAGS = flags.FLAGS
@@ -80,13 +81,32 @@ class API(base.Base):
                 topic,
                 {"method": "get_network_topic", "args": {'fake': 1}})
 
+    def _check_injected_file_quota(self, context, injected_files):
+        """
+        Enforce quota limits on injected files
+
+        Raises a QuotaError if any limit is exceeded
+        """
+        if injected_files is None:
+            return
+        limit = quota.allowed_injected_files(context)
+        if len(injected_files) > limit:
+            raise quota.QuotaError(code="OnsetFileLimitExceeded")
+        path_limit = quota.allowed_injected_file_path_bytes(context)
+        content_limit = quota.allowed_injected_file_content_bytes(context)
+        for path, content in injected_files:
+            if len(path) > path_limit:
+                raise quota.QuotaError(code="OnsetFilePathLimitExceeded")
+            if len(content) > content_limit:
+                raise quota.QuotaError(code="OnsetFileContentLimitExceeded")
+
     def create(self, context, instance_type,
                image_id, kernel_id=None, ramdisk_id=None,
                min_count=1, max_count=1,
                display_name='', display_description='',
                key_name=None, key_data=None, security_group='default',
                availability_zone=None, user_data=None, metadata=[],
-               onset_files=None):
+               injected_files=None):
         """Create the number of instances requested if quota and
         other arguments check out ok."""
@@ -124,11 +144,18 @@ class API(base.Base):
             LOG.warn(msg)
             raise quota.QuotaError(msg, "MetadataLimitExceeded")
 
+        self._check_injected_file_quota(context, injected_files)
+
         image = self.image_service.show(context, image_id)
+
+        os_type = None
+        if 'properties' in image and 'os_type' in image['properties']:
+            os_type = image['properties']['os_type']
+
         if kernel_id is None:
-            kernel_id = image.get('kernel_id', None)
+            kernel_id = image['properties'].get('kernel_id', None)
         if ramdisk_id is None:
-            ramdisk_id = image.get('ramdisk_id', None)
+            ramdisk_id = image['properties'].get('ramdisk_id', None)
         # FIXME(sirp): is there a way we can remove null_kernel?
         # No kernel and ramdisk for raw images
         if kernel_id == str(FLAGS.null_kernel):
@@ -165,6 +192,7 @@ class API(base.Base):
             'image_id': image_id,
             'kernel_id': kernel_id or '',
             'ramdisk_id': ramdisk_id or '',
+            'state': 0,
             'state_description': 'scheduling',
             'user_id': context.user_id,
             'project_id': context.project_id,
@@ -180,7 +208,8 @@ class API(base.Base):
             'key_data': key_data,
             'locked': False,
             'metadata': metadata,
-            'availability_zone': availability_zone}
+            'availability_zone': availability_zone,
+            'os_type': os_type}
         elevated = context.elevated()
         instances = []
         LOG.debug(_("Going to run %s instances..."), num_instances)
@@ -218,7 +247,7 @@ class API(base.Base):
                      "args": {"topic": FLAGS.compute_topic,
                               "instance_id": instance_id,
                               "availability_zone": availability_zone,
-                              "onset_files": onset_files}})
+                              "injected_files": injected_files}})
         for group_id in security_groups:
             self.trigger_security_group_members_refresh(elevated, group_id)
 
@@ -314,6 +343,7 @@ class API(base.Base):
         rv = self.db.instance_update(context, instance_id, kwargs)
         return dict(rv.iteritems())
 
+    #@scheduler_api.reroute_if_not_found("delete")
     def delete(self, context, instance_id):
         LOG.debug(_("Going to try to terminate %s"), instance_id)
         try:
@@ -343,9 +373,18 @@ class API(base.Base):
 
     def get(self, context, instance_id):
         """Get a single instance with the given ID."""
+        LOG.debug("*** COMPUTE.API::GET")
         rv = self.db.instance_get(context, instance_id)
+        LOG.debug("*** COMPUTE.API::GET OUT CLEAN")
         return dict(rv.iteritems())
 
+    @scheduler_api.reroute_if_not_found("get")
+    def routing_get(self, context, instance_id):
+        """Use this method instead of get() if this is the only
+           operation you intend to do. It will route to novaclient.get
+           if the instance is not found."""
+        return self.get(context, instance_id)
+
     def get_all(self, context, project_id=None, reservation_id=None,
                 fixed_ip=None):
         """Get all instances, possibly filtered by one of the
@@ -463,14 +502,17 @@ class API(base.Base):
                  "args": {"topic": FLAGS.compute_topic,
                           "instance_id": instance_id, }},)
 
+    #@scheduler_api.reroute_if_not_found("pause")
     def pause(self, context, instance_id):
         """Pause the given instance."""
         self._cast_compute_message('pause_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("unpause")
     def unpause(self, context, instance_id):
         """Unpause the given instance."""
         self._cast_compute_message('unpause_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("diagnostics")
     def get_diagnostics(self, context, instance_id):
         """Retrieve diagnostics for the given instance."""
         return self._call_compute_message(
@@ -482,25 +524,30 @@ class API(base.Base):
         """Retrieve actions for the given instance."""
         return self.db.instance_get_actions(context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("suspend")
     def suspend(self, context, instance_id):
         """suspend the instance with instance_id"""
         self._cast_compute_message('suspend_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("resume")
     def resume(self, context, instance_id):
         """resume the instance with instance_id"""
         self._cast_compute_message('resume_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("rescue")
     def rescue(self, context, instance_id):
         """Rescue the given instance."""
         self._cast_compute_message('rescue_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("unrescue")
     def unrescue(self, context, instance_id):
         """Unrescue the given instance."""
         self._cast_compute_message('unrescue_instance', context,
                                    instance_id)
 
-    def set_admin_password(self, context, instance_id):
+    def set_admin_password(self, context, instance_id, password=None):
         """Set the root/admin password for the given instance."""
-        self._cast_compute_message('set_admin_password', context, instance_id)
+        self._cast_compute_message('set_admin_password', context, instance_id,
+                                   password)
 
     def inject_file(self, context, instance_id):
         """Write a file to the given instance."""
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ebe1ce6f0..a31df0895 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -34,17 +34,19 @@ terminating it.
 :func:`nova.utils.import_object`
 
 """
-import base64
 import datetime
+import os
 import random
 import string
 import socket
+import tempfile
+import time
 import functools
 
 from nova import exception
 from nova import flags
 from nova import log as logging
-from nova import scheduler_manager
+from nova import manager
 from nova import rpc
 from nova import utils
 from nova.compute import power_state
@@ -61,6 +63,9 @@
 flags.DEFINE_integer('password_length', 12,
 flags.DEFINE_string('console_host', socket.gethostname(),
                     'Console proxy host to use to connect to instances on'
                     'this host.')
+flags.DEFINE_integer('live_migration_retry_count', 30,
+                     ("Retry count needed in live_migration."
+                      " sleep 1 sec for each count"))
 
 LOG = logging.getLogger('nova.compute.manager')
@@ -99,7 +104,7 @@ def checks_instance_lock(function):
     return decorated_function
 
 
-class ComputeManager(scheduler_manager.SchedulerDependentManager):
+class ComputeManager(manager.SchedulerDependentManager):
 
     """Manages the running instances from creation to destruction."""
 
@@ -175,14 +180,14 @@ class ComputeManager(manager.SchedulerDependentManager):
         """Launch a new instance with specified options."""
         context = context.elevated()
         instance_ref = self.db.instance_get(context, instance_id)
-        instance_ref.onset_files = kwargs.get('onset_files', [])
+        instance_ref.injected_files = kwargs.get('injected_files', [])
         if instance_ref['name'] in self.driver.list_instances():
             raise exception.Error(_("Instance has already been created"))
         LOG.audit(_("instance %s: starting..."), instance_id,
                   context=context)
         self.db.instance_update(context,
                                 instance_id,
-                                {'host': self.host})
+                                {'host': self.host, 'launched_on': self.host})
         self.db.instance_set_state(context,
                                    instance_id,
@@ -354,15 +359,10 @@ class ComputeManager(manager.SchedulerDependentManager):
             LOG.warn(_('trying to inject a file into a non-running '
                        'instance: %(instance_id)s (state: %(instance_state)s '
                        'expected: %(expected_state)s)') % locals())
-        # Files/paths *should* be base64-encoded at this point, but
-        # double-check to make sure.
-        b64_path = utils.ensure_b64_encoding(path)
-        b64_contents = utils.ensure_b64_encoding(file_contents)
-        plain_path = base64.b64decode(b64_path)
         nm = instance_ref['name']
-        msg = _('instance %(nm)s: injecting file to %(plain_path)s') % locals()
+        msg = _('instance %(nm)s: injecting file to %(path)s') % locals()
         LOG.audit(msg)
-        self.driver.inject_file(instance_ref, b64_path, b64_contents)
+        self.driver.inject_file(instance_ref, path, file_contents)
 
     @exception.wrap_exception
     @checks_instance_lock
@@ -724,3 +724,248 @@ class ComputeManager(manager.SchedulerDependentManager):
             self.volume_manager.remove_compute_volume(context, volume_id)
         self.db.volume_detached(context, volume_id)
         return True
+
+    @exception.wrap_exception
+    def compare_cpu(self, context, cpu_info):
+        """Checks that the host cpu is compatible with a cpu given by xml.
+
+        :param context: security context
+        :param cpu_info: json string obtained from virConnect.getCapabilities
+        :returns: See driver.compare_cpu
+
+        """
+        return self.driver.compare_cpu(cpu_info)
+
+    @exception.wrap_exception
+    def create_shared_storage_test_file(self, context):
+        """Makes a tmpfile under FLAGS.instances_path.
+
+        This method lets compute nodes recognize that they mount the
+        same shared storage. (create|check|cleanup)_shared_storage_test_file()
+        operate as a set.
+
+        :param context: security context
+        :returns: tmpfile name (basename)
+
+        """
+
+        dirpath = FLAGS.instances_path
+        fd, tmp_file = tempfile.mkstemp(dir=dirpath)
+        LOG.debug(_("Creating tmpfile %s to notify other "
+                    "compute nodes that they should mount "
+                    "the same storage.") % tmp_file)
+        os.close(fd)
+        return os.path.basename(tmp_file)
+
+    @exception.wrap_exception
+    def check_shared_storage_test_file(self, context, filename):
+        """Confirms the existence of the tmpfile under FLAGS.instances_path.
+
+        :param context: security context
+        :param filename: confirm existence of FLAGS.instances_path/thisfile
+
+        """
+
+        tmp_file = os.path.join(FLAGS.instances_path, filename)
+        if not os.path.exists(tmp_file):
+            raise exception.NotFound(_('%s not found') % tmp_file)
+
+    @exception.wrap_exception
+    def cleanup_shared_storage_test_file(self, context, filename):
+        """Removes the tmpfile under FLAGS.instances_path.
+
+        :param context: security context
+        :param filename: remove FLAGS.instances_path/thisfile
+
+        """
+
+        tmp_file = os.path.join(FLAGS.instances_path, filename)
+        os.remove(tmp_file)
+
+    @exception.wrap_exception
+    def update_available_resource(self, context):
+        """See comments on update_resource_info.
+
+        :param context: security context
+        :returns: See driver.update_available_resource()
+
+        """
+
+        return self.driver.update_available_resource(context, self.host)
+
+    def pre_live_migration(self, context, instance_id):
+        """Preparations for live migration at dest host.
+
+        :param context: security context
+        :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+
+        """
+
+        # Getting instance info
+        instance_ref = self.db.instance_get(context, instance_id)
+        ec2_id = instance_ref['hostname']
+
+        # Getting fixed ips
+        fixed_ip = self.db.instance_get_fixed_address(context, instance_id)
+        if not fixed_ip:
+            msg = _("%(instance_id)s(%(ec2_id)s) does not have fixed_ip.")
+            raise exception.NotFound(msg % locals())
+
+        # If any volume is mounted, prepare here.
+        if not instance_ref['volumes']:
+            LOG.info(_("%s has no volume."), ec2_id)
+        else:
+            for v in instance_ref['volumes']:
+                self.volume_manager.setup_compute_volume(context, v['id'])
+
+        # Bridge settings.
+        # Call this method before ensure_filtering_rules_for_instance;
+        # if the bridge is not set up yet,
+        # ensure_filtering_rules_for_instance fails.
+        #
+        # Retrying is necessary here because requests arrive continuously,
+        # and concurrent requests to iptables make it complain.
+        max_retry = FLAGS.live_migration_retry_count
+        for cnt in range(max_retry):
+            try:
+                self.network_manager.setup_compute_network(context,
+                                                           instance_id)
+                break
+            except exception.ProcessExecutionError:
+                if cnt == max_retry - 1:
+                    raise
+                else:
+                    LOG.warn(_("setup_compute_network() failed %(cnt)d. "
+                               "Retry up to %(max_retry)d for %(ec2_id)s.")
+                             % locals())
+                    time.sleep(1)
+
+        # Creating filters for hypervisors and firewalls.
+        # An example is nova-instance-instance-xxx,
+        # which is written to libvirt.xml (check "virsh nwfilter-list").
+        # This nwfilter is necessary on the destination host.
+        # In addition, this method creates the filtering rule
+        # on the destination host.
+        self.driver.ensure_filtering_rules_for_instance(instance_ref)
+
+    def live_migration(self, context, instance_id, dest):
+        """Executing live migration.
+
+        :param context: security context
+        :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+        :param dest: destination host
+
+        """
+
+        # Get instance for error handling.
+        instance_ref = self.db.instance_get(context, instance_id)
+        i_name = instance_ref.name
+
+        try:
+            # Check that the volume node is working correctly when any
+            # volumes are attached to the instance.
+            if instance_ref['volumes']:
+                rpc.call(context,
+                         FLAGS.volume_topic,
+                         {"method": "check_for_export",
+                          "args": {'instance_id': instance_id}})
+
+            # Ask the dest host to prepare for live migration.
+            rpc.call(context,
+                     self.db.queue_get_for(context, FLAGS.compute_topic, dest),
+                     {"method": "pre_live_migration",
+                      "args": {'instance_id': instance_id}})
+
+        except Exception:
+            msg = _("Pre live migration for %(i_name)s failed at %(dest)s")
+            LOG.error(msg % locals())
+            self.recover_live_migration(context, instance_ref)
+            raise
+
+        # Executing live migration.
+        # live_migration might raise exceptions, but
+        # nothing must be recovered in this version.
+        self.driver.live_migration(context, instance_ref, dest,
+                                   self.post_live_migration,
+                                   self.recover_live_migration)
+
+    def post_live_migration(self, ctxt, instance_ref, dest):
+        """Post operations for live migration.
+
+        This method is called from live_migration
+        and mainly updates the database record.
+
+        :param ctxt: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance
+        :param dest: destination host
+
+        """
+
+        LOG.info(_('post_live_migration() is started..'))
+        instance_id = instance_ref['id']
+
+        # Detaching volumes.
+        try:
+            for vol in self.db.volume_get_all_by_instance(ctxt, instance_id):
+                self.volume_manager.remove_compute_volume(ctxt, vol['id'])
+        except exception.NotFound:
+            pass
+
+        # Releasing vlan.
+        # (not necessary in current implementation?)
+
+        # Releasing security group ingress rule.
+        self.driver.unfilter_instance(instance_ref)
+
+        # Database updating.
+        i_name = instance_ref.name
+        try:
+            # Do not return if floating_ip is not found; otherwise the
+            # instance is never accessible.
+            floating_ip = self.db.instance_get_floating_address(ctxt,
+                                                                instance_id)
+            if not floating_ip:
+                LOG.info(_('No floating_ip is found for %s.'), i_name)
+            else:
+                floating_ip_ref = self.db.floating_ip_get_by_address(ctxt,
+                                                                 floating_ip)
+                self.db.floating_ip_update(ctxt,
+                                           floating_ip_ref['address'],
+                                           {'host': dest})
+        except exception.NotFound:
+            LOG.info(_('No floating_ip is found for %s.'), i_name)
+        except:
+            LOG.error(_("Live migration: Unexpected error: "
+                        "%s cannot inherit the floating ip..") % i_name)
+
+        # Restore instance/volume state.
+        self.recover_live_migration(ctxt, instance_ref, dest)
+
+        LOG.info(_('Migrating %(i_name)s to %(dest)s finished successfully.')
+                 % locals())
+        LOG.info(_("You may see the error \"libvirt: QEMU error: "
+                   "Domain not found: no domain with matching name.\" "
+                   "This error can be safely ignored."))
+
+    def recover_live_migration(self, ctxt, instance_ref, host=None):
+        """Recovers instance/volume state from migrating -> running.
+
+        :param ctxt: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance
+        :param host:
+            DB column value is updated to this hostname.
+            If None, the host the instance currently runs on is used.
+
+        """
+
+        if not host:
+            host = instance_ref['host']
+
+        self.db.instance_update(ctxt,
+                                instance_ref['id'],
+                                {'state_description': 'running',
+                                 'state': power_state.RUNNING,
+                                 'host': host})
+
+        for volume in instance_ref['volumes']:
+            self.db.volume_update(ctxt, volume['id'], {'status': 'in-use'})
```
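
For reference, the new `_check_injected_file_quota` enforces three independent caps: the number of injected files, the path length, and the content length, each mapped to its own `QuotaError` code. A self-contained sketch of the same logic, with hard-coded limits standing in for the `quota.allowed_injected_*` lookups:

```python
# Illustrative limits; the real values come from per-project quota lookups.
MAX_FILES = 5
MAX_PATH_BYTES = 255
MAX_CONTENT_BYTES = 10 * 1024


class QuotaError(Exception):
    def __init__(self, code):
        super(QuotaError, self).__init__(code)
        self.code = code


def check_injected_file_quota(injected_files):
    """Raise QuotaError if any of the three limits is exceeded."""
    if injected_files is None:
        return
    if len(injected_files) > MAX_FILES:
        raise QuotaError("OnsetFileLimitExceeded")
    for path, content in injected_files:
        if len(path) > MAX_PATH_BYTES:
            raise QuotaError("OnsetFilePathLimitExceeded")
        if len(content) > MAX_CONTENT_BYTES:
            raise QuotaError("OnsetFileContentLimitExceeded")


# e.g. a 300-byte path trips the path limit:
# check_injected_file_quota([("/" + "a" * 300, "hello")])
```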
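The three `*_shared_storage_test_file` methods implement a simple handshake for deciding whether two compute hosts mount the same storage: one host creates a uniquely named marker file under its instances directory, the other checks whether that filename is visible under its own, and the marker is removed afterwards. A standalone sketch, with the directory passed in rather than read from `FLAGS.instances_path`:

```python
import os
import tempfile


def create_marker(instances_path):
    """Host A: drop a uniquely named empty file into the shared dir."""
    fd, tmp_file = tempfile.mkstemp(dir=instances_path)
    os.close(fd)
    return os.path.basename(tmp_file)


def check_marker(instances_path, filename):
    """Host B: True if host A's marker is visible here, i.e. both
    hosts mount the same storage."""
    return os.path.exists(os.path.join(instances_path, filename))


def cleanup_marker(instances_path, filename):
    """Either host: remove the marker once the check is done."""
    os.remove(os.path.join(instances_path, filename))
```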
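`pre_live_migration` wraps `setup_compute_network` in a bounded retry loop because concurrent iptables updates can fail transiently; it sleeps one second between attempts and re-raises on the last one. The same pattern extracted as a generic helper (names here are illustrative, not nova API):

```python
import time


def retry_transient(operation, max_retry, is_transient, delay=1):
    """Run operation(); on a transient failure, sleep and retry up to
    max_retry times, re-raising on the final attempt."""
    for cnt in range(max_retry):
        try:
            return operation()
        except Exception as exc:
            if cnt == max_retry - 1 or not is_transient(exc):
                raise
            time.sleep(delay)
```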
