| author | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
|---|---|---|
| committer | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
| commit | 23efe8d14973a7c94de167562340938ba00d043b (patch) | |
| tree | 4e383662f4d11763684901e454025ec9c9297543 /nova/compute | |
| parent | 609a912fa8a816c1f47140489dcc1131356cd67c (diff) | |
| parent | abc6c82449dfc46a33dcd8190840e51f44b5b930 (diff) | |
refactored out middleware, now it's a decorator on service.api
Diffstat (limited to 'nova/compute')
| -rw-r--r-- | nova/compute/api.py | 61 |
| -rw-r--r-- | nova/compute/manager.py | 269 |
2 files changed, 311 insertions, 19 deletions
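
The headline change, per the commit message, replaces the old zone-routing middleware with a decorator applied directly to compute API methods: `@scheduler_api.reroute_if_not_found("get")` on the new `routing_get` below, plus the commented-out applications on `delete`, `pause`, and friends. Here is a minimal sketch of how such a decorator could be shaped; it is not the actual `nova.scheduler.api` code, and `InstanceNotFound` and `reroute_to_child_zones` are hypothetical stand-ins:

```python
import functools


class InstanceNotFound(Exception):
    """Stand-in for the 'instance is not in this zone's DB' error."""


def reroute_to_child_zones(context, method_name, instance_id):
    """Hypothetical helper: look the instance up in child zones via
    novaclient and invoke method_name there."""
    raise NotImplementedError


def reroute_if_not_found(method_name):
    """Try the local compute API call first; on a local miss, re-route
    the same operation to a child zone instead of failing."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, context, instance_id, *args, **kwargs):
            try:
                return func(self, context, instance_id, *args, **kwargs)
            except InstanceNotFound:
                return reroute_to_child_zones(context, method_name,
                                              instance_id)
        return wrapper
    return decorator
```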
```diff
diff --git a/nova/compute/api.py b/nova/compute/api.py
index f4bfe720c..9fb4c8ae2 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -34,6 +34,7 @@ from nova import rpc
 from nova import utils
 from nova import volume
 from nova.compute import instance_types
+from nova.scheduler import api as scheduler_api
 from nova.db import base
 
 FLAGS = flags.FLAGS
@@ -80,13 +81,32 @@ class API(base.Base):
                 topic,
                 {"method": "get_network_topic", "args": {'fake': 1}})
 
+    def _check_injected_file_quota(self, context, injected_files):
+        """
+        Enforce quota limits on injected files
+
+        Raises a QuotaError if any limit is exceeded
+        """
+        if injected_files is None:
+            return
+        limit = quota.allowed_injected_files(context)
+        if len(injected_files) > limit:
+            raise quota.QuotaError(code="OnsetFileLimitExceeded")
+        path_limit = quota.allowed_injected_file_path_bytes(context)
+        content_limit = quota.allowed_injected_file_content_bytes(context)
+        for path, content in injected_files:
+            if len(path) > path_limit:
+                raise quota.QuotaError(code="OnsetFilePathLimitExceeded")
+            if len(content) > content_limit:
+                raise quota.QuotaError(code="OnsetFileContentLimitExceeded")
+
     def create(self, context, instance_type,
                image_id, kernel_id=None, ramdisk_id=None,
                min_count=1, max_count=1,
                display_name='', display_description='',
                key_name=None, key_data=None, security_group='default',
                availability_zone=None, user_data=None, metadata=[],
-               onset_files=None):
+               injected_files=None):
         """Create the number of instances requested if quota and
         other arguments check out ok."""
@@ -124,11 +144,18 @@ class API(base.Base):
             LOG.warn(msg)
             raise quota.QuotaError(msg, "MetadataLimitExceeded")
 
+        self._check_injected_file_quota(context, injected_files)
+
         image = self.image_service.show(context, image_id)
+
+        os_type = None
+        if 'properties' in image and 'os_type' in image['properties']:
+            os_type = image['properties']['os_type']
+
         if kernel_id is None:
-            kernel_id = image.get('kernel_id', None)
+            kernel_id = image['properties'].get('kernel_id', None)
         if ramdisk_id is None:
-            ramdisk_id = image.get('ramdisk_id', None)
+            ramdisk_id = image['properties'].get('ramdisk_id', None)
         # FIXME(sirp): is there a way we can remove null_kernel?
         # No kernel and ramdisk for raw images
         if kernel_id == str(FLAGS.null_kernel):
@@ -165,6 +192,7 @@ class API(base.Base):
             'image_id': image_id,
             'kernel_id': kernel_id or '',
             'ramdisk_id': ramdisk_id or '',
+            'state': 0,
             'state_description': 'scheduling',
             'user_id': context.user_id,
             'project_id': context.project_id,
@@ -180,7 +208,8 @@ class API(base.Base):
             'key_data': key_data,
             'locked': False,
             'metadata': metadata,
-            'availability_zone': availability_zone}
+            'availability_zone': availability_zone,
+            'os_type': os_type}
         elevated = context.elevated()
         instances = []
         LOG.debug(_("Going to run %s instances..."), num_instances)
@@ -218,7 +247,7 @@ class API(base.Base):
                      "args": {"topic": FLAGS.compute_topic,
                               "instance_id": instance_id,
                               "availability_zone": availability_zone,
-                              "onset_files": onset_files}})
+                              "injected_files": injected_files}})
         for group_id in security_groups:
             self.trigger_security_group_members_refresh(elevated, group_id)
 
@@ -314,6 +343,7 @@ class API(base.Base):
         rv = self.db.instance_update(context, instance_id, kwargs)
         return dict(rv.iteritems())
 
+    #@scheduler_api.reroute_if_not_found("delete")
     def delete(self, context, instance_id):
         LOG.debug(_("Going to try to terminate %s"), instance_id)
         try:
@@ -343,9 +373,18 @@ class API(base.Base):
 
     def get(self, context, instance_id):
         """Get a single instance with the given ID."""
+        LOG.debug("*** COMPUTE.API::GET")
         rv = self.db.instance_get(context, instance_id)
+        LOG.debug("*** COMPUTE.API::GET OUT CLEAN")
         return dict(rv.iteritems())
 
+    @scheduler_api.reroute_if_not_found("get")
+    def routing_get(self, context, instance_id):
+        """Use this method instead of get() if this is the only
+           operation you intend to do. It will route to novaclient.get
+           if the instance is not found."""
+        return self.get(context, instance_id)
+
     def get_all(self, context, project_id=None, reservation_id=None,
                 fixed_ip=None):
         """Get all instances, possibly filtered by one of the
@@ -463,14 +502,17 @@ class API(base.Base):
                  "args": {"topic": FLAGS.compute_topic,
                           "instance_id": instance_id, }},)
 
+    #@scheduler_api.reroute_if_not_found("pause")
     def pause(self, context, instance_id):
         """Pause the given instance."""
         self._cast_compute_message('pause_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("unpause")
     def unpause(self, context, instance_id):
         """Unpause the given instance."""
         self._cast_compute_message('unpause_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("diagnostics")
     def get_diagnostics(self, context, instance_id):
         """Retrieve diagnostics for the given instance."""
         return self._call_compute_message(
@@ -482,25 +524,30 @@ class API(base.Base):
         """Retrieve actions for the given instance."""
         return self.db.instance_get_actions(context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("suspend")
     def suspend(self, context, instance_id):
         """suspend the instance with instance_id"""
         self._cast_compute_message('suspend_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("resume")
     def resume(self, context, instance_id):
         """resume the instance with instance_id"""
         self._cast_compute_message('resume_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("rescue")
     def rescue(self, context, instance_id):
         """Rescue the given instance."""
         self._cast_compute_message('rescue_instance', context, instance_id)
 
+    #@scheduler_api.reroute_if_not_found("unrescue")
     def unrescue(self, context, instance_id):
         """Unrescue the given instance."""
         self._cast_compute_message('unrescue_instance', context,
                                    instance_id)
 
-    def set_admin_password(self, context, instance_id):
+    def set_admin_password(self, context, instance_id, password=None):
         """Set the root/admin password for the given instance."""
-        self._cast_compute_message('set_admin_password', context, instance_id)
+        self._cast_compute_message('set_admin_password', context, instance_id,
+                                   password)
 
     def inject_file(self, context, instance_id):
         """Write a file to the given instance."""
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index ebe1ce6f0..a31df0895 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -34,17 +34,19 @@ terminating it.
 :func:`nova.utils.import_object`
 
 """
-import base64
 import datetime
+import os
 import random
 import string
 import socket
+import tempfile
+import time
 import functools
 
 from nova import exception
 from nova import flags
 from nova import log as logging
-from nova import scheduler_manager
+from nova import manager
 from nova import rpc
 from nova import utils
 from nova.compute import power_state
@@ -61,6 +63,9 @@
 flags.DEFINE_integer('password_length', 12,
 flags.DEFINE_string('console_host', socket.gethostname(),
                     'Console proxy host to use to connect to instances on'
                     'this host.')
+flags.DEFINE_integer('live_migration_retry_count', 30,
+                     ("Retry count needed in live_migration."
+                      " sleep 1 sec for each count"))
 
 LOG = logging.getLogger('nova.compute.manager')
@@ -99,7 +104,7 @@ def checks_instance_lock(function):
     return decorated_function
 
 
-class ComputeManager(scheduler_manager.SchedulerDependentManager):
+class ComputeManager(manager.SchedulerDependentManager):
 
     """Manages the running instances from creation to destruction."""
 
@@ -175,14 +180,14 @@ class ComputeManager(manager.SchedulerDependentManager):
         """Launch a new instance with specified options."""
         context = context.elevated()
         instance_ref = self.db.instance_get(context, instance_id)
-        instance_ref.onset_files = kwargs.get('onset_files', [])
+        instance_ref.injected_files = kwargs.get('injected_files', [])
         if instance_ref['name'] in self.driver.list_instances():
             raise exception.Error(_("Instance has already been created"))
         LOG.audit(_("instance %s: starting..."), instance_id,
                   context=context)
         self.db.instance_update(context,
                                 instance_id,
-                                {'host': self.host})
+                                {'host': self.host, 'launched_on': self.host})
         self.db.instance_set_state(context,
                                    instance_id,
@@ -354,15 +359,10 @@ class ComputeManager(manager.SchedulerDependentManager):
             LOG.warn(_('trying to inject a file into a non-running '
                        'instance: %(instance_id)s (state: %(instance_state)s '
                        'expected: %(expected_state)s)') % locals())
-        # Files/paths *should* be base64-encoded at this point, but
-        # double-check to make sure.
-        b64_path = utils.ensure_b64_encoding(path)
-        b64_contents = utils.ensure_b64_encoding(file_contents)
-        plain_path = base64.b64decode(b64_path)
         nm = instance_ref['name']
-        msg = _('instance %(nm)s: injecting file to %(plain_path)s') % locals()
+        msg = _('instance %(nm)s: injecting file to %(path)s') % locals()
         LOG.audit(msg)
-        self.driver.inject_file(instance_ref, b64_path, b64_contents)
+        self.driver.inject_file(instance_ref, path, file_contents)
 
     @exception.wrap_exception
     @checks_instance_lock
@@ -724,3 +724,248 @@ class ComputeManager(manager.SchedulerDependentManager):
             self.volume_manager.remove_compute_volume(context, volume_id)
         self.db.volume_detached(context, volume_id)
         return True
+
+    @exception.wrap_exception
+    def compare_cpu(self, context, cpu_info):
+        """Checks that the host cpu is compatible with a cpu given by xml.
+
+        :param context: security context
+        :param cpu_info: json string obtained from virConnect.getCapabilities
+        :returns: See driver.compare_cpu
+
+        """
+        return self.driver.compare_cpu(cpu_info)
+
+    @exception.wrap_exception
+    def create_shared_storage_test_file(self, context):
+        """Makes a tmpfile under FLAGS.instances_path.
+
+        This method lets compute nodes recognize that they mount the
+        same shared storage. (create|check|cleanup)_shared_storage_test_file()
+        operate as a set.
+
+        :param context: security context
+        :returns: tmpfile name (basename)
+
+        """
+
+        dirpath = FLAGS.instances_path
+        fd, tmp_file = tempfile.mkstemp(dir=dirpath)
+        LOG.debug(_("Creating tmpfile %s to notify other "
+                    "compute nodes that they should mount "
+                    "the same storage.") % tmp_file)
+        os.close(fd)
+        return os.path.basename(tmp_file)
+
+    @exception.wrap_exception
+    def check_shared_storage_test_file(self, context, filename):
+        """Confirms the existence of the tmpfile under FLAGS.instances_path.
+
+        :param context: security context
+        :param filename: confirm existence of FLAGS.instances_path/thisfile
+
+        """
+
+        tmp_file = os.path.join(FLAGS.instances_path, filename)
+        if not os.path.exists(tmp_file):
+            raise exception.NotFound(_('%s not found') % tmp_file)
+
+    @exception.wrap_exception
+    def cleanup_shared_storage_test_file(self, context, filename):
+        """Removes the tmpfile under FLAGS.instances_path.
+
+        :param context: security context
+        :param filename: remove FLAGS.instances_path/thisfile
+
+        """
+
+        tmp_file = os.path.join(FLAGS.instances_path, filename)
+        os.remove(tmp_file)
+
+    @exception.wrap_exception
+    def update_available_resource(self, context):
+        """See comments on update_resource_info.
+
+        :param context: security context
+        :returns: See driver.update_available_resource()
+
+        """
+
+        return self.driver.update_available_resource(context, self.host)
+
+    def pre_live_migration(self, context, instance_id):
+        """Preparations for live migration at dest host.
+
+        :param context: security context
+        :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+
+        """
+
+        # Getting instance info
+        instance_ref = self.db.instance_get(context, instance_id)
+        ec2_id = instance_ref['hostname']
+
+        # Getting fixed ips
+        fixed_ip = self.db.instance_get_fixed_address(context, instance_id)
+        if not fixed_ip:
+            msg = _("%(instance_id)s(%(ec2_id)s) does not have fixed_ip.")
+            raise exception.NotFound(msg % locals())
+
+        # If any volume is mounted, prepare here.
+        if not instance_ref['volumes']:
+            LOG.info(_("%s has no volume."), ec2_id)
+        else:
+            for v in instance_ref['volumes']:
+                self.volume_manager.setup_compute_volume(context, v['id'])
+
+        # Bridge settings.
+        # Call this method before ensure_filtering_rules_for_instance;
+        # if the bridge is not set up yet,
+        # ensure_filtering_rules_for_instance fails.
+        #
+        # Retrying is necessary here because requests arrive continuously,
+        # and concurrent requests to iptables make it complain.
+        max_retry = FLAGS.live_migration_retry_count
+        for cnt in range(max_retry):
+            try:
+                self.network_manager.setup_compute_network(context,
+                                                           instance_id)
+                break
+            except exception.ProcessExecutionError:
+                if cnt == max_retry - 1:
+                    raise
+                else:
+                    LOG.warn(_("setup_compute_network() failed %(cnt)d. "
+                               "Retry up to %(max_retry)d for %(ec2_id)s.")
+                             % locals())
+                    time.sleep(1)
+
+        # Creating filters for hypervisors and firewalls.
+        # An example is nova-instance-instance-xxx,
+        # which is written to libvirt.xml (check "virsh nwfilter-list").
+        # This nwfilter is necessary on the destination host.
+        # In addition, this method creates the filtering rule
+        # on the destination host.
+        self.driver.ensure_filtering_rules_for_instance(instance_ref)
+
+    def live_migration(self, context, instance_id, dest):
+        """Executing live migration.
+
+        :param context: security context
+        :param instance_id: nova.db.sqlalchemy.models.Instance.Id
+        :param dest: destination host
+
+        """
+
+        # Get instance for error handling.
+        instance_ref = self.db.instance_get(context, instance_id)
+        i_name = instance_ref.name
+
+        try:
+            # Check that the volume node is working correctly when any
+            # volumes are attached to the instance.
+            if instance_ref['volumes']:
+                rpc.call(context,
+                         FLAGS.volume_topic,
+                         {"method": "check_for_export",
+                          "args": {'instance_id': instance_id}})
+
+            # Ask the dest host to prepare for live migration.
+            rpc.call(context,
+                     self.db.queue_get_for(context, FLAGS.compute_topic, dest),
+                     {"method": "pre_live_migration",
+                      "args": {'instance_id': instance_id}})
+
+        except Exception:
+            msg = _("Pre live migration for %(i_name)s failed at %(dest)s")
+            LOG.error(msg % locals())
+            self.recover_live_migration(context, instance_ref)
+            raise
+
+        # Executing live migration.
+        # live_migration might raise exceptions, but
+        # nothing must be recovered in this version.
+        self.driver.live_migration(context, instance_ref, dest,
+                                   self.post_live_migration,
+                                   self.recover_live_migration)
+
+    def post_live_migration(self, ctxt, instance_ref, dest):
+        """Post operations for live migration.
+
+        This method is called from live_migration
+        and mainly updates the database record.
+
+        :param ctxt: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance
+        :param dest: destination host
+
+        """
+
+        LOG.info(_('post_live_migration() is started..'))
+        instance_id = instance_ref['id']
+
+        # Detaching volumes.
+        try:
+            for vol in self.db.volume_get_all_by_instance(ctxt, instance_id):
+                self.volume_manager.remove_compute_volume(ctxt, vol['id'])
+        except exception.NotFound:
+            pass
+
+        # Releasing vlan.
+        # (not necessary in current implementation?)
+
+        # Releasing security group ingress rule.
+        self.driver.unfilter_instance(instance_ref)
+
+        # Database updating.
+        i_name = instance_ref.name
+        try:
+            # Do not return if floating_ip is not found; otherwise the
+            # instance is never accessible.
+            floating_ip = self.db.instance_get_floating_address(ctxt,
+                                                                instance_id)
+            if not floating_ip:
+                LOG.info(_('No floating_ip is found for %s.'), i_name)
+            else:
+                floating_ip_ref = self.db.floating_ip_get_by_address(ctxt,
+                                                                 floating_ip)
+                self.db.floating_ip_update(ctxt,
+                                           floating_ip_ref['address'],
+                                           {'host': dest})
+        except exception.NotFound:
+            LOG.info(_('No floating_ip is found for %s.'), i_name)
+        except:
+            LOG.error(_("Live migration: Unexpected error: "
+                        "%s cannot inherit the floating ip..") % i_name)
+
+        # Restore instance/volume state.
+        self.recover_live_migration(ctxt, instance_ref, dest)
+
+        LOG.info(_('Migrating %(i_name)s to %(dest)s finished successfully.')
+                 % locals())
+        LOG.info(_("You may see the error \"libvirt: QEMU error: "
+                   "Domain not found: no domain with matching name.\" "
+                   "This error can be safely ignored."))
+
+    def recover_live_migration(self, ctxt, instance_ref, host=None):
+        """Recovers instance/volume state from migrating -> running.
+
+        :param ctxt: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance
+        :param host:
+            DB column value is updated to this hostname.
+            If None, the host the instance currently runs on is used.
+
+        """
+
+        if not host:
+            host = instance_ref['host']
+
+        self.db.instance_update(ctxt,
+                                instance_ref['id'],
+                                {'state_description': 'running',
+                                 'state': power_state.RUNNING,
+                                 'host': host})
+
+        for volume in instance_ref['volumes']:
+            self.db.volume_update(ctxt, volume['id'], {'status': 'in-use'})
```
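
For reference, the new `_check_injected_file_quota` enforces three independent caps: the number of injected files, the path length, and the content length, each mapped to its own `QuotaError` code. A self-contained sketch of the same logic, with hard-coded limits standing in for the `quota.allowed_injected_*` lookups:

```python
# Illustrative limits; the real values come from per-project quota lookups.
MAX_FILES = 5
MAX_PATH_BYTES = 255
MAX_CONTENT_BYTES = 10 * 1024


class QuotaError(Exception):
    def __init__(self, code):
        super(QuotaError, self).__init__(code)
        self.code = code


def check_injected_file_quota(injected_files):
    """Raise QuotaError if any of the three limits is exceeded."""
    if injected_files is None:
        return
    if len(injected_files) > MAX_FILES:
        raise QuotaError("OnsetFileLimitExceeded")
    for path, content in injected_files:
        if len(path) > MAX_PATH_BYTES:
            raise QuotaError("OnsetFilePathLimitExceeded")
        if len(content) > MAX_CONTENT_BYTES:
            raise QuotaError("OnsetFileContentLimitExceeded")


# e.g. a 300-byte path trips the path limit:
# check_injected_file_quota([("/" + "a" * 300, "hello")])
```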
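The three `*_shared_storage_test_file` methods implement a simple handshake for deciding whether two compute hosts mount the same storage: one host creates a uniquely named marker file under its instances directory, the other checks whether that filename is visible under its own, and the marker is removed afterwards. A standalone sketch, with the directory passed in rather than read from `FLAGS.instances_path`:

```python
import os
import tempfile


def create_marker(instances_path):
    """Host A: drop a uniquely named empty file into the shared dir."""
    fd, tmp_file = tempfile.mkstemp(dir=instances_path)
    os.close(fd)
    return os.path.basename(tmp_file)


def check_marker(instances_path, filename):
    """Host B: True if host A's marker is visible here, i.e. both
    hosts mount the same storage."""
    return os.path.exists(os.path.join(instances_path, filename))


def cleanup_marker(instances_path, filename):
    """Either host: remove the marker once the check is done."""
    os.remove(os.path.join(instances_path, filename))
```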
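`pre_live_migration` wraps `setup_compute_network` in a bounded retry loop because concurrent iptables updates can fail transiently; it sleeps one second between attempts and re-raises on the last one. The same pattern extracted as a generic helper (names here are illustrative, not nova API):

```python
import time


def retry_transient(operation, max_retry, is_transient, delay=1):
    """Run operation(); on a transient failure, sleep and retry up to
    max_retry times, re-raising on the final attempt."""
    for cnt in range(max_retry):
        try:
            return operation()
        except Exception as exc:
            if cnt == max_retry - 1 or not is_transient(exc):
                raise
            time.sleep(delay)
```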
