Remove network_api fallback for info_cache from APIs

Fixes bug 932395 OS API and EC2 would query the network API when instance['info_cache'] had no network info.. or network info was an empty list. The ideal was to fall back to querying the network IP in case the cache was busted. However, reality says this case is extremely common as it is the case when instances are freshly built and haven't had network info assigned yet. The calls to network API are expensive and goes against the whole idea of this "cache". So, this patch removes the fallback to querying the network API. In its place, it adds a periodic task to the compute manager to periodically sync the cache entry. Since I had to fix a number of tests, I consolidated some things there as well with regards to instance stubbing. Change-Id: I493f811bcba4e99ac6a5756bcab473557d5c0104
author: Chris Behrens <cbehrens@codestud.com> 2012-02-17 00:42:10 +0000
committer: Chris Behrens <cbehrens@codestud.com> 2012-02-22 23:36:50 +0000
commit: 08fa534a0d28fa1be48aef927584161becb936c7 (patch)
tree: d872447d9d266df831c0f077afd09a3bd5e90d32 /nova/compute
parent: 9196e1c7b07be8d3f9dbe7947cace9fd4e862f9c (diff)
download: nova-08fa534a0d28fa1be48aef927584161becb936c7.tar.gz
nova-08fa534a0d28fa1be48aef927584161becb936c7.tar.xz
nova-08fa534a0d28fa1be48aef927584161becb936c7.zip
1 files changed, 58 insertions, 0 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index fa02447c7..77452ee94 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -118,6 +118,10 @@ compute_opts = [
                default=3600,
                help="Number of periodic scheduler ticks to wait between "
                     "runs of the image cache manager."),
+    cfg.IntOpt("heal_instance_info_cache_interval",
+               default=60,
+               help="Number of seconds between instance info_cache self "
+                        "healing updates")
     ]
 
 FLAGS = flags.FLAGS
@@ -205,6 +209,7 @@ class ComputeManager(manager.SchedulerDependentManager):
         self.network_manager = utils.import_object(FLAGS.network_manager)
         self._last_host_check = 0
         self._last_bw_usage_poll = 0
+        self._last_info_cache_heal = 0
 
         super(ComputeManager, self).__init__(service_name="compute",
                                              *args, **kwargs)
@@ -2088,6 +2093,59 @@ class ComputeManager(manager.SchedulerDependentManager):
                             block_device_info)
 
     @manager.periodic_task
+    def _heal_instance_info_cache(self, context):
+        """Called periodically.  On every call, try to update the
+        info_cache's network information for another instance by
+        calling to the network manager.
+
+        This is implemented by keeping a cache of uuids of instances
+        that live on this host.  On each call, we pop one off of a
+        list, pull the DB record, and try the call to the network API.
+        If anything errors, we don't care.  It's possible the instance
+        has been deleted, etc.
+        """
+        heal_interval = FLAGS.heal_instance_info_cache_interval
+        if not heal_interval:
+            return
+        curr_time = time.time()
+        if self._last_info_cache_heal + heal_interval > curr_time:
+            return
+        self._last_info_cache_heal = curr_time
+
+        instance_uuids = getattr(self, '_instance_uuids_to_heal', None)
+        instance = None
+
+        while not instance or instance['host'] != self.host:
+            if instance_uuids:
+                try:
+                    instance = self.db.instance_get_by_uuid(context,
+                        instance_uuids.pop(0))
+                except exception.InstanceNotFound:
+                    # Instance is gone.  Try to grab another.
+                    continue
+            else:
+                # No more in our copy of uuids.  Pull from the DB.
+                db_instances = self.db.instance_get_all_by_host(
+                        context, self.host)
+                if not db_instances:
+                    # None.. just return.
+                    return
+                instance = db_instances.pop(0)
+                instance_uuids = [inst['uuid'] for inst in db_instances]
+                self._instance_uuids_to_heal = instance_uuids
+
+        # We have an instance now and it's ours
+        try:
+            # Call to network API to get instance info.. this will
+            # force an update to the instance's info_cache
+            self.network_api.get_instance_nw_info(context, instance)
+            LOG.debug(_("Updated the info_cache for instance %s") %
+                    instance['uuid'])
+        except Exception:
+            # We don't care about any failures
+            pass
+
+    @manager.periodic_task
     def _poll_rebooting_instances(self, context):
         if FLAGS.reboot_timeout > 0:
             self.driver.poll_rebooting_instances(FLAGS.reboot_timeout)
author	Chris Behrens <cbehrens@codestud.com>	2012-02-17 00:42:10 +0000
committer	Chris Behrens <cbehrens@codestud.com>	2012-02-22 23:36:50 +0000
commit	08fa534a0d28fa1be48aef927584161becb936c7 (patch)
tree	d872447d9d266df831c0f077afd09a3bd5e90d32 /nova/compute
parent	9196e1c7b07be8d3f9dbe7947cace9fd4e862f9c (diff)
download	nova-08fa534a0d28fa1be48aef927584161becb936c7.tar.gz nova-08fa534a0d28fa1be48aef927584161becb936c7.tar.xz nova-08fa534a0d28fa1be48aef927584161becb936c7.zip