From ac0f6eb063fc5a5c0a9410402ecf57fae1faf594 Mon Sep 17 00:00:00 2001 From: Devananda van der Veen Date: Fri, 1 Mar 2013 14:05:35 -0800 Subject: Compute manager should remove dead resources While most hypervisors return a single - and constant - value from driver.get_available_nodes, baremetal does not. When a node is deleted from the baremetal database, it is no longer returned from driver.get_available_nodes. However, Nova's compute_node record is not directly updated. This patch allows Compute Manager to detect missing nodes within update_available_resource. It then invokes resource_tracker to update the dead node and remove it from compute. This in turn allows the ServiceGroup API to properly update the servicegroup when a baremetal node is no longer in service. Fixes bug 1138184 Change-Id: Icfff3f8e3099668806633a6a58a152b32ec8b49b --- nova/compute/manager.py | 9 ++++++++- nova/compute/resource_tracker.py | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'nova/compute') diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 49f62bc2f..d544ad43e 100755 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -3658,11 +3658,18 @@ class ComputeManager(manager.SchedulerDependentManager): :param context: security context """ new_resource_tracker_dict = {} - nodenames = self.driver.get_available_nodes() + nodenames = set(self.driver.get_available_nodes()) for nodename in nodenames: rt = self._get_resource_tracker(nodename) rt.update_available_resource(context) new_resource_tracker_dict[nodename] = rt + + # delete nodes that the driver no longer reports + known_nodes = set(self._resource_tracker_dict.keys()) + for nodename in known_nodes - nodenames: + rt = self._get_resource_tracker(nodename) + rt.update_available_resource(context, delete=True) + self._resource_tracker_dict = new_resource_tracker_dict @manager.periodic_task(spacing=CONF.running_deleted_instance_poll_interval) diff --git 
a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 42000fcb9..fab64017d 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -226,7 +226,7 @@ class ResourceTracker(object): return self.compute_node is None @lockutils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, 'nova-') - def update_available_resource(self, context): + def update_available_resource(self, context, delete=False): """Override in-memory calculations of compute node resource usage based on data audited from the hypervisor layer. @@ -237,11 +237,15 @@ class ResourceTracker(object): LOG.audit(_("Auditing locally available compute resources")) resources = self.driver.get_available_resource(self.nodename) if not resources: - # The virt driver does not support this function - LOG.audit(_("Virt driver does not support " - "'get_available_resource' Compute tracking is disabled.")) - self.compute_node = None - return + if delete: + self._delete_compute_node(context) + return + else: + # The virt driver does not support this function + LOG.audit(_("Virt driver does not support " + "'get_available_resource' Compute tracking is disabled.")) + self.compute_node = None + return self._verify_resources(resources) @@ -270,6 +274,13 @@ class ResourceTracker(object): self._sync_compute_node(context, resources) + def _delete_compute_node(self, context): + """Delete a compute node DB record.""" + if self.compute_node: + LOG.audit(_("Deleting compute node %s") % self.compute_node['id']) + self.compute_node = self.conductor_api.compute_node_delete( + context, self.compute_node) + def _sync_compute_node(self, context, resources): """Create or update the compute node DB record.""" if not self.compute_node: -- cgit