From 8f1c54ce98fed9cb7384be9cbb9f28eba2f12c2d Mon Sep 17 00:00:00 2001 From: David McNally Date: Wed, 8 Aug 2012 16:20:23 +0100 Subject: Compute restart causes period of network 'blackout' Fixes bug 1034401 When a compute service is restarted, each instance running on the host has its iptables rules built and applied sequentially during the host init stage. The impact of this, especially on a host running many instances, can be observed as a period where some instances are not accessible as the existing iptables rules have been torn down and not yet re-applied. The presented work-around for this is a configurable/flagged deferred mode that prevents the application of the iptables rules until all instances on the host have been initialised; the rules for all instances are then applied all at once, preventing a 'blackout' period. Change-Id: I0da90d07e54225fb63f3884897fb00a6027cd537 --- nova/compute/manager.py | 74 ++++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'nova/compute') diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 9971be012..6dde12157 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -275,43 +275,55 @@ class ComputeManager(manager.SchedulerDependentManager): self.driver.init_host(host=self.host) context = nova.context.get_admin_context() instances = self.db.instance_get_all_by_host(context, self.host) - for count, instance in enumerate(instances): - db_state = instance['power_state'] - drv_state = self._get_power_state(context, instance) - expect_running = (db_state == power_state.RUNNING and - drv_state != db_state) + if FLAGS.defer_iptables_apply: + self.driver.filter_defer_apply_on() - LOG.debug(_('Current state is %(drv_state)s, state in DB is ' - '%(db_state)s.'), locals(), instance=instance) + try: + for count, instance in enumerate(instances): + db_state = instance['power_state'] + drv_state = self._get_power_state(context, instance) - net_info = 
compute_utils.get_nw_info_for_instance(instance) + expect_running = (db_state == power_state.RUNNING and + drv_state != db_state) - # We're calling plug_vifs to ensure bridge and iptables - # filters are present, calling it once is enough. - if count == 0: - legacy_net_info = self._legacy_nw_info(net_info) - self.driver.plug_vifs(instance, legacy_net_info) + LOG.debug(_('Current state is %(drv_state)s, state in DB is ' + '%(db_state)s.'), locals(), instance=instance) - if ((expect_running and FLAGS.resume_guests_state_on_host_boot) or - FLAGS.start_guests_on_host_boot): - LOG.info(_('Rebooting instance after nova-compute restart.'), - locals(), instance=instance) - try: - self.driver.resume_state_on_host_boot(context, instance, - self._legacy_nw_info(net_info)) - except NotImplementedError: - LOG.warning(_('Hypervisor driver does not support ' - 'resume guests'), instance=instance) + net_info = compute_utils.get_nw_info_for_instance(instance) - elif drv_state == power_state.RUNNING: - # VMWareAPI drivers will raise an exception - try: - self.driver.ensure_filtering_rules_for_instance(instance, - self._legacy_nw_info(net_info)) - except NotImplementedError: - LOG.warning(_('Hypervisor driver does not support ' - 'firewall rules'), instance=instance) + # We're calling plug_vifs to ensure bridge and iptables + # filters are present, calling it once is enough. 
+ if count == 0: + legacy_net_info = self._legacy_nw_info(net_info) + self.driver.plug_vifs(instance, legacy_net_info) + + if ((expect_running and FLAGS.resume_guests_state_on_host_boot) + or FLAGS.start_guests_on_host_boot): + LOG.info( + _('Rebooting instance after nova-compute restart.'), + locals(), instance=instance) + try: + self.driver.resume_state_on_host_boot(context, + instance, + self._legacy_nw_info(net_info)) + except NotImplementedError: + LOG.warning(_('Hypervisor driver does not support ' + 'resume guests'), instance=instance) + + elif drv_state == power_state.RUNNING: + # VMWareAPI drivers will raise an exception + try: + self.driver.ensure_filtering_rules_for_instance( + instance, + self._legacy_nw_info(net_info)) + except NotImplementedError: + LOG.warning(_('Hypervisor driver does not support ' + 'firewall rules'), instance=instance) + + finally: + if FLAGS.defer_iptables_apply: + self.driver.filter_defer_apply_off() def _get_power_state(self, context, instance): """Retrieve the power state for the given instance.""" -- cgit