diff options
author | Chris Behrens <cbehrens@codestud.com> | 2013-06-25 20:00:02 +0000 |
---|---|---|
committer | Chris Behrens <cbehrens@codestud.com> | 2013-06-26 19:58:16 +0000 |
commit | f0cf1c0fc14ba44ae6af5aad93ccd2fe010094a5 (patch) | |
tree | a803f7a00fdfdb1318f7503f2e3383118abc92fe | |
parent | 9331c5c1115c7d8cc5bcab71b1100eeea1ce72fe (diff) | |
download | nova-f0cf1c0fc14ba44ae6af5aad93ccd2fe010094a5.tar.gz nova-f0cf1c0fc14ba44ae6af5aad93ccd2fe010094a5.tar.xz nova-f0cf1c0fc14ba44ae6af5aad93ccd2fe010094a5.zip |
Allow retrying network allocations separately
Introduce a new config option, 'network_allocate_retries', that allows
one to retry network allocations. The default is 0 (no retries), which
matches the current behavior.
Network allocations currently get retried only via a full retry of the
build through the scheduler, when such scheduler retries are enabled. This
patch reduces the need to re-schedule for simple network allocation issues.
The retrying happens in the network alloc async greenthread, so for virt
drivers that support the new NetworkModel, the retrying potentially
happens in the background while the image is being downloaded, etc.
DocImpact
Change-Id: I1a5fdcccbb736fc0b1d8c0cbc3b45a8372a6aef7
-rwxr-xr-x | nova/compute/manager.py | 69 | ||||
-rw-r--r-- | nova/tests/compute/test_compute.py | 97 |
2 files changed, 147 insertions, 19 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 22881f5bd..e3d84b1fd 100755 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -106,6 +106,9 @@ compute_opts = [ default=False, help='Whether to start guests that were running before the ' 'host rebooted'), + cfg.IntOpt('network_allocate_retries', + default=0, + help="Number of times to retry network allocation on failures"), ] interval_opts = [ @@ -1147,6 +1150,50 @@ class ComputeManager(manager.SchedulerDependentManager): expected_task_state=(task_states.SCHEDULING, None)) + def _allocate_network_async(self, context, instance, requested_networks, + macs, security_groups, is_vpn): + """Method used to allocate networks in the background. + + Broken out for testing. + """ + LOG.debug(_("Allocating IP information in the background."), + instance=instance) + retries = CONF.network_allocate_retries + if retries < 0: + LOG.warn(_("Treating negative config value (%(retries)s) for " + "'network_allocate_retries' as 0."), + {'retries': retries}) + attempts = retries > 1 and retries + 1 or 1 + retry_time = 1 + for attempt in range(1, attempts + 1): + try: + nwinfo = self.network_api.allocate_for_instance( + context, instance, vpn=is_vpn, + requested_networks=requested_networks, + macs=macs, + conductor_api=self.conductor_api, + security_groups=security_groups) + LOG.debug(_('Instance network_info: |%s|'), nwinfo, + instance=instance) + return nwinfo + except Exception: + exc_info = sys.exc_info() + log_info = {'attempt': attempt, + 'attempts': attempts} + if attempt == attempts: + LOG.exception(_('Instance failed network setup ' + 'after %(attempts)d attempt(s)'), + log_info) + raise exc_info[0], exc_info[1], exc_info[2] + LOG.warn(_('Instance failed network setup ' + '(attempt %(attempt)d of %(attempts)d)'), + log_info, instance=instance) + time.sleep(retry_time) + retry_time *= 2 + if retry_time > 30: + retry_time = 30 + # Not reached. 
+ def _allocate_network(self, context, instance, requested_networks, macs, security_groups): """Start network allocation asynchronously. Return an instance @@ -1161,25 +1208,9 @@ class ComputeManager(manager.SchedulerDependentManager): task_state=task_states.NETWORKING, expected_task_state=None) is_vpn = pipelib.is_vpn_image(instance['image_ref']) - - def async_alloc(): - LOG.debug(_("Allocating IP information in the background."), - instance=instance) - try: - nwinfo = self.network_api.allocate_for_instance( - context, instance, vpn=is_vpn, - requested_networks=requested_networks, - macs=macs, - conductor_api=self.conductor_api, - security_groups=security_groups) - except Exception: - with excutils.save_and_reraise_exception(): - LOG.exception(_('Instance failed network setup'), - instance=instance) - LOG.debug(_('Instance network_info: |%s|'), nwinfo, - instance=instance) - return nwinfo - return network_model.NetworkInfoAsyncWrapper(async_alloc) + return network_model.NetworkInfoAsyncWrapper( + self._allocate_network_async, context, instance, + requested_networks, macs, security_groups, is_vpn) def _prep_block_device(self, context, instance, bdms): """Set up the block device for an instance with error logging.""" diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 7953f8b63..466a039a1 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -1079,6 +1079,103 @@ class ComputeTestCase(BaseTestCase): self._assert_state({'vm_state': vm_states.ERROR, 'task_state': None}) + def test_allocate_network_succeeds_after_retries(self): + # Undo setUp() stubs as this is a true unit test + self.stubs.UnsetAll() + self.flags(network_allocate_retries=8) + + nwapi = self.compute.network_api + self.mox.StubOutWithMock(nwapi, 'allocate_for_instance') + self.mox.StubOutWithMock(time, 'sleep') + + instance = {} + is_vpn = 'fake-is-vpn' + req_networks = 'fake-req-networks' + macs = 'fake-macs' + sec_groups = 
'fake-sec-groups' + final_result = 'meow' + + expected_sleep_times = [1, 2, 4, 8, 16, 30, 30, 30] + + for sleep_time in expected_sleep_times: + nwapi.allocate_for_instance( + self.context, instance, vpn=is_vpn, + requested_networks=req_networks, macs=macs, + conductor_api=self.compute.conductor_api, + security_groups=sec_groups).AndRaise( + test.TestingException()) + time.sleep(sleep_time) + + nwapi.allocate_for_instance( + self.context, instance, vpn=is_vpn, + requested_networks=req_networks, macs=macs, + conductor_api=self.compute.conductor_api, + security_groups=sec_groups).AndReturn(final_result) + + self.mox.ReplayAll() + + res = self.compute._allocate_network_async(self.context, instance, + req_networks, + macs, + sec_groups, + is_vpn) + self.assertEqual(final_result, res) + + def test_allocate_network_fails(self): + # Undo setUp() stubs as this is a true unit test + self.stubs.UnsetAll() + self.flags(network_allocate_retries=0) + + nwapi = self.compute.network_api + self.mox.StubOutWithMock(nwapi, 'allocate_for_instance') + + instance = {} + is_vpn = 'fake-is-vpn' + req_networks = 'fake-req-networks' + macs = 'fake-macs' + sec_groups = 'fake-sec-groups' + + nwapi.allocate_for_instance( + self.context, instance, vpn=is_vpn, + requested_networks=req_networks, macs=macs, + conductor_api=self.compute.conductor_api, + security_groups=sec_groups).AndRaise(test.TestingException()) + + self.mox.ReplayAll() + + self.assertRaises(test.TestingException, + self.compute._allocate_network_async, + self.context, instance, req_networks, macs, + sec_groups, is_vpn) + + def test_allocate_network_neg_conf_value_treated_as_zero(self): + # Undo setUp() stubs as this is a true unit test + self.stubs.UnsetAll() + self.flags(network_allocate_retries=-1) + + nwapi = self.compute.network_api + self.mox.StubOutWithMock(nwapi, 'allocate_for_instance') + + instance = {} + is_vpn = 'fake-is-vpn' + req_networks = 'fake-req-networks' + macs = 'fake-macs' + sec_groups = 'fake-sec-groups' + 
+ # Only attempted once. + nwapi.allocate_for_instance( + self.context, instance, vpn=is_vpn, + requested_networks=req_networks, macs=macs, + conductor_api=self.compute.conductor_api, + security_groups=sec_groups).AndRaise(test.TestingException()) + + self.mox.ReplayAll() + + self.assertRaises(test.TestingException, + self.compute._allocate_network_async, + self.context, instance, req_networks, macs, + sec_groups, is_vpn) + def test_run_instance_dealloc_network_instance_not_found(self): """spawn network deallocate test. |