From 60965a50bcf4b1d1e51d01a8581ae9cb011b8923 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 29 Nov 2012 22:03:34 -0500 Subject: Get instances from conductor in init_host. Update compute's init_host() to get the list of instances on this host via the conductor service. This removes a db read from compute. Most of the test fixes are to ensure conductor is running anywhere that compute is running. The EC2 availability zones test change (from 13 to 15) is because this output includes info on each service that's running, so running nova-conductor in this test suite added more info here. Note that this uses the previously-added ping() call in conductor to determine when the service is available. The compute manager pings the conductor every ten seconds for ten attempts and then falls back to the default configured RPC timeout. This should be a reasonable compromise between requiring strict service startup ordering and extreme delays. Part of blueprint no-db-compute. Change-Id: Ie2953f7ae79819a1b6e24e8997ed4332fd4d2356 --- nova/compute/manager.py | 32 +++++++++++++++++++++++++---- nova/tests/api/ec2/test_cinder_cloud.py | 2 ++ nova/tests/api/ec2/test_cloud.py | 4 +++- nova/tests/api/ec2/test_ec2_validate.py | 2 ++ nova/tests/compute/test_compute.py | 15 ++++++++++++++ nova/tests/integrated/integrated_helpers.py | 6 ++++-- nova/tests/network/test_manager.py | 4 ++++ nova/tests/test_test.py | 6 ++++++ 8 files changed, 64 insertions(+), 7 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 7ac6b1518..25c9a0b3c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -66,6 +66,7 @@ from nova.openstack.common import lockutils from nova.openstack.common import log as logging from nova.openstack.common.notifier import api as notifier from nova.openstack.common import rpc +from nova.openstack.common.rpc import common as rpc_common from nova.openstack.common import timeutils from nova import quota from nova.scheduler import rpcapi as scheduler_rpcapi @@ -359,6 +360,32 @@ class ComputeManager(manager.SchedulerDependentManager): 'trying to set it to ERROR'), instance_uuid=instance_uuid) + def _get_instances_at_startup(self, context): + '''Get instances for this host during service init.''' + attempt = 0 + timeout = 10 + while True: + # NOTE(danms): Try ten times with a short timeout, and then punt + # to the configured RPC timeout after that + if attempt == 10: + timeout = None + attempt += 1 + + # NOTE(russellb): This is running during service startup. If we + # allow an exception to be raised, the service will shut down. + # This may fail the first time around if nova-conductor wasn't + # running when nova-compute started. + try: + self.conductor_api.ping(context, '1.21 GigaWatts', + timeout=timeout) + break + except rpc_common.Timeout as e: + LOG.exception(_('Timed out waiting for nova-conductor. ' + 'Is it running? Or did nova-compute start ' + 'before nova-conductor?')) + + return self.conductor_api.instance_get_all_by_host(context, self.host) + def _init_instance(self, context, instance): '''Initialize this instance during service init.''' db_state = instance['power_state'] @@ -417,10 +444,7 @@ class ComputeManager(manager.SchedulerDependentManager): """Initialization for a standalone compute service.""" self.driver.init_host(host=self.host) context = nova.context.get_admin_context() - - # NOTE(danms): this requires some care since conductor - # may not be up and fielding requests by the time compute is - instances = self.db.instance_get_all_by_host(context, self.host) + instances = self._get_instances_at_startup(context) if CONF.defer_iptables_apply: self.driver.filter_defer_apply_on() diff --git a/nova/tests/api/ec2/test_cinder_cloud.py b/nova/tests/api/ec2/test_cinder_cloud.py index 61402ce0e..c333def6c 100644 --- a/nova/tests/api/ec2/test_cinder_cloud.py +++ b/nova/tests/api/ec2/test_cinder_cloud.py @@ -124,6 +124,8 @@ class CinderCloudTestCase(test.TestCase): self.flags(use_local=True, group='conductor') # set up services + self.conductor = self.start_service('conductor', + manager=CONF.conductor.manager) self.compute = self.start_service('compute') self.scheduler = self.start_service('scheduler') self.network = self.start_service('network') diff --git a/nova/tests/api/ec2/test_cloud.py b/nova/tests/api/ec2/test_cloud.py index 284298585..d452c18cb 100644 --- a/nova/tests/api/ec2/test_cloud.py +++ b/nova/tests/api/ec2/test_cloud.py @@ -139,6 +139,8 @@ class CloudTestCase(test.TestCase): self.flags(use_local=True, group='conductor') # set up services + self.conductor = self.start_service('conductor', + manager=CONF.conductor.manager) self.compute = self.start_service('compute') self.scheduler = self.start_service('scheduler') self.network = self.start_service('network') @@ -730,7 +732,7 @@ class CloudTestCase(test.TestCase): result = self.cloud.describe_availability_zones(admin_ctxt, zone_name='verbose') - self.assertEqual(len(result['availabilityZoneInfo']), 13) + self.assertEqual(len(result['availabilityZoneInfo']), 15) db.service_destroy(self.context, service1['id']) db.service_destroy(self.context, service2['id']) diff --git a/nova/tests/api/ec2/test_ec2_validate.py b/nova/tests/api/ec2/test_ec2_validate.py index e3dfd0029..cbb3f81e3 100644 --- a/nova/tests/api/ec2/test_ec2_validate.py +++ b/nova/tests/api/ec2/test_ec2_validate.py @@ -52,6 +52,8 @@ class EC2ValidateTestCase(test.TestCase): self.cloud = cloud.CloudController() # set up services + self.conductor = self.start_service('conductor', + manager=CONF.conductor.manager) self.compute = self.start_service('compute') self.scheduter = self.start_service('scheduler') self.network = self.start_service('network') diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index 4099a2a15..4c26e42b5 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -3095,6 +3095,21 @@ class ComputeTestCase(BaseTestCase): instance = self._create_fake_instance(params) self.compute._instance_update(self.context, instance['uuid']) + def test_startup_conductor_ping(self): + timeouts = [] + calls = dict(count=0) + + def fake_ping(context, message, timeout): + timeouts.append(timeout) + calls['count'] += 1 + if calls['count'] < 15: + raise rpc_common.Timeout("fake") + + self.stubs.Set(self.compute.conductor_api, 'ping', fake_ping) + self.compute._get_instances_at_startup(self.context) + self.assertEqual(timeouts.count(10), 10) + self.assertTrue(None in timeouts) + class ComputeAPITestCase(BaseTestCase): diff --git a/nova/tests/integrated/integrated_helpers.py b/nova/tests/integrated/integrated_helpers.py index 1f6a278cf..e20d6881b 100644 --- a/nova/tests/integrated/integrated_helpers.py +++ b/nova/tests/integrated/integrated_helpers.py @@ -24,6 +24,7 @@ import string import uuid import nova.image.glance +from nova.openstack.common import cfg from nova.openstack.common.log import logging from nova import service from nova import test # For the flags @@ -32,6 +33,7 @@ import nova.tests.image.fake from nova.tests.integrated.api import client +CONF = cfg.CONF LOG = logging.getLogger(__name__) @@ -73,12 +75,12 @@ class _IntegratedTestBase(test.TestCase): 'chance.ChanceScheduler') # set up services + self.conductor = self.start_service('conductor', + manager=CONF.conductor.manager) self.compute = self.start_service('compute') self.scheduler = self.start_service('cert') self.network = self.start_service('network') self.scheduler = self.start_service('scheduler') - self.conductor = self.start_service( - 'conductor', manager='nova.conductor.manager.ConductorManager') self._start_api_service() diff --git a/nova/tests/network/test_manager.py b/nova/tests/network/test_manager.py index b45a290c0..2a5a0bb87 100644 --- a/nova/tests/network/test_manager.py +++ b/nova/tests/network/test_manager.py @@ -27,6 +27,7 @@ from nova import exception from nova import ipv6 from nova.network import linux_net from nova.network import manager as network_manager +from nova.openstack.common import cfg from nova.openstack.common import importutils from nova.openstack.common import log as logging from nova.openstack.common import rpc @@ -39,6 +40,7 @@ from nova.tests import matchers from nova import utils +CONF = cfg.CONF LOG = logging.getLogger(__name__) @@ -1585,6 +1587,8 @@ class AllocateTestCase(test.TestCase): def test_allocate_for_instance(self): address = "10.10.10.10" self.flags(auto_assign_floating_ip=True) + self.conductor = self.start_service( + 'conductor', manager=CONF.conductor.manager) self.compute = self.start_service('compute') self.network = self.start_service('network') diff --git a/nova/tests/test_test.py b/nova/tests/test_test.py index 2e045b2ac..9e2d3560c 100644 --- a/nova/tests/test_test.py +++ b/nova/tests/test_test.py @@ -18,10 +18,15 @@ """Tests for the testing base code.""" +from nova.openstack.common import cfg from nova.openstack.common import rpc from nova import test +CONF = cfg.CONF +CONF.import_opt('use_local', 'nova.conductor.api', group='conductor') + + class IsolationTestCase(test.TestCase): """Ensure that things are cleaned up after failed tests. @@ -30,6 +35,7 @@ class IsolationTestCase(test.TestCase): """ def test_service_isolation(self): + self.flags(use_local=True, group='conductor') self.useFixture(test.ServiceFixture('compute')) def test_rpc_consumer_isolation(self): -- cgit