diff options
| author | Todd Willey <xtoddx@gmail.com> | 2012-01-08 12:51:35 -0500 |
|---|---|---|
| committer | Todd Willey <xtoddx@gmail.com> | 2012-01-08 15:36:56 -0500 |
| commit | e231a055c2a73f82597b0b2a690f2fb28472a99f (patch) | |
| tree | 161c832dde50c4596a8098c1c8e8d3ef1bf1e19a /nova | |
| parent | 410d8e8b3191021513648e98ed11980dfe968ab9 (diff) | |
Isolate certain images on certain hosts.
This implements a [hosts] <=> [images] mapping in the simple scheduler
that partitions your host resources into the part that services a
particular image set, and the general cloud. This is useful, for
example, if you want to specify a set of hosts to run utility VMs
(cloudpipe, bastion, etc) that you don't want consuming resources from
your generally available pool.
When specifying a host with --isolated_hosts flags (comma-separated
list) those hosts will only run the images specified in
--isolated_images, and will not run any other images. The isolated
images will not run on any other hosts.
You can specify --skip_isolated_core_check to allow overcommitting of
the isolated hosts. This allows utility vms that are not cpu bound to
avoid the resource checks the scheduler usually performs (based on
--max_cores).
Change-Id: Ib2db5a605cb7560a169af9ff2a6dadb649da9c1d
Diffstat (limited to 'nova')
| -rw-r--r-- | nova/scheduler/simple.py | 15 | ||||
| -rw-r--r-- | nova/tests/scheduler/test_scheduler.py | 105 |
2 files changed, 118 insertions, 2 deletions
diff --git a/nova/scheduler/simple.py b/nova/scheduler/simple.py index aadbe55bd..23a58e5bc 100644 --- a/nova/scheduler/simple.py +++ b/nova/scheduler/simple.py @@ -36,6 +36,10 @@ flags.DEFINE_integer("max_networks", 1000, "maximum number of networks to allow per host") flags.DEFINE_string('default_schedule_zone', None, 'zone to use when user doesnt specify one') +flags.DEFINE_list('isolated_images', [], 'Images to run on isolated host') +flags.DEFINE_list('isolated_hosts', [], 'Host reserved for specific images') +flags.DEFINE_boolean('skip_isolated_core_check', True, + 'Allow overcommitting vcpus on isolated hosts') class SimpleScheduler(chance.ChanceScheduler): @@ -58,12 +62,21 @@ class SimpleScheduler(chance.ChanceScheduler): return host results = db.service_get_all_compute_sorted(elevated) + in_isolation = instance_opts['image_ref'] in FLAGS.isolated_images + check_cores = not in_isolation or not FLAGS.skip_isolated_core_check if zone: results = [(service, cores) for (service, cores) in results if service['availability_zone'] == zone] for result in results: (service, instance_cores) = result - if instance_cores + instance_opts['vcpus'] > FLAGS.max_cores: + if in_isolation and service['host'] not in FLAGS.isolated_hosts: + # isloated images run on isolated hosts + continue + if service['host'] in FLAGS.isolated_hosts and not in_isolation: + # images that aren't isolated only run on general hosts + continue + if check_cores and \ + instance_cores + instance_opts['vcpus'] > FLAGS.max_cores: msg = _("Not enough allocatable CPU cores remaining") raise exception.NoValidHost(reason=msg) if self.service_is_up(service): diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py index 57218fca3..48f678d56 100644 --- a/nova/tests/scheduler/test_scheduler.py +++ b/nova/tests/scheduler/test_scheduler.py @@ -57,7 +57,8 @@ def _create_instance_dict(**kwargs): inst = {} # NOTE(jk0): If an integer is passed as the image_ref, the image # 
service will use the default image service (in this case, the fake). - inst['image_ref'] = 'cedef40a-ed67-4d10-800e-17455edce175' + inst['image_ref'] = kwargs.get('image_ref', + 'cedef40a-ed67-4d10-800e-17455edce175') inst['reservation_id'] = 'r-fakeres' inst['user_id'] = kwargs.get('user_id', 'admin') inst['project_id'] = kwargs.get('project_id', 'fake') @@ -791,6 +792,108 @@ class SimpleDriverTestCase(test.TestCase): compute1.terminate_instance(self.context, instance_uuids[0]) compute1.kill() + def test_isolation_of_images(self): + self.flags(isolated_images=['hotmess'], isolated_hosts=['host1']) + compute1 = self.start_service('compute', host='host1') + compute2 = self.start_service('compute', host='host2') + instance = _create_instance() + compute1.run_instance(self.context, instance['uuid']) + global instance_uuids + instance_uuids = [] + self.stubs.Set(SimpleScheduler, + 'create_instance_db_entry', _fake_create_instance_db_entry) + global _picked_host + _picked_host = None + self.stubs.Set(driver, + 'cast_to_compute_host', _fake_cast_to_compute_host) + request_spec = _create_request_spec(image_ref='hotmess') + self.scheduler.driver.schedule_run_instance(self.context, request_spec) + self.assertEqual(_picked_host, 'host1') + self.assertEqual(len(instance_uuids), 1) + compute1.terminate_instance(self.context, instance['uuid']) + compute1.terminate_instance(self.context, instance_uuids[0]) + compute1.kill() + compute2.kill() + + def test_non_isolation_of_not_isolated_images(self): + self.flags(isolated_images=['hotmess'], isolated_hosts=['host1']) + compute1 = self.start_service('compute', host='host1') + compute2 = self.start_service('compute', host='host2') + instance = _create_instance() + compute2.run_instance(self.context, instance['uuid']) + global instance_uuids + instance_uuids = [] + self.stubs.Set(SimpleScheduler, + 'create_instance_db_entry', _fake_create_instance_db_entry) + global _picked_host + _picked_host = None + self.stubs.Set(driver, + 
'cast_to_compute_host', _fake_cast_to_compute_host) + request_spec = _create_request_spec() + self.scheduler.driver.schedule_run_instance(self.context, request_spec) + self.assertEqual(_picked_host, 'host2') + self.assertEqual(len(instance_uuids), 1) + compute2.terminate_instance(self.context, instance['uuid']) + compute2.terminate_instance(self.context, instance_uuids[0]) + compute1.kill() + compute2.kill() + + def test_isolated_images_are_resource_bound(self): + """Ensures we don't go over max cores""" + self.flags(isolated_images=['hotmess'], isolated_hosts=['host1']) + compute1 = self.start_service('compute', host='host1') + instance_uuids1 = [] + for index in xrange(FLAGS.max_cores): + instance = _create_instance() + compute1.run_instance(self.context, instance['uuid']) + instance_uuids1.append(instance['uuid']) + + def _create_instance_db_entry(simple_self, context, request_spec): + self.fail(_("Shouldn't try to create DB entry when at " + "max cores")) + self.stubs.Set(SimpleScheduler, + 'create_instance_db_entry', _create_instance_db_entry) + + global _picked_host + _picked_host = None + self.stubs.Set(driver, + 'cast_to_compute_host', _fake_cast_to_compute_host) + + request_spec = _create_request_spec() + + self.assertRaises(exception.NoValidHost, + self.scheduler.driver.schedule_run_instance, + self.context, + request_spec) + for instance_uuid in instance_uuids1: + compute1.terminate_instance(self.context, instance_uuid) + compute1.kill() + + def test_isolated_images_disable_resource_checking(self): + self.flags(isolated_images=['hotmess'], isolated_hosts=['host1'], + skip_isolated_core_check=True) + compute1 = self.start_service('compute', host='host1') + global instance_uuids + instance_uuids = [] + for index in xrange(FLAGS.max_cores): + instance = _create_instance() + compute1.run_instance(self.context, instance['uuid']) + instance_uuids.append(instance['uuid']) + + self.stubs.Set(SimpleScheduler, + 'create_instance_db_entry', 
_fake_create_instance_db_entry) + global _picked_host + _picked_host = None + self.stubs.Set(driver, + 'cast_to_compute_host', _fake_cast_to_compute_host) + request_spec = _create_request_spec(image_ref='hotmess') + self.scheduler.driver.schedule_run_instance(self.context, request_spec) + self.assertEqual(_picked_host, 'host1') + self.assertEqual(len(instance_uuids), FLAGS.max_cores + 1) + for instance_uuid in instance_uuids: + compute1.terminate_instance(self.context, instance_uuid) + compute1.kill() + def test_too_many_cores(self): """Ensures we don't go over max cores""" compute1 = self.start_service('compute', host='host1') |
