author     Hans Lindgren <hanlind@kth.se>  2013-01-09 16:01:52 +0100
committer  Hans Lindgren <hanlind@kth.se>  2013-01-22 22:55:47 +0100
commit     3783cf3cc9c571beb9c75e5b0e39bf449520aaf3 (patch)
tree       cd36b4c8e885673fa0a1e19929a58232869bbb23
parent     cd4093e0f2a7d07fa81915dc4866f4ac7324a028 (diff)
Add support for memory overcommit in live-migration
Override the live-migration memory check in FilterScheduler and make it
use RamFilter for checking available memory on the target host.

Resolves bug 1068258.

Change-Id: I3002b8330e595ce71e2718b2091d3b0b99747707
-rw-r--r--  nova/scheduler/driver.py                        9
-rw-r--r--  nova/scheduler/filter_scheduler.py             27
-rw-r--r--  nova/tests/scheduler/test_filter_scheduler.py 138
-rw-r--r--  nova/tests/scheduler/test_scheduler.py         30
4 files changed, 187 insertions(+), 17 deletions(-)
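For orientation before the diff: the commit message delegates the live-migration memory check to RamFilter, whose per-host rule boils down to a few lines. A minimal sketch, assuming the era's filter semantics and the ram_allocation_ratio default of 1.5 that the tests below rely on (an illustration, not the real nova/scheduler/filters/ram_filter.py):

    # Illustrative only; names mirror nova's RamFilter, but this is a sketch.
    def host_passes_ram_filter(host_state, instance_type,
                               ram_allocation_ratio=1.5):
        """True if the host can fit the instance's RAM, allowing overcommit."""
        requested_ram = instance_type['memory_mb']
        total_ram_mb = host_state['memory_mb']
        free_ram_mb = host_state['free_ram_mb']  # negative once overcommitted
        # Overcommit scales the host's effective capacity.
        memory_mb_limit = total_ram_mb * ram_allocation_ratio
        used_ram_mb = total_ram_mb - free_ram_mb
        return memory_mb_limit - used_ram_mb >= requested_ram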
diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index 09de10388..98d75076d 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -263,16 +263,9 @@ class Scheduler(object):
"""
# Getting total available memory of host
- avail = self._get_compute_info(context, dest)['memory_mb']
-
- # Getting total used memory and disk of host
- # It should be sum of memories that are assigned as max value,
- # because overcommitting is risky.
- instance_refs = db.instance_get_all_by_host(context, dest)
- used = sum([i['memory_mb'] for i in instance_refs])
+ avail = self._get_compute_info(context, dest)['free_ram_mb']
mem_inst = instance_ref['memory_mb']
- avail = avail - used
if not mem_inst or avail <= mem_inst:
instance_uuid = instance_ref['uuid']
reason = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index 07a3f578a..8ddc3ad92 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -298,3 +298,30 @@ class FilterScheduler(driver.Scheduler):
# will change for the next instance.
best_host.obj.consume_from_instance(instance_properties)
return selected_hosts
+
+ def _assert_compute_node_has_enough_memory(self, context,
+ instance_ref, dest):
+ """Checks if destination host has enough memory for live migration.
+
+ :param context: security context
+ :param instance_ref: nova.db.sqlalchemy.models.Instance object
+ :param dest: destination host
+
+ """
+ compute = self._get_compute_info(context, dest)
+ node = compute.get('hypervisor_hostname')
+ host_state = self.host_manager.host_state_cls(dest, node)
+ host_state.update_from_compute_node(compute)
+
+ instance_type = instance_ref['instance_type']
+ filter_properties = {'instance_type': instance_type}
+
+ hosts = self.host_manager.get_filtered_hosts([host_state],
+ filter_properties,
+ 'RamFilter')
+ if not hosts:
+ instance_uuid = instance_ref['uuid']
+ reason = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
+ "Lack of memory")
+ raise exception.MigrationError(reason=reason % locals())
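Design note on the hunk above: instead of re-implementing the overcommit arithmetic, the override builds a HostState for just the destination and pushes it through the regular filter machinery, so live migration honors the operator's ram_allocation_ratio exactly as initial placement does. get_filtered_hosts() returns the candidates that pass the named filter; with a single candidate the result is either that one host or an empty list, and an empty list is translated into MigrationError.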
diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py
index 2bd2cb85b..086b6410c 100644
--- a/nova/tests/scheduler/test_filter_scheduler.py
+++ b/nova/tests/scheduler/test_filter_scheduler.py
@@ -19,15 +19,18 @@ Tests For Filter Scheduler.
import mox
from nova.compute import instance_types
+from nova.compute import rpcapi as compute_rpcapi
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
+from nova.openstack.common import rpc
from nova.scheduler import driver
from nova.scheduler import filter_scheduler
from nova.scheduler import host_manager
from nova.scheduler import weights
+from nova import servicegroup
from nova.tests.scheduler import fakes
from nova.tests.scheduler import test_scheduler
@@ -339,3 +342,138 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.assertEqual([['host', 'node']],
filter_properties['retry']['hosts'])
+
+ def test_live_migration_dest_check_service_memory_overcommit(self):
+ # Live-migration should work since default is to overcommit memory.
+ self.mox.StubOutWithMock(self.driver, '_live_migration_src_check')
+ self.mox.StubOutWithMock(db, 'service_get_by_compute_host')
+ self.mox.StubOutWithMock(servicegroup.API, 'service_is_up')
+ self.mox.StubOutWithMock(self.driver, '_get_compute_info')
+ self.mox.StubOutWithMock(self.driver, '_live_migration_common_check')
+ self.mox.StubOutWithMock(rpc, 'call')
+ self.mox.StubOutWithMock(self.driver.compute_rpcapi, 'live_migration')
+
+ dest = 'fake_host2'
+ block_migration = False
+ disk_over_commit = False
+ instance = self._live_migration_instance()
+
+ self.driver._live_migration_src_check(self.context, instance)
+ db.service_get_by_compute_host(self.context,
+ dest).AndReturn('fake_service3')
+ self.servicegroup_api.service_is_up('fake_service3').AndReturn(True)
+
+ self.driver._get_compute_info(self.context, dest).AndReturn(
+ {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': 512,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None})
+
+ self.driver._live_migration_common_check(self.context, instance, dest)
+
+ rpc.call(self.context, "compute.fake_host2",
+ {"method": 'check_can_live_migrate_destination',
+ "args": {'instance': instance,
+ 'block_migration': block_migration,
+ 'disk_over_commit': disk_over_commit},
+ "version": compute_rpcapi.ComputeAPI.BASE_RPC_API_VERSION},
+ None).AndReturn({})
+
+ self.driver.compute_rpcapi.live_migration(self.context,
+ host=instance['host'], instance=instance, dest=dest,
+ block_migration=block_migration, migrate_data={})
+
+ self.mox.ReplayAll()
+ result = self.driver.schedule_live_migration(self.context,
+ instance=instance, dest=dest,
+ block_migration=block_migration,
+ disk_over_commit=disk_over_commit)
+ self.assertEqual(result, None)
+
+ def test_live_migration_assert_memory_no_overcommit(self):
+ # Test that memory check passes with no memory overcommit.
+ def fake_get(context, host):
+ return {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': 1024,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None}
+
+ self.stubs.Set(self.driver, '_get_compute_info', fake_get)
+
+ self.flags(ram_allocation_ratio=1.0)
+ instance = self._live_migration_instance()
+ dest = 'fake_host2'
+ result = self.driver._assert_compute_node_has_enough_memory(
+ self.context, instance, dest)
+ self.assertEqual(result, None)
+
+ def test_live_migration_assert_memory_no_overcommit_lack_memory(self):
+ # Test that memory check fails with no memory overcommit.
+ def fake_get(context, host):
+ return {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': 1023,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None}
+
+ self.stubs.Set(self.driver, '_get_compute_info', fake_get)
+
+ self.flags(ram_allocation_ratio=1.0)
+ instance = self._live_migration_instance()
+ dest = 'fake_host2'
+ self.assertRaises(exception.MigrationError,
+ self.driver._assert_compute_node_has_enough_memory,
+ self.context, instance, dest)
+
+ def test_live_migration_assert_memory_overcommit(self):
+ # Test that memory check passes with memory overcommit.
+ def fake_get(context, host):
+ return {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': -1024,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None}
+
+ self.stubs.Set(self.driver, '_get_compute_info', fake_get)
+
+ self.flags(ram_allocation_ratio=2.0)
+ instance = self._live_migration_instance()
+ dest = 'fake_host2'
+ result = self.driver._assert_compute_node_has_enough_memory(
+ self.context, instance, dest)
+ self.assertEqual(result, None)
+
+ def test_live_migration_assert_memory_overcommit_lack_memory(self):
+ # Test that memory check fails with memory overcommit.
+ def fake_get(context, host):
+ return {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': -1025,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None}
+
+ self.stubs.Set(self.driver, '_get_compute_info', fake_get)
+
+ self.flags(ram_allocation_ratio=2.0)
+ instance = self._live_migration_instance()
+ dest = 'fake_host2'
+ self.assertRaises(exception.MigrationError,
+ self.driver._assert_compute_node_has_enough_memory,
+ self.context, instance, dest)
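The boundary values in these four tests fall straight out of the RamFilter arithmetic sketched near the top, using the 2048 MB host and the 1024 MB instance_type the fixtures supply; a quick worked check:

    # usable = memory_mb * ratio - (memory_mb - free_ram_mb); pass if >= 1024
    for ratio, free_ram_mb in [(1.0, 1024), (1.0, 1023),
                               (2.0, -1024), (2.0, -1025)]:
        usable = 2048 * ratio - (2048 - free_ram_mb)
        print(ratio, free_ram_mb, usable >= 1024)  # True, False, True, False

Each failing case misses by exactly 1 MB, which is why 1023 and -1025 are the chosen values.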
diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py
index eb4c3864f..7f9db1016 100644
--- a/nova/tests/scheduler/test_scheduler.py
+++ b/nova/tests/scheduler/test_scheduler.py
@@ -322,7 +322,8 @@ class SchedulerTestCase(test.TestCase):
'root_gb': 1024,
'ephemeral_gb': 0,
'vm_state': '',
- 'task_state': ''}
+ 'task_state': '',
+ 'instance_type': {'memory_mb': 1024}}
def test_live_migration_basic(self):
# Test basic schedule_live_migration functionality.
@@ -361,9 +362,7 @@ class SchedulerTestCase(test.TestCase):
self.mox.StubOutWithMock(servicegroup.API, 'service_is_up')
self.mox.StubOutWithMock(db, 'service_get_by_compute_host')
- self.mox.StubOutWithMock(db, 'instance_get_all_by_host')
self.mox.StubOutWithMock(rpc, 'call')
- self.mox.StubOutWithMock(rpc, 'cast')
self.mox.StubOutWithMock(self.driver.compute_rpcapi,
'live_migration')
@@ -384,9 +383,14 @@ class SchedulerTestCase(test.TestCase):
# assert_compute_node_has_enough_memory()
db.service_get_by_compute_host(self.context, dest).AndReturn(
{'compute_node': [{'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': 1280,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None,
'hypervisor_version': 1}]})
- db.instance_get_all_by_host(self.context, dest).AndReturn(
- [dict(memory_mb=256), dict(memory_mb=512)])
# Common checks (same hypervisor, etc)
db.service_get_by_compute_host(self.context, dest).AndReturn(
@@ -529,11 +533,14 @@ class SchedulerTestCase(test.TestCase):
def test_live_migration_dest_check_service_lack_memory(self):
# Confirms exception raises when dest doesn't have enough memory.
+ # Flag needed to make FilterScheduler test hit memory limit since the
+ # default for it is to allow memory overcommit by a factor of 1.5.
+ self.flags(ram_allocation_ratio=1.0)
+
self.mox.StubOutWithMock(self.driver, '_live_migration_src_check')
self.mox.StubOutWithMock(db, 'service_get_by_compute_host')
self.mox.StubOutWithMock(servicegroup.API, 'service_is_up')
self.mox.StubOutWithMock(self.driver, '_get_compute_info')
- self.mox.StubOutWithMock(db, 'instance_get_all_by_host')
dest = 'fake_host2'
block_migration = False
@@ -546,9 +553,14 @@ class SchedulerTestCase(test.TestCase):
self.servicegroup_api.service_is_up('fake_service3').AndReturn(True)
self.driver._get_compute_info(self.context, dest).AndReturn(
- {'memory_mb': 2048})
- db.instance_get_all_by_host(self.context, dest).AndReturn(
- [dict(memory_mb=1024), dict(memory_mb=512)])
+ {'memory_mb': 2048,
+ 'free_disk_gb': 512,
+ 'local_gb_used': 512,
+ 'free_ram_mb': 512,
+ 'local_gb': 1024,
+ 'vcpus': 4,
+ 'vcpus_used': 2,
+ 'updated_at': None})
self.mox.ReplayAll()
self.assertRaises(exception.MigrationError,