From 8e57055cecef909b4d210baeedb5dad2d155a0a2 Mon Sep 17 00:00:00 2001
From: masumotok
Date: Fri, 6 Jan 2012 23:54:54 +0900
Subject: First implementation of bp/live-migration-resource-calc

Fix based on reviewer's comments.
Upgraded the migration version:
nova/db/sqlalchemy/migrate_repo/versions/069_block_migration.py
Rebased on master.

Change-Id: Ia762f8dec761c3d595bc6fcd39f127f6d92306d2
---
 Authors                                            |    2 +
 bin/nova-manage                                    |  107 ++++++++++++--------
 nova/compute/manager.py                            |   22 +++-
 .../migrate_repo/versions/069_block_migration.py   |   50 ++++++++++
 nova/db/sqlalchemy/models.py                       |    1 +
 nova/exception.py                                  |    4 +
 nova/scheduler/driver.py                           |  111 +++++++++++++++------
 nova/scheduler/manager.py                          |    3 +
 nova/scheduler/multi.py                            |    2 +-
 nova/tests/fake_libvirt_utils.py                   |    2 +-
 nova/tests/scheduler/test_scheduler.py             |   54 ++++++++--
 nova/tests/test_compute.py                         |    4 +
 nova/tests/test_instance_types.py                  |    2 +-
 nova/tests/test_libvirt.py                         |   94 +++++------------
 nova/tests/test_virt_drivers.py                    |    5 -
 nova/virt/fake.py                                  |    6 +-
 nova/virt/libvirt/connection.py                    |   77 +++++++++-----
 17 files changed, 357 insertions(+), 189 deletions(-)
 create mode 100644 nova/db/sqlalchemy/migrate_repo/versions/069_block_migration.py

diff --git a/Authors b/Authors
index e6eda9240..c7b1e894f 100644
--- a/Authors
+++ b/Authors
@@ -88,6 +88,7 @@ Justin Shepherd
 Kei Masumoto
 Keisuke Tagami
 masumoto
+masukotm
 Ken Pepple
 Kevin Bringard
 Kevin L. Mitchell
@@ -138,6 +139,7 @@ Stephanie Reese
 Thierry Carrez
 Tim Simpson
 Todd Willey
+Tomoya Masuko
 Trey Morris
 Troy Toman
 Tushar Patil
diff --git a/bin/nova-manage b/bin/nova-manage
index 6147b1202..3636d99ee 100755
--- a/bin/nova-manage
+++ b/bin/nova-manage
@@ -980,7 +980,8 @@ class VmCommands(object):
             instance['availability_zone'],
             instance['launch_index'])
 
-    def _migration(self, ec2_id, dest, block_migration=False):
+    def _migration(self, ec2_id, dest, block_migration=False,
+                   disk_over_commit=False):
         """Migrates a running instance to a new machine.
 
         :param ec2_id: instance id which comes from euca-describe-instance.
         :param dest: destination host name.
@@ -1007,7 +1008,8 @@
                           "args": {"instance_id": instance_id,
                                    "dest": dest,
                                    "topic": FLAGS.compute_topic,
-                                   "block_migration": block_migration}})
+                                   "block_migration": block_migration,
+                                   "disk_over_commit": disk_over_commit}})
 
         print _('Migration of %s initiated. '
                 'Check its progress using euca-describe-instances.') % ec2_id
@@ -1022,11 +1024,14 @@
 
     @args('--ec2_id', dest='ec2_id', metavar='', help='EC2 ID')
     @args('--dest', dest='dest', metavar='',
-            help='destanation node')
-    def block_migration(self, ec2_id, dest):
+            help='destination node')
+    @args('--disk_over_commit', dest='disk_over_commit',
+            metavar='',
+            help='Allow overcommit (default False)')
+    def block_migration(self, ec2_id, dest, disk_over_commit=False):
         """Migrates a running instance to a new machine with storage data."""
 
-        self._migration(ec2_id, dest, True)
+        self._migration(ec2_id, dest, True, disk_over_commit)
 
 
 class ServiceCommands(object):
@@ -1091,8 +1096,11 @@ class ServiceCommands(object):
 
     @args('--host', dest='host', metavar='', help='Host')
     def describe_resource(self, host):
-        """Describes cpu/memory/hdd info for host."""
+        """Describes cpu/memory/hdd info for host.
+
+        :param host: hostname.
+ """ result = rpc.call(context.get_admin_context(), FLAGS.scheduler_topic, {"method": "show_host_resources", @@ -1102,49 +1110,66 @@ class ServiceCommands(object): print _('An unexpected error has occurred.') print _('[Result]'), result else: - cpu = result['resource']['vcpus'] - mem = result['resource']['memory_mb'] - hdd = result['resource']['local_gb'] - cpu_u = result['resource']['vcpus_used'] - mem_u = result['resource']['memory_mb_used'] - hdd_u = result['resource']['local_gb_used'] - + # Printing a total and used_now + # (NOTE)The host name width 16 characters + print '%(a)-25s%(b)16s%(c)8s%(d)8s%(e)8s' % {"a": _('HOST'), + "b": _('PROJECT'), + "c": _('cpu'), + "d": _('mem(mb)'), + "e": _('hdd')} + print '%(a)-16s(total)%(b)26s%(c)8s%(d)8s' %\ + {"a": host, + "b": result['resource']['vcpus'], + "c": result['resource']['memory_mb'], + "d": result['resource']['local_gb']} + + print '%(a)-16s(used_now)%(b)23s%(c)8s%(d)8s' %\ + {"a": host, + "b": result['resource']['vcpus_used'], + "c": result['resource']['memory_mb_used'], + "d": result['resource']['local_gb_used']} + + # Printing a used_max cpu_sum = 0 mem_sum = 0 hdd_sum = 0 - print 'HOST\t\t\tPROJECT\t\tcpu\tmem(mb)\tdisk(gb)' - print '%s(total)\t\t\t%s\t%s\t%s' % (host, cpu, mem, hdd) - print '%s(used_now)\t\t\t%s\t%s\t%s' % (host, cpu_u, mem_u, hdd_u) - for p_id, val in result['usage'].items(): - cpu_sum += val['vcpus'] - mem_sum += val['memory_mb'] - hdd_sum += val['local_gb'] - print '%s(used_max)\t\t\t%s\t%s\t%s' % (host, cpu_sum, - mem_sum, hdd_sum) + ctxt = context.get_admin_context() + instance_refs = db.instance_get_all_by_host(ctxt, host) - for p_id, val in result['usage'].items(): - print '%s\t\t%s\t\t%s\t%s\t%s' % (host, - p_id, - val['vcpus'], - val['memory_mb'], - val['local_gb']) + project_ids = [i['project_id'] for i in instance_refs] + project_ids = list(set(project_ids)) + usage = dict() + for project_id in project_ids: + vcpus = [i['vcpus'] for i in instance_refs \ + if i['project_id'] == project_id] - @args('--host', dest='host', metavar='', help='Host') - def update_resource(self, host): - """Updates available vcpu/memory/disk info for host.""" + mem = [i['memory_mb'] for i in instance_refs \ + if i['project_id'] == project_id] - ctxt = context.get_admin_context() - service_refs = db.service_get_all_by_host(ctxt, host) - if len(service_refs) <= 0: - raise exception.Invalid(_('%s does not exist.') % host) + disk = [i['local_gb'] for i in instance_refs \ + if i['project_id'] == project_id] - service_refs = [s for s in service_refs if s['topic'] == 'compute'] - if len(service_refs) <= 0: - raise exception.Invalid(_('%s is not compute node.') % host) + usage[project_id] = { + 'vcpus': reduce(lambda x, y: x + y, vcpus), + 'memory_mb': reduce(lambda x, y: x + y, mem), + 'local_gb': reduce(lambda x, y: x + y, disk)} - rpc.call(ctxt, - db.queue_get_for(ctxt, FLAGS.compute_topic, host), - {"method": "update_available_resource"}) + for p_id, val in usage.items(): + cpu_sum += val['vcpus'] + mem_sum += val['memory_mb'] + hdd_sum += val['local_gb'] + print '%(a)-16s(used_max)%(b)23s%(c)8s%(d)8s' % {"a": host, + "b": cpu_sum, + "c": mem_sum, + "d": hdd_sum} + + for p_id, val in usage.items(): + print '%(a)-25s%(b)16s%(c)8s%(d)8s%(e)8s' %\ + {"a": host, + "b": p_id, + "c": val['vcpus'], + "d": val['memory_mb'], + "e": val['local_gb']} class HostCommands(object): diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 58b179464..2e24ffbae 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ 
@@ -1663,6 +1663,17 @@ class ComputeManager(manager.SchedulerDependentManager):
         """
         return self.driver.update_available_resource(context, self.host)
 
+    def get_instance_disk_info(self, context, instance_name):
+        """Gets information about the instance's current disk.
+
+        Implemented in nova.virt.libvirt.connection.
+
+        :param context: security context
+        :param instance_name: instance name
+
+        """
+        return self.driver.get_instance_disk_info(instance_name)
+
     def pre_live_migration(self, context, instance_id, time=None,
                            block_migration=False, disk=None):
         """Preparations for live migration at dest host.
@@ -1735,7 +1746,7 @@ class ComputeManager(manager.SchedulerDependentManager):
         :param context: security context
         :param instance_id: nova.db.sqlalchemy.models.Instance.Id
         :param dest: destination host
-        :param block_migration: if true, do block migration
+        :param block_migration: if True, prepare for block migration
 
         """
         # Get instance for error handling.
@@ -1751,8 +1762,7 @@ class ComputeManager(manager.SchedulerDependentManager):
                      "args": {'instance_id': instance_id}})
 
         if block_migration:
-            disk = self.driver.get_instance_disk_info(context,
-                                                      instance_ref)
+            disk = self.driver.get_instance_disk_info(instance_ref.name)
         else:
             disk = None
 
@@ -1790,7 +1800,7 @@ class ComputeManager(manager.SchedulerDependentManager):
         :param ctxt: security context
         :param instance_id: nova.db.sqlalchemy.models.Instance.Id
         :param dest: destination host
-        :param block_migration: if true, do block migration
+        :param block_migration: if True, prepare for block migration
 
         """
 
@@ -1879,7 +1889,7 @@ class ComputeManager(manager.SchedulerDependentManager):
 
         :param context: security context
         :param instance_id: nova.db.sqlalchemy.models.Instance.Id
-        :param block_migration: block_migration
+        :param block_migration: if True, prepare for block migration
 
         """
         instance_ref = self.db.instance_get(context, instance_id)
@@ -1900,6 +1910,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         :param dest: This method is called from live migration src host.
             This param specifies destination host.
+        :param block_migration: if True, prepare for block migration
+
         """
         host = instance_ref['host']
         self._instance_update(context,
diff --git a/nova/db/sqlalchemy/migrate_repo/versions/069_block_migration.py b/nova/db/sqlalchemy/migrate_repo/versions/069_block_migration.py
new file mode 100644
index 000000000..a16cd4dc8
--- /dev/null
+++ b/nova/db/sqlalchemy/migrate_repo/versions/069_block_migration.py
@@ -0,0 +1,50 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2010 United States Government as represented by the
+# Administrator of the National Aeronautics and Space Administration.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from sqlalchemy import Boolean, Column, DateTime, Integer, MetaData
+from sqlalchemy import Table, Text
+from nova import log as logging
+
+meta = MetaData()
+
+# Add disk_available_least column to compute_nodes table.
+# For qcow2 images, both the compressed (actual) and the virtual
+# disk size have to be considered.
+# disk_available stores "total disk size - used disk (compressed disk size)",
+# while disk_available_least stores
+# "total disk size - used disk (virtual disk size)".
+# The virtual disk size is used for kvm block migration.
+
+compute_nodes = Table('compute_nodes', meta,
+        Column('id', Integer(), primary_key=True, nullable=False))
+
+disk_available_least = Column('disk_available_least', Integer(), default=0)
+
+
+def upgrade(migrate_engine):
+    meta.bind = migrate_engine
+    try:
+        compute_nodes.create_column(disk_available_least)
+    except Exception:
+        logging.error(_("disk_available_least not added to compute_nodes"))
+        raise
+
+
+def downgrade(migrate_engine):
+    meta.bind = migrate_engine
+    compute_nodes.drop_column(disk_available_least)
diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py
index cdcef7179..92c9bb27f 100644
--- a/nova/db/sqlalchemy/models.py
+++ b/nova/db/sqlalchemy/models.py
@@ -147,6 +147,7 @@ class ComputeNode(BASE, NovaBase):
     # above, since it is copied from tag of getCapabilities()
     # (See libvirt.virtConnection).
     cpu_info = Column(Text, nullable=True)
+    disk_available_least = Column(Integer)
 
 
 class Certificate(BASE, NovaBase):
diff --git a/nova/exception.py b/nova/exception.py
index 870ae80e4..716f86dc1 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -299,6 +299,10 @@ class UnableToMigrateToSelf(Invalid):
                 "to current host (%(host)s).")
 
 
+class DestinationHostUnavailable(Invalid):
+    message = _("Destination compute host is unavailable at this time.")
+
+
 class SourceHostUnavailable(Invalid):
     message = _("Original compute host is unavailable at this time.")
 
diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index 85acf6a09..b3b598de9 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -169,12 +169,16 @@ class Scheduler(object):
         raise NotImplementedError(_("Must implement a fallback schedule"))
 
     def schedule_live_migration(self, context, instance_id, dest,
-                                block_migration=False):
+                                block_migration=False,
+                                disk_over_commit=False):
         """Live migration scheduling method.
-
         :param context:
         :param instance_id:
         :param dest: destination host
+        :param block_migration: if True, do block migration.
+        :param disk_over_commit: if True, consider real (not virtual)
+                                 disk size.
+
         :return:
             The host where instance is running currently.
             Then the scheduler sends a request to that host.
@@ -187,10 +191,12 @@
 
         # Checking destination host.
         self._live_migration_dest_check(context, instance_ref,
-                                        dest, block_migration)
+                                        dest, block_migration,
+                                        disk_over_commit)
         # Common checking.
         self._live_migration_common_check(context, instance_ref,
-                                          dest, block_migration)
+                                          dest, block_migration,
+                                          disk_over_commit)
 
         # Changing instance_state.
         values = {"vm_state": vm_states.MIGRATING}
@@ -238,13 +244,15 @@
             raise exception.ComputeServiceUnavailable(host=src)
 
     def _live_migration_dest_check(self, context, instance_ref, dest,
-                                   block_migration):
+                                   block_migration, disk_over_commit):
         """Live migration check routine (for destination host).
 
         :param context: security context
        :param instance_ref: nova.db.sqlalchemy.models.Instance object
         :param dest: destination host
-
+        :param block_migration: if True, do block migration.
+        :param disk_over_commit: if True, consider real (not virtual)
+                                 disk size.
         """
 
         # Checking dest exists and compute node.
@@ -267,10 +275,11 @@
 
         self.assert_compute_node_has_enough_resources(context,
                                                       instance_ref,
                                                       dest,
-                                                      block_migration)
+                                                      block_migration,
+                                                      disk_over_commit)
 
     def _live_migration_common_check(self, context, instance_ref, dest,
-                                     block_migration):
+                                     block_migration, disk_over_commit):
         """Live migration common check routine.
         Below checkings are followed by
@@ -279,7 +288,9 @@
         :param context: security context
         :param instance_ref: nova.db.sqlalchemy.models.Instance object
         :param dest: destination host
-        :param block_migration if True, check for block_migration.
+        :param block_migration: if True, do block migration.
+        :param disk_over_commit: if True, consider real (not virtual)
+                                 disk size.
 
         """
@@ -300,7 +311,7 @@
                         "and %(dest)s.") % locals())
             raise
 
-        # Checking dest exists.
+        # Checking destination host exists.
         dservice_refs = db.service_get_all_compute_by_host(context, dest)
         dservice_ref = dservice_refs[0]['compute_node'][0]
 
@@ -338,20 +349,26 @@
             raise
 
     def assert_compute_node_has_enough_resources(self, context, instance_ref,
-                                                 dest, block_migration):
+                                                 dest, block_migration,
+                                                 disk_over_commit):
 
         """Checks if destination host has enough resource for live migration.
 
         :param context: security context
         :param instance_ref: nova.db.sqlalchemy.models.Instance object
         :param dest: destination host
-        :param block_migration: if True, disk checking has been done
+        :param block_migration: if True, do block migration.
+        :param disk_over_commit: if True, consider real (not virtual)
+                                 disk size.
 
         """
-        self.assert_compute_node_has_enough_memory(context, instance_ref, dest)
+        self.assert_compute_node_has_enough_memory(context,
+                                                   instance_ref, dest)
         if not block_migration:
             return
-        self.assert_compute_node_has_enough_disk(context, instance_ref, dest)
+        self.assert_compute_node_has_enough_disk(context,
+                                                 instance_ref, dest,
+                                                 disk_over_commit)
 
     def assert_compute_node_has_enough_memory(self, context,
                                               instance_ref, dest):
@@ -364,7 +381,7 @@
 
         """
 
-        # Getting total available memory and disk of host
+        # Getting total available memory of host
         avail = self._get_compute_info(context, dest, 'memory_mb')
 
         # Getting total used memory and disk of host
@@ -385,35 +402,65 @@
                        "instance:%(mem_inst)s)")
             raise exception.MigrationError(reason=reason % locals())
 
-    def assert_compute_node_has_enough_disk(self, context,
-                                            instance_ref, dest):
+    def assert_compute_node_has_enough_disk(self, context, instance_ref, dest,
+                                            disk_over_commit):
         """Checks if destination host has enough disk for block migration.
 
         :param context: security context
         :param instance_ref: nova.db.sqlalchemy.models.Instance object
         :param dest: destination host
+        :param disk_over_commit: if True, consider real (not virtual)
+                                 disk size.
 
         """
 
-        # Getting total available memory and disk of host
-        avail = self._get_compute_info(context, dest, 'local_gb')
+        # Libvirt supports the qcow2 disk format, which is usually
+        # compressed on compute nodes.
+        # A real (compressed) disk image may be enlarged up to its
+        # "virtual disk size", which is specified as the maximum disk size.
+        # (See `qemu-img info path-to-disk`.)
+        # The scheduler recognizes that the destination host still has
+        # enough disk space if "real disk size < available disk size" when
+        # disk_over_commit is True, and if "virtual disk size < available
+        # disk size" otherwise.
+
+        # Refresh compute_nodes table
+        topic = db.queue_get_for(context, FLAGS.compute_topic, dest)
+        rpc.call(context, topic,
+                 {"method": "update_available_resource"})
+
+        # Getting total available disk of host
+        available_gb = self._get_compute_info(context,
+                                              dest, 'disk_available_least')
+        available = available_gb * (1024 ** 3)
+
+        # Getting necessary disk size
+        try:
+            topic = db.queue_get_for(context, FLAGS.compute_topic,
+                                     instance_ref['host'])
+            ret = rpc.call(context, topic,
+                           {"method": 'get_instance_disk_info',
+                            "args": {'instance_name': instance_ref.name}})
+            disk_infos = utils.loads(ret)
+        except rpc.RemoteError:
+            src = instance_ref['host']
+            LOG.exception(_("host %(dest)s is not compatible with "
+                            "original host %(src)s.") % locals())
+            raise
 
-        # Getting total used memory and disk of host
-        # It should be sum of disks that are assigned as max value
-        # because overcommiting is risky.
-        used = 0
-        instance_refs = db.instance_get_all_by_host(context, dest)
-        used_list = [i['local_gb'] for i in instance_refs]
-        if used_list:
-            used = reduce(lambda x, y: x + y, used_list)
+        necessary = 0
+        if disk_over_commit:
+            for info in disk_infos:
+                necessary += int(info['disk_size'])
+        else:
+            for info in disk_infos:
+                necessary += int(info['virt_disk_size'])
 
-        disk_inst = instance_ref['local_gb']
-        avail = avail - used
-        if avail <= disk_inst:
+        # Check that available disk > necessary disk
+        if (available - necessary) < 0:
             instance_id = ec2utils.id_to_ec2_id(instance_ref['id'])
             reason = _("Unable to migrate %(instance_id)s to %(dest)s: "
-                       "Lack of disk(host:%(avail)s "
-                       "<= instance:%(disk_inst)s)")
+                       "Lack of disk(host:%(available)s "
+                       "<= instance:%(necessary)s)")
             raise exception.MigrationError(reason=reason % locals())
 
     def _get_compute_info(self, context, host, key):
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 7edc4e4f2..c74988cda 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -148,6 +148,9 @@ class SchedulerManager(manager.Manager):
                 'local_gb_used': 64}
 
         """
+        # Update the compute_nodes table so the latest resource info is used
+        topic = db.queue_get_for(context, FLAGS.compute_topic, host)
+        rpc.call(context, topic, {"method": "update_available_resource"})
 
         # Getting compute node info and related instances info
         compute_ref = db.service_get_all_compute_by_host(context, host)
diff --git a/nova/scheduler/multi.py b/nova/scheduler/multi.py
index 511a8fa47..83ecd8e63 100644
--- a/nova/scheduler/multi.py
+++ b/nova/scheduler/multi.py
@@ -20,7 +20,6 @@
 """
 Scheduler that allows routing some calls to one driver and others to another.
""" - from nova import flags from nova import utils from nova.scheduler import driver @@ -39,6 +38,7 @@ flags.DEFINE_string('volume_scheduler_driver', _METHOD_MAP = {'run_instance': 'compute', 'start_instance': 'compute', 'prep_resize': 'compute', + 'live_migration': 'compute', 'create_volume': 'volume', 'create_volumes': 'volume'} diff --git a/nova/tests/fake_libvirt_utils.py b/nova/tests/fake_libvirt_utils.py index 085a61bc9..b05f11178 100644 --- a/nova/tests/fake_libvirt_utils.py +++ b/nova/tests/fake_libvirt_utils.py @@ -33,7 +33,7 @@ def get_disk_size(path): return disk_sizes.get(path, 1024 * 1024 * 20) -def get_backing_file(path): +def get_disk_backing_file(path): return disk_backing_files.get(path, None) diff --git a/nova/tests/scheduler/test_scheduler.py b/nova/tests/scheduler/test_scheduler.py index 48f678d56..550964844 100644 --- a/nova/tests/scheduler/test_scheduler.py +++ b/nova/tests/scheduler/test_scheduler.py @@ -36,6 +36,7 @@ from nova import rpc from nova import utils from nova.scheduler import api from nova.scheduler import driver +from nova.scheduler import zone_manager from nova.scheduler import manager from nova.scheduler.simple import SimpleScheduler from nova.compute import power_state @@ -999,9 +1000,9 @@ class SimpleDriverTestCase(test.TestCase): self.mox.StubOutWithMock(driver_i, '_live_migration_common_check') driver_i._live_migration_src_check(nocare, nocare) driver_i._live_migration_dest_check(nocare, nocare, - i_ref['host'], False) + i_ref['host'], False, False) driver_i._live_migration_common_check(nocare, nocare, - i_ref['host'], False) + i_ref['host'], False, False) self.mox.StubOutWithMock(rpc, 'cast', use_mock_anything=True) kwargs = {'instance_id': instance_id, 'dest': i_ref['host'], 'block_migration': False} @@ -1013,7 +1014,8 @@ class SimpleDriverTestCase(test.TestCase): self.scheduler.live_migration(self.context, FLAGS.compute_topic, instance_id=instance_id, dest=i_ref['host'], - block_migration=False) + block_migration=False, + disk_over_commit=False) i_ref = db.instance_get(self.context, instance_id) self.assertTrue(i_ref['vm_state'] == vm_states.MIGRATING) @@ -1095,7 +1097,22 @@ class SimpleDriverTestCase(test.TestCase): self.assertRaises(exception.ComputeServiceUnavailable, self.scheduler.driver._live_migration_dest_check, - self.context, i_ref, i_ref['host'], False) + self.context, i_ref, i_ref['host'], False, False) + + db.instance_destroy(self.context, instance_id) + db.service_destroy(self.context, s_ref['id']) + + def test_live_migration_dest_check_not_alive(self): + """Confirms exception raises in case dest host does not exist.""" + instance_id = _create_instance()['id'] + i_ref = db.instance_get(self.context, instance_id) + t = utils.utcnow() - datetime.timedelta(10) + s_ref = self._create_compute_service(created_at=t, updated_at=t, + host=i_ref['host']) + + self.assertRaises(exception.ComputeServiceUnavailable, + self.scheduler.driver._live_migration_dest_check, + self.context, i_ref, i_ref['host'], False, False) db.instance_destroy(self.context, instance_id) db.service_destroy(self.context, s_ref['id']) @@ -1108,7 +1125,7 @@ class SimpleDriverTestCase(test.TestCase): self.assertRaises(exception.UnableToMigrateToSelf, self.scheduler.driver._live_migration_dest_check, - self.context, i_ref, i_ref['host'], False) + self.context, i_ref, i_ref['host'], False, False) db.instance_destroy(self.context, instance_id) db.service_destroy(self.context, s_ref['id']) @@ -1123,7 +1140,7 @@ class SimpleDriverTestCase(test.TestCase): 
 
         self.assertRaises(exception.MigrationError,
                           self.scheduler.driver._live_migration_dest_check,
-                          self.context, i_ref, 'somewhere', False)
+                          self.context, i_ref, 'somewhere', False, False)
 
         db.instance_destroy(self.context, instance_id)
         db.instance_destroy(self.context, instance_id2)
@@ -1139,7 +1156,7 @@
 
         self.assertRaises(exception.MigrationError,
                           self.scheduler.driver._live_migration_dest_check,
-                          self.context, i_ref, 'somewhere', True)
+                          self.context, i_ref, 'somewhere', True, False)
 
         db.instance_destroy(self.context, instance_id)
         db.instance_destroy(self.context, instance_id2)
@@ -1155,7 +1172,7 @@
         ret = self.scheduler.driver._live_migration_dest_check(self.context,
                                                                i_ref,
                                                                'somewhere',
-                                                               False)
+                                                               False, False)
         self.assertTrue(ret is None)
         db.instance_destroy(self.context, instance_id)
         db.service_destroy(self.context, s_ref['id'])
@@ -1191,7 +1208,7 @@
         #self.assertRaises(exception.SourceHostUnavailable,
         self.assertRaises(exception.FileNotFound,
                           self.scheduler.driver._live_migration_common_check,
-                          self.context, i_ref, dest, False)
+                          self.context, i_ref, dest, False, False)
 
         db.instance_destroy(self.context, instance_id)
         db.service_destroy(self.context, s_ref['id'])
@@ -1215,7 +1232,7 @@
         self.mox.ReplayAll()
         self.assertRaises(exception.InvalidHypervisorType,
                           self.scheduler.driver._live_migration_common_check,
-                          self.context, i_ref, dest, False)
+                          self.context, i_ref, dest, False, False)
 
         db.instance_destroy(self.context, instance_id)
         db.service_destroy(self.context, s_ref['id'])
@@ -1241,7 +1258,7 @@
         self.mox.ReplayAll()
         self.assertRaises(exception.DestinationHypervisorTooOld,
                           self.scheduler.driver._live_migration_common_check,
-                          self.context, i_ref, dest, False)
+                          self.context, i_ref, dest, False, False)
 
         db.instance_destroy(self.context, instance_id)
         db.service_destroy(self.context, s_ref['id'])
@@ -1275,6 +1292,7 @@
             driver._live_migration_common_check(self.context,
                                                 i_ref,
                                                 dest,
+                                                False,
                                                 False)
         except rpc.RemoteError, e:
             c = (e.exc_type == exception.InvalidCPUInfo)
@@ -1284,6 +1302,21 @@
         db.service_destroy(self.context, s_ref['id'])
         db.service_destroy(self.context, s_ref2['id'])
 
+    def test_exception_puts_instance_in_error_state(self):
+        """Test that an exception from the scheduler puts an instance
+        in the ERROR state."""
+
+        scheduler = manager.SchedulerManager()
+        ctxt = context.get_admin_context()
+        inst = _create_instance()
+        self.assertRaises(Exception, scheduler._schedule,
+                          'failing_method', ctxt, 'scheduler',
+                          instance_id=inst['uuid'])
+
+        # Refresh the instance and confirm it was marked as errored
+        inst = db.instance_get(ctxt, inst['id'])
+        self.assertEqual(inst['vm_state'], vm_states.ERROR)
+
 
 class MultiDriverTestCase(SimpleDriverTestCase):
     """Test case for multi driver."""
diff --git a/nova/tests/test_compute.py b/nova/tests/test_compute.py
index 56906b81f..bf36e7431 100644
--- a/nova/tests/test_compute.py
+++ b/nova/tests/test_compute.py
@@ -1194,6 +1194,10 @@ class ComputeTestCase(BaseTestCase):
         self.mox.StubOutWithMock(rpc, 'call')
         rpc.call(c, FLAGS.volume_topic, {"method": "check_for_export",
                                          "args": {'instance_id': instance_id}})
+
+        self.mox.StubOutWithMock(self.compute.driver, 'get_instance_disk_info')
+        self.compute.driver.get_instance_disk_info(inst_ref.name)
+
         rpc.call(c, topic, {"method": "pre_live_migration",
                             "args": {'instance_id': instance_id,
                                      'block_migration': True,
diff --git a/nova/tests/test_instance_types.py b/nova/tests/test_instance_types.py
index 6c7b9d76e..aa277206f 100644
--- a/nova/tests/test_instance_types.py
+++ b/nova/tests/test_instance_types.py
@@ -144,7 +144,7 @@ class InstanceTypeTestCase(test.TestCase):
         instance_types.create(name, 256, 1, 120, 'flavor1')
         self.assertRaises(exception.ApiError,
                           instance_types.create,
-                          name, 256, 1, 120, 'flavor2')
+                          name, "256", 1, 120, 'flavor2')
 
     def test_duplicate_flavorids_fail(self):
         """Ensures that flavorid duplicates raise ApiError"""
diff --git a/nova/tests/test_libvirt.py b/nova/tests/test_libvirt.py
index 15b7f7259..782e01563 100644
--- a/nova/tests/test_libvirt.py
+++ b/nova/tests/test_libvirt.py
@@ -757,67 +757,6 @@ class LibvirtConnTestCase(test.TestCase):
         self.assertEquals(conn.uri, testuri)
         db.instance_destroy(user_context, instance_ref['id'])
 
-    def test_update_available_resource_works_correctly(self):
-        """Confirm compute_node table is updated successfully."""
-        self.flags(instances_path='.')
-
-        # Prepare mocks
-        def getVersion():
-            return 12003
-
-        def getType():
-            return 'qemu'
-
-        def listDomainsID():
-            return []
-
-        service_ref = self.create_service(host='dummy')
-        self.create_fake_libvirt_mock(getVersion=getVersion,
-                                      getType=getType,
-                                      listDomainsID=listDomainsID)
-        self.mox.StubOutWithMock(connection.LibvirtConnection,
-                                 'get_cpu_info')
-        connection.LibvirtConnection.get_cpu_info().AndReturn('cpuinfo')
-
-        # Start test
-        self.mox.ReplayAll()
-        conn = connection.LibvirtConnection(False)
-        conn.update_available_resource(self.context, 'dummy')
-        service_ref = db.service_get(self.context, service_ref['id'])
-        compute_node = service_ref['compute_node'][0]
-
-        if sys.platform.upper() == 'LINUX2':
-            self.assertTrue(compute_node['vcpus'] >= 0)
-            self.assertTrue(compute_node['memory_mb'] > 0)
-            self.assertTrue(compute_node['local_gb'] > 0)
-            self.assertTrue(compute_node['vcpus_used'] == 0)
-            self.assertTrue(compute_node['memory_mb_used'] > 0)
-            self.assertTrue(compute_node['local_gb_used'] > 0)
-            self.assertTrue(len(compute_node['hypervisor_type']) > 0)
-            self.assertTrue(compute_node['hypervisor_version'] > 0)
-        else:
-            self.assertTrue(compute_node['vcpus'] >= 0)
-            self.assertTrue(compute_node['memory_mb'] == 0)
-            self.assertTrue(compute_node['local_gb'] > 0)
-            self.assertTrue(compute_node['vcpus_used'] == 0)
-            self.assertTrue(compute_node['memory_mb_used'] == 0)
-            self.assertTrue(compute_node['local_gb_used'] > 0)
-            self.assertTrue(len(compute_node['hypervisor_type']) > 0)
-            self.assertTrue(compute_node['hypervisor_version'] > 0)
-
-        db.service_destroy(self.context, service_ref['id'])
-
-    def test_update_resource_info_no_compute_record_found(self):
-        """Raise exception if no recorde found on services table."""
-        self.flags(instances_path='.')
-        self.create_fake_libvirt_mock()
-
-        self.mox.ReplayAll()
-        conn = connection.LibvirtConnection(False)
-        self.assertRaises(exception.ComputeServiceUnavailable,
-                          conn.update_available_resource,
-                          self.context, 'dummy')
-
     @test.skip_if(missing_libvirt(), "Test requires libvirt")
     def test_ensure_filtering_rules_for_instance_timeout(self):
         """ensure_filtering_fules_for_instance() finishes with timeout."""
@@ -950,7 +889,7 @@ class LibvirtConnTestCase(test.TestCase):
 
         # Test data
         instance_ref = db.instance_create(self.context, self.test_instance)
-        dummyjson = ('[{"path": "%s/disk", "local_gb": "10G",'
+        dummyjson = ('[{"path": "%s/disk", "disk_size": "10737418240",'
                      ' "type": "raw", "backing_file": ""}]')
 
         # Preparing mocks
@@ -984,6 +923,13 @@ class LibvirtConnTestCase(test.TestCase):
                      ""
                      "")
 
+        ret = ("image: /test/disk\n"
+               "file format: raw\n"
+               "virtual size: 20G (21474836480 bytes)\n"
+               "disk size: 3.1G\n"
+               "cluster_size: 2097152\n"
+               "backing file: /test/dummy (actual path: /backing/file)\n")
+
         # Preparing mocks
         vdmock = self.mox.CreateMock(libvirt.virDomain)
         self.mox.StubOutWithMock(vdmock, "XMLDesc")
@@ -998,18 +944,27 @@ class LibvirtConnTestCase(test.TestCase):
         fake_libvirt_utils.disk_sizes['/test/disk'] = 10 * GB
         fake_libvirt_utils.disk_sizes['/test/disk.local'] = 20 * GB
         fake_libvirt_utils.disk_backing_files['/test/disk.local'] = 'file'
+
+        self.mox.StubOutWithMock(os.path, "getsize")
+        os.path.getsize('/test/disk').AndReturn((10737418240))
+
+        self.mox.StubOutWithMock(utils, "execute")
+        utils.execute('qemu-img', 'info', '/test/disk.local').\
+            AndReturn((ret, ''))
+
+        os.path.getsize('/test/disk.local').AndReturn((21474836480))
+
         self.mox.ReplayAll()
 
         conn = connection.LibvirtConnection(False)
-        info = conn.get_instance_disk_info(self.context, instance_ref)
+        info = conn.get_instance_disk_info(instance_ref.name)
         info = utils.loads(info)
 
-        self.assertEquals(info[0]['type'], 'raw')
-        self.assertEquals(info[1]['type'], 'qcow2')
         self.assertEquals(info[0]['path'], '/test/disk')
-        self.assertEquals(info[1]['path'], '/test/disk.local')
-        self.assertEquals(info[0]['local_gb'], '10G')
-        self.assertEquals(info[1]['local_gb'], '20G')
+        self.assertEquals(info[0]['disk_size'], 10737418240)
         self.assertEquals(info[0]['backing_file'], "")
+        self.assertEquals(info[1]['type'], 'qcow2')
+        self.assertEquals(info[1]['path'], '/test/disk.local')
+        self.assertEquals(info[1]['virt_disk_size'], 21474836480)
         self.assertEquals(info[1]['backing_file'], "file")
 
         db.instance_destroy(self.context, instance_ref['id'])
@@ -1188,6 +1143,9 @@ class HostStateTestCase(test.TestCase):
         def get_hypervisor_version(self):
             return 13091
 
+        def get_disk_available_least(self):
+            return 13091
+
     def test_update_status(self):
         self.mox.StubOutWithMock(connection, 'get_connection')
         connection.get_connection(True).AndReturn(self.FakeConnection())
diff --git a/nova/tests/test_virt_drivers.py b/nova/tests/test_virt_drivers.py
index 8c5cda6f0..9c17b3b0a 100644
--- a/nova/tests/test_virt_drivers.py
+++ b/nova/tests/test_virt_drivers.py
@@ -323,11 +323,6 @@ class _VirtDriverTestCase(test.TestCase):
         instance_ref, network_info = self._get_running_instance()
         self.connection.refresh_provider_fw_rules()
 
-    @catch_notimplementederror
-    def test_update_available_resource(self):
-        self.compute = self.start_service('compute', host='dummy')
-        self.connection.update_available_resource(self.ctxt, 'dummy')
-
     @catch_notimplementederror
     def test_compare_cpu(self):
         cpu_info = '''{ "topology": {
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index b9fd8f30c..3311834c2 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -283,7 +283,7 @@ class FakeConnection(driver.ComputeDriver):
         """This method is supported only by libvirt."""
         raise NotImplementedError('This method is supported only by libvirt.')
 
-    def get_instance_disk_info(self, ctxt, instance_ref):
+    def get_instance_disk_info(self, instance_name):
         """This method is supported only by libvirt."""
         return
 
@@ -319,3 +319,7 @@ class FakeConnection(driver.ComputeDriver):
     def set_host_enabled(self, host, enabled):
         """Sets the specified host's ability to accept new instances."""
         pass
+
+    def get_disk_available_least(self):
+        """This method is supported only by libvirt."""
+        pass
diff --git a/nova/virt/libvirt/connection.py b/nova/virt/libvirt/connection.py
index cfcda9f84..85c48e495 100644
--- a/nova/virt/libvirt/connection.py
+++ b/nova/virt/libvirt/connection.py
@@ -1562,7 +1562,8 @@ class LibvirtConnection(driver.ComputeDriver):
                'local_gb_used': self.get_local_gb_used(),
                'hypervisor_type': self.get_hypervisor_type(),
                'hypervisor_version': self.get_hypervisor_version(),
-               'cpu_info': self.get_cpu_info()}
+               'cpu_info': self.get_cpu_info(),
+               'disk_available_least': self.get_disk_available_least()}
 
         compute_node_ref = service_ref['compute_node']
         if not compute_node_ref:
@@ -1773,7 +1774,7 @@ class LibvirtConnection(driver.ComputeDriver):
             instance_disk = os.path.join(instance_dir, base)
             if not info['backing_file']:
                 libvirt_utils.create_image(info['type'], instance_disk,
-                                           info['local_gb'])
+                                           info['disk_size'])
             else:
                 # Creating backing file follows same way as spawning instances.
                 backing_file = os.path.join(FLAGS.instances_path,
@@ -1842,7 +1843,7 @@ class LibvirtConnection(driver.ComputeDriver):
             dom = self._lookup_by_name(instance_ref.name)
             self._conn.defineXML(dom.XMLDesc(0))
 
-    def get_instance_disk_info(self, ctxt, instance_ref):
+    def get_instance_disk_info(self, instance_name):
         """Preparation for block migration.
 
-        :params ctxt: security context
+        :param instance_name: name of the instance that is migrated
@@ -1851,12 +1852,15 @@
             instance object that is migrated.
         :return:
             json strings with below format.
-            "[{'path':'disk', 'type':'raw', 'local_gb':'10G'},...]"
+            "[{'path':'disk', 'type':'raw',
+               'virt_disk_size':'10737418240',
+               'backing_file':'backing_file',
+               'disk_size':'83886080'},...]"
 
         """
         disk_info = []
 
-        virt_dom = self._lookup_by_name(instance_ref.name)
+        virt_dom = self._lookup_by_name(instance_name)
         xml = virt_dom.XMLDesc(0)
         doc = ElementTree.fromstring(xml)
         disk_nodes = doc.findall('.//devices/disk')
@@ -1873,31 +1877,58 @@ class LibvirtConnection(driver.ComputeDriver):
                 continue
 
             disk_type = driver_nodes[cnt].get('type')
-            size = libvirt_utils.get_disk_size(path)
             if disk_type == 'raw':
+                dk_size = int(os.path.getsize(path))
                 backing_file = ""
+                virt_size = 0
             else:
-                backing_file = libvirt_utils.get_backing_file(path)
-
-            # block migration needs same/larger size of empty image on the
-            # destination host. since qemu-img creates bit smaller size image
-            # depending on original image size, fixed value is necessary.
-            for unit, divisor in [('G', 1024 ** 3), ('M', 1024 ** 2),
-                                  ('K', 1024), ('', 1)]:
-                if size / divisor == 0:
-                    continue
-                if size % divisor != 0:
-                    size = size / divisor + 1
-                else:
-                    size = size / divisor
-                size = str(size) + unit
-                break
+                out, err = utils.execute('qemu-img', 'info', path)
+
+                # virtual size:
+                size = [i.split('(')[1].split()[0] for i in out.split('\n')
+                    if i.strip().find('virtual size') >= 0]
+                virt_size = int(size[0])
 
-            disk_info.append({'type': disk_type, 'path': path,
-                              'local_gb': size, 'backing_file': backing_file})
+                # real disk size:
+                dk_size = int(os.path.getsize(path))
+
+                # backing file: (actual path:)
+                backing_file = libvirt_utils.get_disk_backing_file(path)
+
+            disk_info.append({'type': disk_type,
+                              'path': path,
+                              'virt_disk_size': virt_size,
+                              'backing_file': backing_file,
+                              'disk_size': dk_size})
         return utils.dumps(disk_info)
 
+    def get_disk_available_least(self):
+        """Return disk available least size.
+
+        The available disk size used by the scheduler when block migration
+        is invoked with disk_over_commit=False.
+
+        Computed by deducting from the free disk space the amount by which
+        the virtual disk size of every instance exceeds its real (actual)
+        disk size.
+ + """ + # available size of the disk + dk_sz_gb = self.get_local_gb_total() - self.get_local_gb_used() + + # Disk size that all instance uses : virtual_size - disk_size + instances_name = self.list_instances() + instances_sz = 0 + for i_name in instances_name: + disk_infos = utils.loads(self.get_instance_disk_info(i_name)) + for info in disk_infos: + i_vt_sz = int(info['virt_disk_size']) + i_dk_sz = int(info['disk_size']) + instances_sz += i_vt_sz - i_dk_sz + + # Disk available least size + available_least_size = dk_sz_gb * (1024 ** 3) - instances_sz + return (available_least_size / 1024 / 1024 / 1024) + def unfilter_instance(self, instance_ref, network_info): """See comments of same method in firewall_driver.""" self.firewall_driver.unfilter_instance(instance_ref, -- cgit