summaryrefslogtreecommitdiffstats
path: root/nova/virt
diff options
context:
space:
mode:
Diffstat (limited to 'nova/virt')
-rw-r--r--nova/virt/cpuinfo.xml.template9
-rw-r--r--nova/virt/fake.py21
-rw-r--r--nova/virt/libvirt_conn.py369
-rw-r--r--nova/virt/xenapi/vm_utils.py18
-rw-r--r--nova/virt/xenapi/vmops.py17
-rw-r--r--nova/virt/xenapi_conn.py30
6 files changed, 452 insertions, 12 deletions
diff --git a/nova/virt/cpuinfo.xml.template b/nova/virt/cpuinfo.xml.template
new file mode 100644
index 000000000..48842b29d
--- /dev/null
+++ b/nova/virt/cpuinfo.xml.template
@@ -0,0 +1,9 @@
+<cpu>
+ <arch>$arch</arch>
+ <model>$model</model>
+ <vendor>$vendor</vendor>
+ <topology sockets="$topology.sockets" cores="$topology.cores" threads="$topology.threads"/>
+#for $var in $features
+ <features name="$var" />
+#end for
+</cpu>
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index c744acf91..3a06284a1 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -407,6 +407,27 @@ class FakeConnection(object):
"""
return True
+ def update_available_resource(self, ctxt, host):
+ """This method is supported only by libvirt."""
+ return
+
+ def compare_cpu(self, xml):
+ """This method is supported only by libvirt."""
+ raise NotImplementedError('This method is supported only by libvirt.')
+
+ def ensure_filtering_rules_for_instance(self, instance_ref):
+ """This method is supported only by libvirt."""
+ raise NotImplementedError('This method is supported only by libvirt.')
+
+ def live_migration(self, context, instance_ref, dest,
+ post_method, recover_method):
+ """This method is supported only by libvirt."""
+ return
+
+ def unfilter_instance(self, instance_ref):
+ """This method is supported only by libvirt."""
+ raise NotImplementedError('This method is supported only by libvirt.')
+
class FakeInstance(object):
diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py
index 700a6bf9d..0b306c950 100644
--- a/nova/virt/libvirt_conn.py
+++ b/nova/virt/libvirt_conn.py
@@ -36,10 +36,13 @@ Supports KVM, QEMU, UML, and XEN.
"""
+import multiprocessing
import os
import shutil
+import sys
import random
import subprocess
+import time
import uuid
from xml.dom import minidom
@@ -69,6 +72,7 @@ Template = None
LOG = logging.getLogger('nova.virt.libvirt_conn')
FLAGS = flags.FLAGS
+flags.DECLARE('live_migration_retry_count', 'nova.compute.manager')
# TODO(vish): These flags should probably go into a shared location
flags.DEFINE_string('rescue_image_id', 'ami-rescue', 'Rescue ami image')
flags.DEFINE_string('rescue_kernel_id', 'aki-rescue', 'Rescue aki image')
@@ -99,6 +103,17 @@ flags.DEFINE_string('ajaxterm_portrange',
flags.DEFINE_string('firewall_driver',
'nova.virt.libvirt_conn.IptablesFirewallDriver',
'Firewall driver (defaults to iptables)')
+flags.DEFINE_string('cpuinfo_xml_template',
+ utils.abspath('virt/cpuinfo.xml.template'),
+ 'CpuInfo XML Template (Used only live migration now)')
+flags.DEFINE_string('live_migration_uri',
+ "qemu+tcp://%s/system",
+ 'Define protocol used by live_migration feature')
+flags.DEFINE_string('live_migration_flag',
+ "VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER",
+ 'Define live migration behavior.')
+flags.DEFINE_integer('live_migration_bandwidth', 0,
+ 'Define live migration behavior')
def get_connection(read_only):
@@ -145,6 +160,7 @@ class LibvirtConnection(object):
self.libvirt_uri = self.get_uri()
self.libvirt_xml = open(FLAGS.libvirt_xml_template).read()
+ self.cpuinfo_xml = open(FLAGS.cpuinfo_xml_template).read()
self._wrapped_conn = None
self.read_only = read_only
@@ -850,6 +866,158 @@ class LibvirtConnection(object):
return interfaces
+ def get_vcpu_total(self):
+ """Get vcpu number of physical computer.
+
+ :returns: the number of cpu core.
+
+ """
+
+ # On certain platforms, this will raise a NotImplementedError.
+ try:
+ return multiprocessing.cpu_count()
+ except NotImplementedError:
+ LOG.warn(_("Cannot get the number of cpu, because this "
+ "function is not implemented for this platform. "
+ "This error can be safely ignored for now."))
+ return 0
+
+ def get_memory_mb_total(self):
+ """Get the total memory size(MB) of physical computer.
+
+ :returns: the total amount of memory(MB).
+
+ """
+
+ if sys.platform.upper() != 'LINUX2':
+ return 0
+
+ meminfo = open('/proc/meminfo').read().split()
+ idx = meminfo.index('MemTotal:')
+ # transforming kb to mb.
+ return int(meminfo[idx + 1]) / 1024
+
+ def get_local_gb_total(self):
+ """Get the total hdd size(GB) of physical computer.
+
+ :returns:
+ The total amount of HDD(GB).
+ Note that this value shows a partition where
+ NOVA-INST-DIR/instances mounts.
+
+ """
+
+ hddinfo = os.statvfs(FLAGS.instances_path)
+ return hddinfo.f_frsize * hddinfo.f_blocks / 1024 / 1024 / 1024
+
+ def get_vcpu_used(self):
+ """ Get vcpu usage number of physical computer.
+
+ :returns: The total number of vcpu that currently used.
+
+ """
+
+ total = 0
+ for dom_id in self._conn.listDomainsID():
+ dom = self._conn.lookupByID(dom_id)
+ total += len(dom.vcpus()[1])
+ return total
+
+ def get_memory_mb_used(self):
+ """Get the free memory size(MB) of physical computer.
+
+ :returns: the total usage of memory(MB).
+
+ """
+
+ if sys.platform.upper() != 'LINUX2':
+ return 0
+
+ m = open('/proc/meminfo').read().split()
+ idx1 = m.index('MemFree:')
+ idx2 = m.index('Buffers:')
+ idx3 = m.index('Cached:')
+ avail = (int(m[idx1 + 1]) + int(m[idx2 + 1]) + int(m[idx3 + 1])) / 1024
+ return self.get_memory_mb_total() - avail
+
+ def get_local_gb_used(self):
+ """Get the free hdd size(GB) of physical computer.
+
+ :returns:
+ The total usage of HDD(GB).
+ Note that this value shows a partition where
+ NOVA-INST-DIR/instances mounts.
+
+ """
+
+ hddinfo = os.statvfs(FLAGS.instances_path)
+ avail = hddinfo.f_frsize * hddinfo.f_bavail / 1024 / 1024 / 1024
+ return self.get_local_gb_total() - avail
+
+ def get_hypervisor_type(self):
+ """Get hypervisor type.
+
+ :returns: hypervisor type (ex. qemu)
+
+ """
+
+ return self._conn.getType()
+
+ def get_hypervisor_version(self):
+ """Get hypervisor version.
+
+ :returns: hypervisor version (ex. 12003)
+
+ """
+
+ return self._conn.getVersion()
+
+ def get_cpu_info(self):
+ """Get cpuinfo information.
+
+ Obtains cpu feature from virConnect.getCapabilities,
+ and returns as a json string.
+
+ :return: see above description
+
+ """
+
+ xml = self._conn.getCapabilities()
+ xml = libxml2.parseDoc(xml)
+ nodes = xml.xpathEval('//cpu')
+ if len(nodes) != 1:
+ raise exception.Invalid(_("Invalid xml. '<cpu>' must be 1,"
+ "but %d\n") % len(nodes)
+ + xml.serialize())
+
+ cpu_info = dict()
+ cpu_info['arch'] = xml.xpathEval('//cpu/arch')[0].getContent()
+ cpu_info['model'] = xml.xpathEval('//cpu/model')[0].getContent()
+ cpu_info['vendor'] = xml.xpathEval('//cpu/vendor')[0].getContent()
+
+ topology_node = xml.xpathEval('//cpu/topology')[0].get_properties()
+ topology = dict()
+ while topology_node != None:
+ name = topology_node.get_name()
+ topology[name] = topology_node.getContent()
+ topology_node = topology_node.get_next()
+
+ keys = ['cores', 'sockets', 'threads']
+ tkeys = topology.keys()
+ if list(set(tkeys)) != list(set(keys)):
+ ks = ', '.join(keys)
+ raise exception.Invalid(_("Invalid xml: topology(%(topology)s) "
+ "must have %(ks)s") % locals())
+
+ feature_nodes = xml.xpathEval('//cpu/feature')
+ features = list()
+ for nodes in feature_nodes:
+ features.append(nodes.get_properties().getContent())
+
+ cpu_info['topology'] = topology
+ cpu_info['features'] = features
+ return utils.dumps(cpu_info)
+
def block_stats(self, instance_name, disk):
"""
Note that this function takes an instance name, not an Instance, so
@@ -880,6 +1048,207 @@ class LibvirtConnection(object):
def refresh_security_group_members(self, security_group_id):
self.firewall_driver.refresh_security_group_members(security_group_id)
+ def update_available_resource(self, ctxt, host):
+ """Updates compute manager resource info on ComputeNode table.
+
+ This method is called when nova-coompute launches, and
+ whenever admin executes "nova-manage service update_resource".
+
+ :param ctxt: security context
+ :param host: hostname that compute manager is currently running
+
+ """
+
+ try:
+ service_ref = db.service_get_all_compute_by_host(ctxt, host)[0]
+ except exception.NotFound:
+ raise exception.Invalid(_("Cannot update compute manager "
+ "specific info, because no service "
+ "record was found."))
+
+ # Updating host information
+ dic = {'vcpus': self.get_vcpu_total(),
+ 'memory_mb': self.get_memory_mb_total(),
+ 'local_gb': self.get_local_gb_total(),
+ 'vcpus_used': self.get_vcpu_used(),
+ 'memory_mb_used': self.get_memory_mb_used(),
+ 'local_gb_used': self.get_local_gb_used(),
+ 'hypervisor_type': self.get_hypervisor_type(),
+ 'hypervisor_version': self.get_hypervisor_version(),
+ 'cpu_info': self.get_cpu_info()}
+
+ compute_node_ref = service_ref['compute_node']
+ if not compute_node_ref:
+ LOG.info(_('Compute_service record created for %s ') % host)
+ dic['service_id'] = service_ref['id']
+ db.compute_node_create(ctxt, dic)
+ else:
+ LOG.info(_('Compute_service record updated for %s ') % host)
+ db.compute_node_update(ctxt, compute_node_ref[0]['id'], dic)
+
+ def compare_cpu(self, cpu_info):
+ """Checks the host cpu is compatible to a cpu given by xml.
+
+ "xml" must be a part of libvirt.openReadonly().getCapabilities().
+ return values follows by virCPUCompareResult.
+ if 0 > return value, do live migration.
+ 'http://libvirt.org/html/libvirt-libvirt.html#virCPUCompareResult'
+
+ :param cpu_info: json string that shows cpu feature(see get_cpu_info())
+ :returns:
+ None. if given cpu info is not compatible to this server,
+ raise exception.
+
+ """
+
+ LOG.info(_('Instance launched has CPU info:\n%s') % cpu_info)
+ dic = utils.loads(cpu_info)
+ xml = str(Template(self.cpuinfo_xml, searchList=dic))
+ LOG.info(_('to xml...\n:%s ' % xml))
+
+ u = "http://libvirt.org/html/libvirt-libvirt.html#virCPUCompareResult"
+ m = _("CPU doesn't have compatibility.\n\n%(ret)s\n\nRefer to %(u)s")
+ # unknown character exists in xml, then libvirt complains
+ try:
+ ret = self._conn.compareCPU(xml, 0)
+ except libvirt.libvirtError, e:
+ ret = e.message
+ LOG.error(m % locals())
+ raise
+
+ if ret <= 0:
+ raise exception.Invalid(m % locals())
+
+ return
+
+ def ensure_filtering_rules_for_instance(self, instance_ref):
+ """Setting up filtering rules and waiting for its completion.
+
+ To migrate an instance, filtering rules to hypervisors
+ and firewalls are inevitable on destination host.
+ ( Waiting only for filterling rules to hypervisor,
+ since filtering rules to firewall rules can be set faster).
+
+ Concretely, the below method must be called.
+ - setup_basic_filtering (for nova-basic, etc.)
+ - prepare_instance_filter(for nova-instance-instance-xxx, etc.)
+
+ to_xml may have to be called since it defines PROJNET, PROJMASK.
+ but libvirt migrates those value through migrateToURI(),
+ so , no need to be called.
+
+ Don't use thread for this method since migration should
+ not be started when setting-up filtering rules operations
+ are not completed.
+
+ :params instance_ref: nova.db.sqlalchemy.models.Instance object
+
+ """
+
+ # If any instances never launch at destination host,
+ # basic-filtering must be set here.
+ self.firewall_driver.setup_basic_filtering(instance_ref)
+ # setting up n)ova-instance-instance-xx mainly.
+ self.firewall_driver.prepare_instance_filter(instance_ref)
+
+ # wait for completion
+ timeout_count = range(FLAGS.live_migration_retry_count)
+ while timeout_count:
+ try:
+ filter_name = 'nova-instance-%s' % instance_ref.name
+ self._conn.nwfilterLookupByName(filter_name)
+ break
+ except libvirt.libvirtError:
+ timeout_count.pop()
+ if len(timeout_count) == 0:
+ ec2_id = instance_ref['hostname']
+ iname = instance_ref.name
+ msg = _('Timeout migrating for %(ec2_id)s(%(iname)s)')
+ raise exception.Error(msg % locals())
+ time.sleep(1)
+
+ def live_migration(self, ctxt, instance_ref, dest,
+ post_method, recover_method):
+ """Spawning live_migration operation for distributing high-load.
+
+ :params ctxt: security context
+ :params instance_ref:
+ nova.db.sqlalchemy.models.Instance object
+ instance object that is migrated.
+ :params dest: destination host
+ :params post_method:
+ post operation method.
+ expected nova.compute.manager.post_live_migration.
+ :params recover_method:
+ recovery method when any exception occurs.
+ expected nova.compute.manager.recover_live_migration.
+
+ """
+
+ greenthread.spawn(self._live_migration, ctxt, instance_ref, dest,
+ post_method, recover_method)
+
+ def _live_migration(self, ctxt, instance_ref, dest,
+ post_method, recover_method):
+ """Do live migration.
+
+ :params ctxt: security context
+ :params instance_ref:
+ nova.db.sqlalchemy.models.Instance object
+ instance object that is migrated.
+ :params dest: destination host
+ :params post_method:
+ post operation method.
+ expected nova.compute.manager.post_live_migration.
+ :params recover_method:
+ recovery method when any exception occurs.
+ expected nova.compute.manager.recover_live_migration.
+
+ """
+
+ # Do live migration.
+ try:
+ flaglist = FLAGS.live_migration_flag.split(',')
+ flagvals = [getattr(libvirt, x.strip()) for x in flaglist]
+ logical_sum = reduce(lambda x, y: x | y, flagvals)
+
+ if self.read_only:
+ tmpconn = self._connect(self.libvirt_uri, False)
+ dom = tmpconn.lookupByName(instance_ref.name)
+ dom.migrateToURI(FLAGS.live_migration_uri % dest,
+ logical_sum,
+ None,
+ FLAGS.live_migration_bandwidth)
+ tmpconn.close()
+ else:
+ dom = self._conn.lookupByName(instance_ref.name)
+ dom.migrateToURI(FLAGS.live_migration_uri % dest,
+ logical_sum,
+ None,
+ FLAGS.live_migration_bandwidth)
+
+ except Exception:
+ recover_method(ctxt, instance_ref)
+ raise
+
+ # Waiting for completion of live_migration.
+ timer = utils.LoopingCall(f=None)
+
+ def wait_for_live_migration():
+ """waiting for live migration completion"""
+ try:
+ self.get_info(instance_ref.name)['state']
+ except exception.NotFound:
+ timer.stop()
+ post_method(ctxt, instance_ref, dest)
+
+ timer.f = wait_for_live_migration
+ timer.start(interval=0.5, now=True)
+
+ def unfilter_instance(self, instance_ref):
+ """See comments of same method in firewall_driver."""
+ self.firewall_driver.unfilter_instance(instance_ref)
+
class FirewallDriver(object):
def prepare_instance_filter(self, instance):
diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py
index 1dad29736..bf69c37ae 100644
--- a/nova/virt/xenapi/vm_utils.py
+++ b/nova/virt/xenapi/vm_utils.py
@@ -20,6 +20,7 @@ their attributes like VDIs, VIFs, as well as their lookup functions.
"""
import os
+import sys
import pickle
import re
import time
@@ -428,6 +429,8 @@ class VMHelper(HelperBase):
% locals())
return vdi_uuid
except BaseException as e:
+ LOG.exception(_("instance %s: Failed to fetch glance image"),
+ instance_id, exc_info=sys.exc_info())
try:
vdi_uuid = session.get_xenapi().VDI.get_uuid(vdi)
e.args = e.args + ({image_type: vdi_uuid},)
@@ -490,12 +493,23 @@ class VMHelper(HelperBase):
else:
return session.get_xenapi().VDI.get_uuid(vdi_ref)
except BaseException as e:
+ LOG.exception(_("instance %s: Failed to fetch glance image"),
+ instance_id, exc_info=sys.exc_info())
if vdi_ref:
try:
vdi_uuid = session.get_xenapi().VDI.get_uuid(vdi_ref)
e.args = e.args + ({image_type: vdi_uuid},)
except:
pass # ignore failures in retrieving VDI
+ if filename:
+ try:
+ splits = filename.split("/")
+ if len(splits) > 0:
+ vdi_uuid = splits[len(splits) - 1]
+ e.args = e.args + ({image_type: vdi_uuid},)
+ except:
+ pass # ignore errors parsing file name
+
raise e
@classmethod
@@ -1022,8 +1036,8 @@ def _write_partition(virtual_size, dev):
def execute(*cmd, **kwargs):
return utils.execute(*cmd, **kwargs)
- execute('parted', '--script', dest, 'mklabel', 'msdos')
- execute('parted', '--script', dest, 'mkpart', 'primary',
+ execute('sudo', 'parted', '--script', dest, 'mklabel', 'msdos')
+ execute('sudo', 'parted', '--script', dest, 'mkpart', 'primary',
'%ds' % primary_first,
'%ds' % primary_last)
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index 58ffff8fb..1c48f1bd7 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -93,7 +93,10 @@ class VMOps(object):
instance.id, exc_info=sys.exc_info())
LOG.debug(_('Instance %s failed to spawn - performing clean-up'),
instance.id)
- vdis = {}
+ vdis = {
+ ImageType.KERNEL: None,
+ ImageType.RAMDISK: None,
+ }
if vdi_uuid:
vdis[disk_image_type] = vdi_uuid
#extract VDI uuid from spawn error
@@ -121,7 +124,8 @@ class VMOps(object):
if remove_from_dom0:
LOG.debug(_("Removing kernel/ramdisk files from dom0"))
self._destroy_kernel_ramdisk_plugin_call(
- vdis[ImageType.KERNEL], vdis[ImageType.RAMDISK])
+ vdis[ImageType.KERNEL], vdis[ImageType.RAMDISK],
+ False)
#re-throw the error
raise spawn_error
@@ -540,12 +544,15 @@ class VMOps(object):
except self.XenAPI.Failure, exc:
LOG.exception(exc)
- def _destroy_kernel_ramdisk_plugin_call(self, kernel, ramdisk):
+ def _destroy_kernel_ramdisk_plugin_call(self, kernel, ramdisk,
+ filenames=True):
args = {}
+ kernel_arg_name = "kernel-" + (filenames and "file" or "uuid")
+ ramdisk_arg_name = "ramdisk-" + (filenames and "file" or "uuid")
if kernel:
- args['kernel-uuid'] = kernel
+ args[kernel_arg_name] = kernel
if ramdisk:
- args['ramdisk-uuid'] = ramdisk
+ args[ramdisk_arg_name] = ramdisk
task = self._session.async_call_plugin(
'glance', 'remove_kernel_ramdisk', args)
self._session.wait_for_task(task)
diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py
index bfe290be3..58640fba5 100644
--- a/nova/virt/xenapi_conn.py
+++ b/nova/virt/xenapi_conn.py
@@ -269,6 +269,27 @@ class XenAPIConnection(object):
'username': FLAGS.xenapi_connection_username,
'password': FLAGS.xenapi_connection_password}
+ def update_available_resource(self, ctxt, host):
+ """This method is supported only by libvirt."""
+ return
+
+ def compare_cpu(self, xml):
+ """This method is supported only by libvirt."""
+ raise NotImplementedError('This method is supported only by libvirt.')
+
+ def ensure_filtering_rules_for_instance(self, instance_ref):
+ """This method is supported only libvirt."""
+ return
+
+ def live_migration(self, context, instance_ref, dest,
+ post_method, recover_method):
+ """This method is supported only by libvirt."""
+ return
+
+ def unfilter_instance(self, instance_ref):
+ """This method is supported only by libvirt."""
+ raise NotImplementedError('This method is supported only by libvirt.')
+
class XenAPISession(object):
"""The session to invoke XenAPI SDK calls"""
@@ -339,11 +360,10 @@ class XenAPISession(object):
try:
name = self._session.xenapi.task.get_name_label(task)
status = self._session.xenapi.task.get_status(task)
- if id:
- action = dict(
- instance_id=int(id),
- action=name[0:255], # Ensure action is never > 255
- error=None)
+ action = dict(
+ action=name[0:255], # Ensure action is never > 255
+ instance_id=id and int(id) or None,
+ error=None)
if status == "pending":
return
elif status == "success":