summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustin Santa Barbara <justin@fathomdb.com>2011-03-29 16:35:39 -0700
committerJustin Santa Barbara <justin@fathomdb.com>2011-03-29 16:35:39 -0700
commit5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8 (patch)
tree1500a59a86efc43030a8f8910451da3630a2123b
parente5f108058f9b085571330dff3c3e3e3e57d2e5ed (diff)
downloadnova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.tar.gz
nova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.tar.xz
nova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.zip
Narrowly focused bugfix - don't lose libvirt instances on host reboot or if they crash
-rw-r--r--nova/compute/manager.py14
-rw-r--r--nova/virt/libvirt_conn.py59
2 files changed, 55 insertions, 18 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index e0a5e2b3f..72f04ecb1 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1088,12 +1088,14 @@ class ComputeManager(manager.SchedulerDependentManager):
db_instance['id'],
vm_state)
- if vm_state == power_state.SHUTOFF:
- # TODO(soren): This is what the compute manager does when you
- # terminate an instance. At some point I figure we'll have a
- # "terminated" state and some sort of cleanup job that runs
- # occasionally, cleaning them out.
- self.db.instance_destroy(context, db_instance['id'])
+ # NOTE(justinsb): We no longer auto-remove SHUTOFF instances
+ # It's quite hard to get them back when we do.
+ #if vm_state == power_state.SHUTOFF:
+ # # TODO(soren): This is what the compute manager does when you
+ # # terminate an instance. At some point I figure we'll have a
+ # # "terminated" state and some sort of cleanup job that runs
+ # # occasionally, cleaning them out.
+ # self.db.instance_destroy(context, db_instance['id'])
# Are there VMs not in the DB?
for vm_not_found_in_db in vms_not_found_in_db:
diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py
index c144e827e..533ff9394 100644
--- a/nova/virt/libvirt_conn.py
+++ b/nova/virt/libvirt_conn.py
@@ -116,6 +116,8 @@ flags.DEFINE_integer('live_migration_bandwidth', 0,
'Define live migration behavior')
flags.DEFINE_string('qemu_img', 'qemu-img',
'binary to use for qemu-img commands')
+flags.DEFINE_bool('start_guests_on_host_boot', False,
+ 'Whether to restart guests when the host reboots')
def get_connection(read_only):
@@ -230,12 +232,14 @@ class LibvirtConnection(driver.ComputeDriver):
{'name': instance['name'], 'state': state})
db.instance_set_state(ctxt, instance['id'], state)
- if state == power_state.SHUTOFF:
- # TODO(soren): This is what the compute manager does when you
- # terminate # an instance. At some point I figure we'll have a
- # "terminated" state and some sort of cleanup job that runs
- # occasionally, cleaning them out.
- db.instance_destroy(ctxt, instance['id'])
+ # NOTE(justinsb): We no longer delete these instances,
+ # the user may want to power them back on
+ #if state == power_state.SHUTOFF:
+ # # TODO(soren): This is what the compute manager does when you
+ # # terminate # an instance. At some point I figure we'll have a
+ # # "terminated" state and some sort of cleanup job that runs
+ # # occasionally, cleaning them out.
+ # db.instance_destroy(ctxt, instance['id'])
if state != power_state.RUNNING:
continue
@@ -474,7 +478,7 @@ class LibvirtConnection(driver.ComputeDriver):
xml = self.to_xml(instance)
self.firewall_driver.setup_basic_filtering(instance)
self.firewall_driver.prepare_instance_filter(instance)
- self._conn.createXML(xml, 0)
+ self._create_new_domain(xml)
self.firewall_driver.apply_instance_filter(instance)
timer = utils.LoopingCall(f=None)
@@ -522,7 +526,7 @@ class LibvirtConnection(driver.ComputeDriver):
'kernel_id': FLAGS.rescue_kernel_id,
'ramdisk_id': FLAGS.rescue_ramdisk_id}
self._create_image(instance, xml, '.rescue', rescue_images)
- self._conn.createXML(xml, 0)
+ self._create_new_domain(xml)
timer = utils.LoopingCall(f=None)
@@ -565,10 +569,15 @@ class LibvirtConnection(driver.ComputeDriver):
self.firewall_driver.setup_basic_filtering(instance, network_info)
self.firewall_driver.prepare_instance_filter(instance, network_info)
self._create_image(instance, xml, network_info)
- self._conn.createXML(xml, 0)
+ domain = self._create_new_domain(xml)
LOG.debug(_("instance %s: is running"), instance['name'])
self.firewall_driver.apply_instance_filter(instance)
+ if FLAGS.start_guests_on_host_boot:
+ LOG.debug(_("instance %s: setting autostart ON") %
+ instance['name'])
+ domain.setAutostart(1)
+
timer = utils.LoopingCall(f=None)
def _wait_for_boot():
@@ -964,11 +973,19 @@ class LibvirtConnection(driver.ComputeDriver):
return xml
def get_info(self, instance_name):
+ # NOTE(justinsb): When libvirt isn't running / can't connect, we get:
+ # libvir: Remote error : unable to connect to
+ # '/var/run/libvirt/libvirt-sock', libvirtd may need to be started:
+ # No such file or directory
try:
virt_dom = self._conn.lookupByName(instance_name)
- except:
- raise exception.NotFound(_("Instance %s not found")
- % instance_name)
+ except libvirt.libvirtError as e:
+ if e.get_error_code() == libvirt.VIR_ERR_UNKNOWN_HOST:
+ raise exception.NotFound(_("Instance %s not found")
+ % instance_name)
+ LOG.warning(_("Error from libvirt during lookup: %s") % e)
+ raise
+
(state, max_mem, mem, num_cpu, cpu_time) = virt_dom.info()
return {'state': state,
'max_mem': max_mem,
@@ -976,6 +993,24 @@ class LibvirtConnection(driver.ComputeDriver):
'num_cpu': num_cpu,
'cpu_time': cpu_time}
+ def _create_new_domain(self, xml, persistent=True, launch_flags=0):
+ # NOTE(justinsb): libvirt has two types of domain:
+ # * a transient domain disappears when the guest is shutdown
+ # or the host is rebooted.
+ # * a permanent domain is not automatically deleted
+ # NOTE(justinsb): Even for ephemeral instances, transient seems risky
+
+ if persistent:
+ # To create a persistent domain, first define it, then launch it.
+ domain = self._conn.defineXML(xml)
+
+ domain.createWithFlags(launch_flags)
+ else:
+ # createXML call creates a transient domain
+ domain = self._conn.createXML(xml, launch_flags)
+
+ return domain
+
def get_diagnostics(self, instance_name):
raise exception.ApiError(_("diagnostics are not supported "
"for libvirt"))