diff options
| author | Justin Santa Barbara <justin@fathomdb.com> | 2011-03-29 16:35:39 -0700 |
|---|---|---|
| committer | Justin Santa Barbara <justin@fathomdb.com> | 2011-03-29 16:35:39 -0700 |
| commit | 5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8 (patch) | |
| tree | 1500a59a86efc43030a8f8910451da3630a2123b | |
| parent | e5f108058f9b085571330dff3c3e3e3e57d2e5ed (diff) | |
| download | nova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.tar.gz nova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.tar.xz nova-5e6c69bc7a7e5ddaa1bf0fa83f64da116343dba8.zip | |
Narrowly focused bugfix - don't lose libvirt instances on host reboot or if they crash
| -rw-r--r-- | nova/compute/manager.py | 14 | ||||
| -rw-r--r-- | nova/virt/libvirt_conn.py | 59 |
2 files changed, 55 insertions, 18 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index e0a5e2b3f..72f04ecb1 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1088,12 +1088,14 @@ class ComputeManager(manager.SchedulerDependentManager): db_instance['id'], vm_state) - if vm_state == power_state.SHUTOFF: - # TODO(soren): This is what the compute manager does when you - # terminate an instance. At some point I figure we'll have a - # "terminated" state and some sort of cleanup job that runs - # occasionally, cleaning them out. - self.db.instance_destroy(context, db_instance['id']) + # NOTE(justinsb): We no longer auto-remove SHUTOFF instances + # It's quite hard to get them back when we do. + #if vm_state == power_state.SHUTOFF: + # # TODO(soren): This is what the compute manager does when you + # # terminate an instance. At some point I figure we'll have a + # # "terminated" state and some sort of cleanup job that runs + # # occasionally, cleaning them out. + # self.db.instance_destroy(context, db_instance['id']) # Are there VMs not in the DB? for vm_not_found_in_db in vms_not_found_in_db: diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index c144e827e..533ff9394 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -116,6 +116,8 @@ flags.DEFINE_integer('live_migration_bandwidth', 0, 'Define live migration behavior') flags.DEFINE_string('qemu_img', 'qemu-img', 'binary to use for qemu-img commands') +flags.DEFINE_bool('start_guests_on_host_boot', False, + 'Whether to restart guests when the host reboots') def get_connection(read_only): @@ -230,12 +232,14 @@ class LibvirtConnection(driver.ComputeDriver): {'name': instance['name'], 'state': state}) db.instance_set_state(ctxt, instance['id'], state) - if state == power_state.SHUTOFF: - # TODO(soren): This is what the compute manager does when you - # terminate # an instance. At some point I figure we'll have a - # "terminated" state and some sort of cleanup job that runs - # occasionally, cleaning them out. - db.instance_destroy(ctxt, instance['id']) + # NOTE(justinsb): We no longer delete these instances, + # the user may want to power them back on + #if state == power_state.SHUTOFF: + # # TODO(soren): This is what the compute manager does when you + # # terminate # an instance. At some point I figure we'll have a + # # "terminated" state and some sort of cleanup job that runs + # # occasionally, cleaning them out. + # db.instance_destroy(ctxt, instance['id']) if state != power_state.RUNNING: continue @@ -474,7 +478,7 @@ class LibvirtConnection(driver.ComputeDriver): xml = self.to_xml(instance) self.firewall_driver.setup_basic_filtering(instance) self.firewall_driver.prepare_instance_filter(instance) - self._conn.createXML(xml, 0) + self._create_new_domain(xml) self.firewall_driver.apply_instance_filter(instance) timer = utils.LoopingCall(f=None) @@ -522,7 +526,7 @@ class LibvirtConnection(driver.ComputeDriver): 'kernel_id': FLAGS.rescue_kernel_id, 'ramdisk_id': FLAGS.rescue_ramdisk_id} self._create_image(instance, xml, '.rescue', rescue_images) - self._conn.createXML(xml, 0) + self._create_new_domain(xml) timer = utils.LoopingCall(f=None) @@ -565,10 +569,15 @@ class LibvirtConnection(driver.ComputeDriver): self.firewall_driver.setup_basic_filtering(instance, network_info) self.firewall_driver.prepare_instance_filter(instance, network_info) self._create_image(instance, xml, network_info) - self._conn.createXML(xml, 0) + domain = self._create_new_domain(xml) LOG.debug(_("instance %s: is running"), instance['name']) self.firewall_driver.apply_instance_filter(instance) + if FLAGS.start_guests_on_host_boot: + LOG.debug(_("instance %s: setting autostart ON") % + instance['name']) + domain.setAutostart(1) + timer = utils.LoopingCall(f=None) def _wait_for_boot(): @@ -964,11 +973,19 @@ class LibvirtConnection(driver.ComputeDriver): return xml def get_info(self, instance_name): + # NOTE(justinsb): When libvirt isn't running / can't connect, we get: + # libvir: Remote error : unable to connect to + # '/var/run/libvirt/libvirt-sock', libvirtd may need to be started: + # No such file or directory try: virt_dom = self._conn.lookupByName(instance_name) - except: - raise exception.NotFound(_("Instance %s not found") - % instance_name) + except libvirt.libvirtError as e: + if e.get_error_code() == libvirt.VIR_ERR_UNKNOWN_HOST: + raise exception.NotFound(_("Instance %s not found") + % instance_name) + LOG.warning(_("Error from libvirt during lookup: %s") % e) + raise + (state, max_mem, mem, num_cpu, cpu_time) = virt_dom.info() return {'state': state, 'max_mem': max_mem, @@ -976,6 +993,24 @@ class LibvirtConnection(driver.ComputeDriver): 'num_cpu': num_cpu, 'cpu_time': cpu_time} + def _create_new_domain(self, xml, persistent=True, launch_flags=0): + # NOTE(justinsb): libvirt has two types of domain: + # * a transient domain disappears when the guest is shutdown + # or the host is rebooted. + # * a permanent domain is not automatically deleted + # NOTE(justinsb): Even for ephemeral instances, transient seems risky + + if persistent: + # To create a persistent domain, first define it, then launch it. + domain = self._conn.defineXML(xml) + + domain.createWithFlags(launch_flags) + else: + # createXML call creates a transient domain + domain = self._conn.createXML(xml, launch_flags) + + return domain + def get_diagnostics(self, instance_name): raise exception.ApiError(_("diagnostics are not supported " "for libvirt")) |
