From 8f7d6b9da89e7154a79ad7d20681d0cb47e042b7 Mon Sep 17 00:00:00 2001 From: Tushar Patil Date: Mon, 21 Mar 2011 12:21:24 -0700 Subject: Fix for LP Bug #739641 --- smoketests/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smoketests/base.py b/smoketests/base.py index 3e2446c9a..31d82b20b 100644 --- a/smoketests/base.py +++ b/smoketests/base.py @@ -32,7 +32,6 @@ SUITE_NAMES = '[image, instance, volume]' FLAGS = flags.FLAGS flags.DEFINE_string('suite', None, 'Specific test suite to run ' + SUITE_NAMES) flags.DEFINE_integer('ssh_tries', 3, 'Numer of times to try ssh') -boto_v6 = None class SmokeTestCase(unittest.TestCase): @@ -183,6 +182,9 @@ class SmokeTestCase(unittest.TestCase): TEST_DATA = {} +if FLAGS.use_ipv6: + global boto_v6 + boto_v6 = __import__('boto_v6') class UserSmokeTestCase(SmokeTestCase): -- cgit From d1860ce5d26fbbadb2310e8225e924879cde9a6c Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 10:35:43 +0100 Subject: Make synchronized support both external (file based) locks as well as internal (semaphore based) locks. Attempt to make it native thread safe at the expense of never cleaning up semaphores. --- nova/tests/test_misc.py | 34 +++++++++++++++++++++++-- nova/utils.py | 67 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/nova/tests/test_misc.py b/nova/tests/test_misc.py index 1fbaf304f..961499a60 100644 --- a/nova/tests/test_misc.py +++ b/nova/tests/test_misc.py @@ -16,8 +16,12 @@ import errno import os +import random import select +from eventlet import greenpool +from eventlet import greenthread + from nova import test from nova.utils import parse_mailmap, str_dict_replace, synchronized @@ -72,11 +76,37 @@ class LockTestCase(test.TestCase): self.assertEquals(foo.__name__, 'foo', "Wrapped function's name " "got mangled") - def test_synchronized(self): + def test_synchronized_internally(self): + """We can lock across multiple green threads""" + seen_threads = list() + @synchronized('testlock', external=False) + def f(id): + for x in range(10): + seen_threads.append(id) + greenthread.sleep(0) + + threads = [] + pool = greenpool.GreenPool(10) + for i in range(10): + threads.append(pool.spawn(f, i)) + + for thread in threads: + thread.wait() + + self.assertEquals(len(seen_threads), 100) + # Looking at the seen threads, split it into chunks of 10, and verify + # that the last 9 match the first in each chunk. + for i in range(10): + for j in range(9): + self.assertEquals(seen_threads[i*10], seen_threads[i*10+1+j]) + + + def test_synchronized_externally(self): + """We can lock across multiple processes""" rpipe1, wpipe1 = os.pipe() rpipe2, wpipe2 = os.pipe() - @synchronized('testlock') + @synchronized('testlock', external=True) def f(rpipe, wpipe): try: os.write(wpipe, "foo") diff --git a/nova/utils.py b/nova/utils.py index 499af2039..8936614cc 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -41,6 +41,7 @@ from xml.sax import saxutils from eventlet import event from eventlet import greenthread +from eventlet import semaphore from eventlet.green import subprocess None from nova import exception @@ -531,17 +532,69 @@ def loads(s): return json.loads(s) -def synchronized(name): +_semaphores_semaphore = semaphore.Semaphore() +_semaphores = {} + + +class _NoopContextManager(object): + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + +def synchronized(name, external=False): + """Synchronization decorator + + Decorating a method like so: + @synchronized('mylock') + def foo(self, *args): + ... + + ensures that only one thread will execute the bar method at a time. + + Different methods can share the same lock: + @synchronized('mylock') + def foo(self, *args): + ... + + @synchronized('mylock') + def bar(self, *args): + ... + + This way only one of either foo or bar can be executing at a time. + + The external keyword argument denotes whether this lock should work across + multiple processes. This means that if two different workers both run a + a method decorated with @synchronized('mylock', external=True), only one + of them will execute at a time. + """ + def wrap(f): @functools.wraps(f) def inner(*args, **kwargs): - LOG.debug(_("Attempting to grab %(lock)s for method " - "%(method)s..." % {"lock": name, + with _semaphores_semaphore: + if name not in _semaphores: + _semaphores[name] = semaphore.Semaphore() + sem = _semaphores[name] + LOG.debug(_('Attempting to grab semaphore "%(lock)s" for method ' + '"%(method)s"...' % {"lock": name, "method": f.__name__})) - lock = lockfile.FileLock(os.path.join(FLAGS.lock_path, - 'nova-%s.lock' % name)) - with lock: - return f(*args, **kwargs) + with sem: + if external: + LOG.debug(_('Attempting to grab file lock "%(lock)s" for ' + 'method "%(method)s"...' % + {"lock": name, "method": f.__name__})) + lock_file_path = os.path.join(FLAGS.lock_path, + 'nova-%s.lock' % name) + lock = lockfile.FileLock(lock_file_path) + else: + lock = _NoopContextManager() + + with lock: + return f(*args, **kwargs) + return inner return wrap -- cgit From 60a3aa86db1d0e1ea2f680c9587881e45fa99336 Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 14:14:47 +0100 Subject: Make synchronized decorator not leak semaphores, at the expense of not being truly thread safe (but safe enough for Eventlet style green threads). --- nova/network/linux_net.py | 2 +- nova/tests/test_misc.py | 1 - nova/utils.py | 18 +++++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/nova/network/linux_net.py b/nova/network/linux_net.py index ee36407a6..9bb1685c0 100644 --- a/nova/network/linux_net.py +++ b/nova/network/linux_net.py @@ -274,7 +274,7 @@ class IptablesManager(object): self.semaphore = semaphore.Semaphore() - @utils.synchronized('iptables') + @utils.synchronized('iptables', external=True) def apply(self): """Apply the current in-memory set of iptables rules diff --git a/nova/tests/test_misc.py b/nova/tests/test_misc.py index 961499a60..c0c72bb12 100644 --- a/nova/tests/test_misc.py +++ b/nova/tests/test_misc.py @@ -16,7 +16,6 @@ import errno import os -import random import select from eventlet import greenpool diff --git a/nova/utils.py b/nova/utils.py index 8936614cc..c580e805a 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -574,10 +574,12 @@ def synchronized(name, external=False): def wrap(f): @functools.wraps(f) def inner(*args, **kwargs): - with _semaphores_semaphore: - if name not in _semaphores: - _semaphores[name] = semaphore.Semaphore() - sem = _semaphores[name] + # NOTE(soren): If we ever go natively threaded, this will be racy. + # See http://stackoverflow.com/questions/5390569/dyn\ + # amically-allocating-and-destroying-mutexes + if name not in _semaphores: + _semaphores[name] = semaphore.Semaphore() + sem = _semaphores[name] LOG.debug(_('Attempting to grab semaphore "%(lock)s" for method ' '"%(method)s"...' % {"lock": name, "method": f.__name__})) @@ -593,8 +595,14 @@ def synchronized(name, external=False): lock = _NoopContextManager() with lock: - return f(*args, **kwargs) + retval = f(*args, **kwargs) + # If no-one else is waiting for it, delete it. + # See note about possible raciness above. + if not sem.balance < 1: + del _semaphores[name] + + return retval return inner return wrap -- cgit From 62f9cc7cee30332143bf4e6e54fd21335db3c8da Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 14:36:32 +0100 Subject: Convert _cache_image to use utils.synchronized decorator. Disable its test case, since I think it is no longer needed with the tests for synchronized. --- nova/tests/test_virt.py | 2 +- nova/virt/libvirt_conn.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/nova/tests/test_virt.py b/nova/tests/test_virt.py index b214f5ce7..b9cd30a79 100644 --- a/nova/tests/test_virt.py +++ b/nova/tests/test_virt.py @@ -62,7 +62,7 @@ class CacheConcurrencyTestCase(test.TestCase): self.stubs.Set(os.path, 'exists', fake_exists) self.stubs.Set(utils, 'execute', fake_execute) - def test_same_fname_concurrency(self): + def notest_same_fname_concurrency(self): """Ensures that the same fname cache runs at a sequentially""" conn = libvirt_conn.LibvirtConnection wait1 = eventlet.event.Event() diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index e80b9fbdf..ca8d81f5f 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -48,7 +48,6 @@ from xml.dom import minidom from eventlet import tpool -from eventlet import semaphore import IPy @@ -552,13 +551,12 @@ class LibvirtConnection(object): os.mkdir(base_dir) base = os.path.join(base_dir, fname) - if fname not in LibvirtConnection._image_sems: - LibvirtConnection._image_sems[fname] = semaphore.Semaphore() - with LibvirtConnection._image_sems[fname]: + @utils.synchronized(fname) + def call_if_not_exists(base, fn, *args, **kwargs): if not os.path.exists(base): fn(target=base, *args, **kwargs) - if not LibvirtConnection._image_sems[fname].locked(): - del LibvirtConnection._image_sems[fname] + + call_if_not_exists(base, fn, *args, **kwargs) if cow: utils.execute('qemu-img', 'create', '-f', 'qcow2', '-o', -- cgit -- cgit From 01e7e598d0eb4aab9c3e7f69926a2875cdf22136 Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 14:39:35 +0100 Subject: Get rid of IptablesManager's explicit semaphore. --- nova/network/linux_net.py | 4 ---- nova/virt/libvirt_conn.py | 11 ++++------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/nova/network/linux_net.py b/nova/network/linux_net.py index 9bb1685c0..8cbf8db24 100644 --- a/nova/network/linux_net.py +++ b/nova/network/linux_net.py @@ -21,8 +21,6 @@ import inspect import os import calendar -from eventlet import semaphore - from nova import db from nova import exception from nova import flags @@ -272,8 +270,6 @@ class IptablesManager(object): self.ipv4['nat'].add_chain('floating-snat') self.ipv4['nat'].add_rule('snat', '-j $floating-snat') - self.semaphore = semaphore.Semaphore() - @utils.synchronized('iptables', external=True) def apply(self): """Apply the current in-memory set of iptables rules diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index ca8d81f5f..902866167 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -1766,14 +1766,11 @@ class IptablesFirewallDriver(FirewallDriver): def refresh_security_group_members(self, security_group): pass + @utils.synchronized('iptables', external=True) def refresh_security_group_rules(self, security_group): - # We use the semaphore to make sure noone applies the rule set - # after we've yanked the existing rules but before we've put in - # the new ones. - with self.iptables.semaphore: - for instance in self.instances.values(): - self.remove_filters_for_instance(instance) - self.add_filters_for_instance(instance) + for instance in self.instances.values(): + self.remove_filters_for_instance(instance) + self.add_filters_for_instance(instance) self.iptables.apply() def _security_group_chain_name(self, security_group_id): -- cgit From 804083b6ba811834c0bf9d5e2edcdf0130d7d1ce Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 14:50:53 +0100 Subject: IptablesManager.semaphore is no more. --- nova/network/linux_net.py | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/nova/network/linux_net.py b/nova/network/linux_net.py index 8cbf8db24..9faa7de07 100644 --- a/nova/network/linux_net.py +++ b/nova/network/linux_net.py @@ -277,28 +277,23 @@ class IptablesManager(object): This will blow away any rules left over from previous runs of the same component of Nova, and replace them with our current set of rules. This happens atomically, thanks to iptables-restore. - - We wrap the call in a semaphore lock, so that we don't race with - ourselves. In the event of a race with another component running - an iptables-* command at the same time, we retry up to 5 times. """ - with self.semaphore: - s = [('iptables', self.ipv4)] - if FLAGS.use_ipv6: - s += [('ip6tables', self.ipv6)] - - for cmd, tables in s: - for table in tables: - current_table, _ = self.execute('sudo', - '%s-save' % (cmd,), - '-t', '%s' % (table,), - attempts=5) - current_lines = current_table.split('\n') - new_filter = self._modify_rules(current_lines, - tables[table]) - self.execute('sudo', '%s-restore' % (cmd,), - process_input='\n'.join(new_filter), - attempts=5) + s = [('iptables', self.ipv4)] + if FLAGS.use_ipv6: + s += [('ip6tables', self.ipv6)] + + for cmd, tables in s: + for table in tables: + current_table, _ = self.execute('sudo', + '%s-save' % (cmd,), + '-t', '%s' % (table,), + attempts=5) + current_lines = current_table.split('\n') + new_filter = self._modify_rules(current_lines, + tables[table]) + self.execute('sudo', '%s-restore' % (cmd,), + process_input='\n'.join(new_filter), + attempts=5) def _modify_rules(self, current_lines, table, binary=None): unwrapped_chains = table.unwrapped_chains -- cgit From de2ecf115ff0baf43fa530807997513c728ffdaf Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 15:16:08 +0100 Subject: Fix locking problem in security group refresh code. --- nova/virt/libvirt_conn.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index 902866167..fcd78b3b2 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -1766,12 +1766,15 @@ class IptablesFirewallDriver(FirewallDriver): def refresh_security_group_members(self, security_group): pass - @utils.synchronized('iptables', external=True) def refresh_security_group_rules(self, security_group): + self.do_refresh_security_group_rules(security_group) + self.iptables.apply() + + @utils.synchronized('iptables', external=True) + def do_refresh_security_group_rules(self, security_group): for instance in self.instances.values(): self.remove_filters_for_instance(instance) self.add_filters_for_instance(instance) - self.iptables.apply() def _security_group_chain_name(self, security_group_id): return 'nova-sg-%s' % (security_group_id,) -- cgit -- cgit From d2494199df440809bbfbc55868b0dd57053868ed Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 16:23:47 +0100 Subject: Remove checks in _cache_image tests that were too implementation specific. --- nova/tests/test_virt.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nova/tests/test_virt.py b/nova/tests/test_virt.py index b9cd30a79..6bafac39f 100644 --- a/nova/tests/test_virt.py +++ b/nova/tests/test_virt.py @@ -62,7 +62,7 @@ class CacheConcurrencyTestCase(test.TestCase): self.stubs.Set(os.path, 'exists', fake_exists) self.stubs.Set(utils, 'execute', fake_execute) - def notest_same_fname_concurrency(self): + def test_same_fname_concurrency(self): """Ensures that the same fname cache runs at a sequentially""" conn = libvirt_conn.LibvirtConnection wait1 = eventlet.event.Event() @@ -77,13 +77,11 @@ class CacheConcurrencyTestCase(test.TestCase): eventlet.sleep(0) try: self.assertFalse(done2.ready()) - self.assertTrue('fname' in conn._image_sems) finally: wait1.send() done1.wait() eventlet.sleep(0) self.assertTrue(done2.ready()) - self.assertFalse('fname' in conn._image_sems) def test_different_fname_concurrency(self): """Ensures that two different fname caches are concurrent""" -- cgit From 9aac55b650e9f39c5771d4683e51af5eac6204bb Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 16:24:03 +0100 Subject: Add a test for leaked semaphores. --- nova/tests/test_misc.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/nova/tests/test_misc.py b/nova/tests/test_misc.py index c0c72bb12..8fc5d67c0 100644 --- a/nova/tests/test_misc.py +++ b/nova/tests/test_misc.py @@ -22,7 +22,8 @@ from eventlet import greenpool from eventlet import greenthread from nova import test -from nova.utils import parse_mailmap, str_dict_replace, synchronized +from nova import utils +from nova.utils import parse_mailmap, str_dict_replace class ProjectTestCase(test.TestCase): @@ -66,7 +67,7 @@ class ProjectTestCase(test.TestCase): class LockTestCase(test.TestCase): def test_synchronized_wrapped_function_metadata(self): - @synchronized('whatever') + @utils.synchronized('whatever') def foo(): """Bar""" pass @@ -77,8 +78,9 @@ class LockTestCase(test.TestCase): def test_synchronized_internally(self): """We can lock across multiple green threads""" + saved_sem_num = len(utils._semaphores) seen_threads = list() - @synchronized('testlock', external=False) + @utils.synchronized('testlock2', external=False) def f(id): for x in range(10): seen_threads.append(id) @@ -99,13 +101,15 @@ class LockTestCase(test.TestCase): for j in range(9): self.assertEquals(seen_threads[i*10], seen_threads[i*10+1+j]) + self.assertEqual(saved_sem_num, len(utils._semaphores), + "Semaphore leak detected") def test_synchronized_externally(self): """We can lock across multiple processes""" rpipe1, wpipe1 = os.pipe() rpipe2, wpipe2 = os.pipe() - @synchronized('testlock', external=True) + @utils.synchronized('testlock1', external=True) def f(rpipe, wpipe): try: os.write(wpipe, "foo") -- cgit From b2bdeb82024b1a015ccb2ad14606d6e9ccf80aa8 Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 16:29:37 +0100 Subject: pep8 --- nova/tests/test_misc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nova/tests/test_misc.py b/nova/tests/test_misc.py index 8fc5d67c0..4e17e1ce0 100644 --- a/nova/tests/test_misc.py +++ b/nova/tests/test_misc.py @@ -80,6 +80,7 @@ class LockTestCase(test.TestCase): """We can lock across multiple green threads""" saved_sem_num = len(utils._semaphores) seen_threads = list() + @utils.synchronized('testlock2', external=False) def f(id): for x in range(10): @@ -99,7 +100,8 @@ class LockTestCase(test.TestCase): # that the last 9 match the first in each chunk. for i in range(10): for j in range(9): - self.assertEquals(seen_threads[i*10], seen_threads[i*10+1+j]) + self.assertEquals(seen_threads[i * 10], + seen_threads[i * 10 + 1 + j]) self.assertEqual(saved_sem_num, len(utils._semaphores), "Semaphore leak detected") -- cgit From 06815cb729d8687403fc736ae6125c26867f42b3 Mon Sep 17 00:00:00 2001 From: Soren Hansen Date: Tue, 22 Mar 2011 17:13:48 +0100 Subject: Remove unused global semaphore. --- nova/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nova/utils.py b/nova/utils.py index c580e805a..8b9ce4734 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -532,7 +532,6 @@ def loads(s): return json.loads(s) -_semaphores_semaphore = semaphore.Semaphore() _semaphores = {} -- cgit From 3796b5a8fc2baa9a35ebbc721735f22e952e6aa3 Mon Sep 17 00:00:00 2001 From: Todd Willey Date: Wed, 23 Mar 2011 00:31:50 -0400 Subject: Fix some crypto strangeness (\n in file_name field of certificates, wrong IMPL method for certificate_update). --- nova/crypto.py | 3 ++- nova/db/api.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nova/crypto.py b/nova/crypto.py index 2a8d4abca..b112e5b92 100644 --- a/nova/crypto.py +++ b/nova/crypto.py @@ -26,6 +26,7 @@ import gettext import hashlib import os import shutil +import string import struct import tempfile import time @@ -267,7 +268,7 @@ def _sign_csr(csr_text, ca_folder): './openssl.cnf', '-infiles', inbound) out, _err = utils.execute('openssl', 'x509', '-in', outbound, '-serial', '-noout') - serial = out.rpartition("=")[2] + serial = string.strip(out.rpartition("=")[2]) os.chdir(start) with open(outbound, "r") as crtfile: return (serial, crtfile.read()) diff --git a/nova/db/api.py b/nova/db/api.py index add5bd83e..afc1bff2f 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -214,7 +214,7 @@ def certificate_update(context, certificate_id, values): Raises NotFound if service does not exist. """ - return IMPL.service_update(context, certificate_id, values) + return IMPL.certificate_update(context, certificate_id, values) ################### -- cgit From a822941d1fbfcfff7d52e2e42f2a50cb8aca6f0d Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Wed, 23 Mar 2011 01:02:13 -0700 Subject: Report the exception (happens when can't import libvirt) --- nova/compute/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 576937cd8..4f338135b 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -118,8 +118,8 @@ class ComputeManager(manager.Manager): try: self.driver = utils.import_object(compute_driver) - except ImportError: - LOG.error("Unable to load the virtualization driver.") + except ImportError as e: + LOG.error(_("Unable to load the virtualization driver: %s") % (e)) sys.exit(1) self.network_manager = utils.import_object(FLAGS.network_manager) -- cgit From abb764f51385a0b811b23379d78f7db027d4cca5 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 14:41:35 -0500 Subject: Automatically unrescue instances after a given timeout --- nova/compute/manager.py | 12 +++++- nova/utils.py | 7 ++++ nova/virt/libvirt_conn.py | 4 ++ nova/virt/xenapi/vmops.py | 95 +++++++++++++++++++++++++++++++++++------------ nova/virt/xenapi_conn.py | 4 ++ 5 files changed, 96 insertions(+), 26 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 576937cd8..3834c33ab 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -65,8 +65,11 @@ flags.DEFINE_string('console_host', socket.gethostname(), 'Console proxy host to use to connect to instances on' 'this host.') flags.DEFINE_integer('live_migration_retry_count', 30, - ("Retry count needed in live_migration." - " sleep 1 sec for each count")) + "Retry count needed in live_migration." + " sleep 1 sec for each count") +flags.DEFINE_integer("rescue_timeout", 0, + "Automatically unrescue an instance after N hours." + " Set to 0 to disable.") LOG = logging.getLogger('nova.compute.manager') @@ -132,6 +135,11 @@ class ComputeManager(manager.Manager): """ self.driver.init_host(host=self.host) + def periodic_tasks(self, context=None): + """Tasks to be run at a periodic interval.""" + super(ComputeManager, self).periodic_tasks(context) + self.driver.poll_rescued_instances(FLAGS.rescue_timeout) + def _update_state(self, context, instance_id): """Update the state of an instance from the driver info.""" # FIXME(ja): include other fields from state? diff --git a/nova/utils.py b/nova/utils.py index 499af2039..38cdb8021 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -334,6 +334,13 @@ def utcnow(): utcnow.override_time = None +def is_then_greater(then, seconds): + if utcnow() - then > datetime.timedelta(seconds=seconds): + return True + else: + return False + + def utcnow_ts(): """Timestamp version of our utcnow function.""" return time.mktime(utcnow().timetuple()) diff --git a/nova/virt/libvirt_conn.py b/nova/virt/libvirt_conn.py index e80b9fbdf..07545382d 100644 --- a/nova/virt/libvirt_conn.py +++ b/nova/virt/libvirt_conn.py @@ -412,6 +412,10 @@ class LibvirtConnection(object): # the normal xml file, we can just call reboot here self.reboot(instance) + @exception.wrap_exception + def poll_rescued_instances(self, timeout): + pass + @exception.wrap_exception def spawn(self, instance): xml = self.to_xml(instance) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 61ff00903..f46ac3b7e 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -51,6 +51,7 @@ class VMOps(object): def __init__(self, session): self.XenAPI = session.get_imported_xenapi() self._session = session + self.poll_rescue_last_ran = None VMHelper.XenAPI = self.XenAPI @@ -462,6 +463,10 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) + def _shutdown_rescue(self, vm_ref): + """Shutdown a rescue instance""" + self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm_ref) + def _destroy_vdis(self, instance, vm_ref): """Destroys all VDIs associated with a VM""" instance_id = instance.id @@ -479,6 +484,24 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) + def _destroy_rescue_vdis(self, rescue_vm_ref): + """Destroys all VDIs associated with a rescued VM""" + vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref) + for vdi_ref in vdi_refs: + try: + self._session.call_xenapi("Async.VDI.destroy", vdi_ref) + except self.XenAPI.Failure: + continue + + def _destroy_rescue_vbds(self, rescue_vm_ref): + """Destroys all VBDs tied to a rescue VM""" + vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) + for vbd_ref in vbd_refs: + _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) + if _vbd_ref["userdevice"] == "1": + VMHelper.unplug_vbd(self._session, vbd_ref) + VMHelper.destroy_vbd(self._session, vbd_ref) + def _destroy_kernel_ramdisk(self, instance, vm_ref): """ Three situations can occur: @@ -529,6 +552,10 @@ class VMOps(object): LOG.debug(_("Instance %(instance_id)s VM destroyed") % locals()) + def _destroy_rescue(self, vm_ref): + """Destroy a rescue instance""" + self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) + def destroy(self, instance): """ Destroy VM instance @@ -632,41 +659,61 @@ class VMOps(object): """ rescue_vm_ref = VMHelper.lookup(self._session, - instance.name + "-rescue") + instance.name + "-rescue") if not rescue_vm_ref: raise exception.NotFound(_( "Instance is not in Rescue Mode: %s" % instance.name)) original_vm_ref = self._get_vm_opaque_ref(instance) - vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) - instance._rescue = False - for vbd_ref in vbd_refs: - _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) - if _vbd_ref["userdevice"] == "1": - VMHelper.unplug_vbd(self._session, vbd_ref) - VMHelper.destroy_vbd(self._session, vbd_ref) - - task1 = self._session.call_xenapi("Async.VM.hard_shutdown", - rescue_vm_ref) - self._session.wait_for_task(task1, instance.id) - - vdi_refs = VMHelper.lookup_vm_vdis(self._session, rescue_vm_ref) - for vdi_ref in vdi_refs: - try: - task = self._session.call_xenapi('Async.VDI.destroy', vdi_ref) - self._session.wait_for_task(task, instance.id) - except self.XenAPI.Failure: - continue - - task2 = self._session.call_xenapi('Async.VM.destroy', rescue_vm_ref) - self._session.wait_for_task(task2, instance.id) - + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._destroy_rescue(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) + def poll_rescued_instances(self, timeout): + """Look for expirable rescued instances + - forcibly exit rescue mode for any instances that have been + in rescue mode for >= the provided timeout + """ + last_ran = self.poll_rescue_last_ran + if last_ran: + if not utils.is_then_greater(last_ran, timeout * 60 * 60): + # Do not run. Let's bail. + return + else: + # Update the time tracker and proceed. + self.poll_rescue_last_ran = utils.utcnow() + else: + # We need a base time to start tracking. + self.poll_rescue_last_ran = utils.utcnow() + return + + vms = [] + for instance in self.list_instances(): + if instance.endswith("-rescue"): + vms.append(dict(name=instance, + vm_ref=VMHelper.lookup(self._session, + instance))) + + for vm in vms: + rescue_name = vm["name"] + rescue_vm_ref = vm["vm_ref"] + original_name = vm["name"].split("-rescue", 1)[0] + original_vm_ref = VMHelper.lookup(self._session, original_name) + + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._destroy_rescue(rescue_vm_ref) + self._release_bootlock(original_vm_ref) + self._session.call_xenapi("VM.start", original_vm_ref, False, + False) + def get_info(self, instance): """Return data about VM instance""" vm_ref = self._get_vm_opaque_ref(instance) diff --git a/nova/virt/xenapi_conn.py b/nova/virt/xenapi_conn.py index da42a83b6..50aad96b8 100644 --- a/nova/virt/xenapi_conn.py +++ b/nova/virt/xenapi_conn.py @@ -225,6 +225,10 @@ class XenAPIConnection(object): """Unrescue the specified instance""" self._vmops.unrescue(instance, callback) + def poll_rescued_instances(self, timeout): + """Poll for rescued instances""" + self._vmops.poll_rescued_instances(timeout) + def reset_network(self, instance): """reset networking for specified instance""" self._vmops.reset_network(instance) -- cgit From a291e68fef876080d7984a1d7192e939808596bf Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 14:55:33 -0500 Subject: Fixed some typos --- nova/virt/xenapi/vmops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index cb36730a0..0a516bd36 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -463,7 +463,7 @@ class VMOps(object): except self.XenAPI.Failure, exc: LOG.exception(exc) - def _shutdown_rescue(self, vm_ref): + def _shutdown_rescue(self, rescue_vm_ref): """Shutdown a rescue instance""" self._session.call_xenapi("Async.VM.hard_shutdown", rescue_vm_ref) @@ -552,7 +552,7 @@ class VMOps(object): LOG.debug(_("Instance %(instance_id)s VM destroyed") % locals()) - def _destroy_rescue(self, vm_ref): + def _destroy_rescue_instance(self, rescue_vm_ref): """Destroy a rescue instance""" self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) @@ -671,7 +671,7 @@ class VMOps(object): self._destroy_rescue_vbds(rescue_vm_ref) self._shutdown_rescue(rescue_vm_ref) self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue(rescue_vm_ref) + self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) @@ -709,7 +709,7 @@ class VMOps(object): self._destroy_rescue_vbds(rescue_vm_ref) self._shutdown_rescue(rescue_vm_ref) self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue(rescue_vm_ref) + self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._session.call_xenapi("VM.start", original_vm_ref, False, False) -- cgit From 83e519b734078d8214fa0dc1d518607c7c0b244a Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 15:21:18 -0500 Subject: Only run periodic task when rescue_timeout is greater than 0 --- nova/compute/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 3834c33ab..9cb210c77 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -138,7 +138,8 @@ class ComputeManager(manager.Manager): def periodic_tasks(self, context=None): """Tasks to be run at a periodic interval.""" super(ComputeManager, self).periodic_tasks(context) - self.driver.poll_rescued_instances(FLAGS.rescue_timeout) + if FLAGS.rescue_timeout > 0: + self.driver.poll_rescued_instances(FLAGS.rescue_timeout) def _update_state(self, context, instance_id): """Update the state of an instance from the driver info.""" -- cgit From b3a8c70304672abe9b461c6cfeed3e8b517ca0b6 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 16:56:54 -0500 Subject: Added docstring --- nova/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nova/utils.py b/nova/utils.py index 38cdb8021..bf1aa4a91 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -335,6 +335,7 @@ utcnow.override_time = None def is_then_greater(then, seconds): + """Return True of 'then' is greater than 'seconds'""" if utcnow() - then > datetime.timedelta(seconds=seconds): return True else: -- cgit From a12b6f0a0808fba5541723a537118447b55b69ad Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 17:15:41 -0500 Subject: Better method name --- nova/utils.py | 6 +++--- nova/virt/xenapi/vmops.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nova/utils.py b/nova/utils.py index bf1aa4a91..04b6c9778 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -334,9 +334,9 @@ def utcnow(): utcnow.override_time = None -def is_then_greater(then, seconds): - """Return True of 'then' is greater than 'seconds'""" - if utcnow() - then > datetime.timedelta(seconds=seconds): +def is_older_than(before, seconds): + """Return True if before is older than 'seconds'""" + if utcnow() - before > datetime.timedelta(seconds=seconds): return True else: return False diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 0a516bd36..3f1eceddc 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -682,7 +682,7 @@ class VMOps(object): """ last_ran = self.poll_rescue_last_ran if last_ran: - if not utils.is_then_greater(last_ran, timeout * 60 * 60): + if not utils.is_older_than(last_ran, timeout * 60 * 60): # Do not run. Let's bail. return else: -- cgit From e19b12f668fb6cd693df6834f8895fb5487953d7 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 18:34:47 -0500 Subject: Review feedback --- nova/compute/manager.py | 2 +- nova/virt/xenapi/vmops.py | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 9cb210c77..ce1ae87e3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -68,7 +68,7 @@ flags.DEFINE_integer('live_migration_retry_count', 30, "Retry count needed in live_migration." " sleep 1 sec for each count") flags.DEFINE_integer("rescue_timeout", 0, - "Automatically unrescue an instance after N hours." + "Automatically unrescue an instance after N seconds." " Set to 0 to disable.") LOG = logging.getLogger('nova.compute.manager') diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 3f1eceddc..1f2e10aa6 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -497,8 +497,8 @@ class VMOps(object): """Destroys all VBDs tied to a rescue VM""" vbd_refs = self._session.get_xenapi().VM.get_VBDs(rescue_vm_ref) for vbd_ref in vbd_refs: - _vbd_ref = self._session.get_xenapi().VBD.get_record(vbd_ref) - if _vbd_ref["userdevice"] == "1": + vbd_rec = self._session.get_xenapi().VBD.get_record(vbd_ref) + if vbd_rec["userdevice"] == "1": # primary VBD is always 1 VMHelper.unplug_vbd(self._session, vbd_ref) VMHelper.destroy_vbd(self._session, vbd_ref) @@ -554,6 +554,10 @@ class VMOps(object): def _destroy_rescue_instance(self, rescue_vm_ref): """Destroy a rescue instance""" + self._destroy_rescue_vbds(rescue_vm_ref) + self._shutdown_rescue(rescue_vm_ref) + self._destroy_rescue_vdis(rescue_vm_ref) + self._session.call_xenapi("Async.VM.destroy", rescue_vm_ref) def destroy(self, instance): @@ -668,9 +672,6 @@ class VMOps(object): original_vm_ref = self._get_vm_opaque_ref(instance) instance._rescue = False - self._destroy_rescue_vbds(rescue_vm_ref) - self._shutdown_rescue(rescue_vm_ref) - self._destroy_rescue_vdis(rescue_vm_ref) self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._start(instance, original_vm_ref) @@ -682,7 +683,7 @@ class VMOps(object): """ last_ran = self.poll_rescue_last_ran if last_ran: - if not utils.is_older_than(last_ran, timeout * 60 * 60): + if not utils.is_older_than(last_ran, timeout): # Do not run. Let's bail. return else: @@ -693,23 +694,22 @@ class VMOps(object): self.poll_rescue_last_ran = utils.utcnow() return - vms = [] + rescue_vms = [] for instance in self.list_instances(): if instance.endswith("-rescue"): - vms.append(dict(name=instance, - vm_ref=VMHelper.lookup(self._session, - instance))) + rescue_vms.append(dict(name=instance, + vm_ref=VMHelper.lookup(self._session, + instance))) - for vm in vms: + for vm in rescue_vms: rescue_name = vm["name"] rescue_vm_ref = vm["vm_ref"] + + self._destroy_rescue_instance(rescue_vm_ref) + original_name = vm["name"].split("-rescue", 1)[0] original_vm_ref = VMHelper.lookup(self._session, original_name) - self._destroy_rescue_vbds(rescue_vm_ref) - self._shutdown_rescue(rescue_vm_ref) - self._destroy_rescue_vdis(rescue_vm_ref) - self._destroy_rescue_instance(rescue_vm_ref) self._release_bootlock(original_vm_ref) self._session.call_xenapi("VM.start", original_vm_ref, False, False) -- cgit From 10e61af8a23c126c15fcfcf25156d32facf19ec2 Mon Sep 17 00:00:00 2001 From: Josh Kearney Date: Wed, 23 Mar 2011 22:55:04 -0500 Subject: Added hyperv stub --- nova/virt/hyperv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nova/virt/hyperv.py b/nova/virt/hyperv.py index 29d18dac5..75fed6d4f 100644 --- a/nova/virt/hyperv.py +++ b/nova/virt/hyperv.py @@ -467,3 +467,6 @@ class HyperVConnection(object): if vm is None: raise exception.NotFound('Cannot detach volume from missing %s ' % instance_name) + + def poll_rescued_instances(self, timeout): + pass -- cgit