summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJenkins <jenkins@review.openstack.org>2012-02-25 01:24:23 +0000
committerGerrit Code Review <review@openstack.org>2012-02-25 01:24:23 +0000
commitb02cefe64631d51cebdfb5e3a83d22cdfea3b767 (patch)
tree89c21761bde7957380500cd6696e1f3630422041
parentf7140d66c9a476a8d38fc428d4d011356cb87823 (diff)
parent2fbccc0c693193533284330325f5803c8c6ce52a (diff)
downloadnova-b02cefe64631d51cebdfb5e3a83d22cdfea3b767.tar.gz
nova-b02cefe64631d51cebdfb5e3a83d22cdfea3b767.tar.xz
nova-b02cefe64631d51cebdfb5e3a83d22cdfea3b767.zip
Merge "Clean stale lockfiles on service startup : fixes bug 785955"
-rw-r--r--nova/service.py2
-rw-r--r--nova/tests/test_utils.py149
-rw-r--r--nova/utils.py85
-rwxr-xr-xtools/clean_file_locks.py63
4 files changed, 298 insertions, 1 deletions
diff --git a/nova/service.py b/nova/service.py
index adf242f3d..1da10e697 100644
--- a/nova/service.py
+++ b/nova/service.py
@@ -158,6 +158,7 @@ class Service(object):
vcs_string = version.version_string_with_vcs()
LOG.audit(_('Starting %(topic)s node (version %(vcs_string)s)'),
{'topic': self.topic, 'vcs_string': vcs_string})
+ utils.cleanup_file_locks()
self.manager.init_host()
self.model_disconnected = False
ctxt = context.get_admin_context()
@@ -360,6 +361,7 @@ class WSGIService(object):
:returns: None
"""
+ utils.cleanup_file_locks()
if self.manager:
self.manager.init_host()
self.server.start()
diff --git a/nova/tests/test_utils.py b/nova/tests/test_utils.py
index 93146876e..843b48dd3 100644
--- a/nova/tests/test_utils.py
+++ b/nova/tests/test_utils.py
@@ -18,6 +18,8 @@ import __builtin__
import datetime
import hashlib
import os
+import os.path
+import socket
import StringIO
import tempfile
@@ -811,3 +813,150 @@ class Iso8601TimeTest(test.TestCase):
west = utils.parse_isotime(str)
normed = utils.normalize_time(west)
self._instaneous(normed, 2012, 2, 13, 23, 53, 07, 0)
+
+
+class TestLockCleanup(test.TestCase):
+    """Unit tests for utils.cleanup_file_locks().
+
+    Each test populates FLAGS.lock_path with sentinel files (named
+    '<hostname>.<thread>-<pid>') and/or '*.lock' files, runs the cleanup
+    and then checks which of those files are still present.
+    """
+
+    def setUp(self):
+        super(TestLockCleanup, self).setUp()
+
+        self.pid = os.getpid()
+        self.dead_pid = self._get_dead_pid()
+        self.lock_name = 'nova-testlock'
+        self.lock_file = os.path.join(FLAGS.lock_path,
+                                      self.lock_name + '.lock')
+        self.hostname = socket.gethostname()
+        # Paths created by the current test; tearDown() removes them so
+        # that a failed assertion cannot leak files into later tests.
+        self._created = [self.lock_file]
+        # Start without a leftover lock file.  Only "no such file" is
+        # ignored; any other error means the lock directory is unusable
+        # and should fail the test instead of being hidden.
+        utils.delete_if_exists(self.lock_file)
+
+    def tearDown(self):
+        """Remove every file the test created, whether or not it passed."""
+        for path in self._created:
+            utils.delete_if_exists(path)
+        super(TestLockCleanup, self).tearDown()
+
+    def _get_dead_pid(self):
+        """Return a pid that has no /proc entry.
+
+        Searches downwards from our own pid; returns 0 if the search
+        reaches pid 1 without finding a free one.
+        """
+        candidate_pid = self.pid - 1
+        while os.path.exists(os.path.join('/proc', str(candidate_pid))):
+            candidate_pid -= 1
+            if candidate_pid == 1:
+                return 0
+        return candidate_pid
+
+    def _get_sentinel_name(self, hostname, pid, thread='MainThread'):
+        """Build the path of a sentinel file inside FLAGS.lock_path."""
+        return os.path.join(FLAGS.lock_path,
+                            '%s.%s-%d' % (hostname, thread, pid))
+
+    def _touch(self, path):
+        """Create an empty file and register it for removal in tearDown."""
+        open(path, 'wb').close()
+        self._created.append(path)
+        return path
+
+    def _create_sentinel(self, hostname, pid, thread='MainThread'):
+        """Create an empty sentinel file and return its path."""
+        return self._touch(self._get_sentinel_name(hostname, pid, thread))
+
+    def _create_thread_sentinels(self):
+        """Create sentinels for four live threads and one dead process.
+
+        Returns a (live_sentinels, dead_sentinel) pair.
+        """
+        live = [self._create_sentinel(self.hostname, self.pid,
+                                      'Default-%d' % number)
+                for number in range(1, 5)]
+        dead = self._create_sentinel(self.hostname, self.dead_pid,
+                                     'Default-1')
+        return live, dead
+
+    def test_clean_stale_locks(self):
+        """verify locks for dead processes are cleaned up"""
+
+        # create sentinels for two processes, us and a 'dead' one
+        # no active lock
+        sentinel1 = self._create_sentinel(self.hostname, self.pid)
+        sentinel2 = self._create_sentinel(self.hostname, self.dead_pid)
+
+        utils.cleanup_file_locks()
+
+        self.assertTrue(os.path.exists(sentinel1))
+        self.assertFalse(os.path.exists(self.lock_file))
+        self.assertFalse(os.path.exists(sentinel2))
+
+    def test_clean_stale_locks_active(self):
+        """verify locks for dead processes are cleaned with an active lock"""
+
+        # create sentinels for two processes, us and a 'dead' one
+        # create an active lock for us
+        sentinel1 = self._create_sentinel(self.hostname, self.pid)
+        sentinel2 = self._create_sentinel(self.hostname, self.dead_pid)
+        os.link(sentinel1, self.lock_file)
+
+        utils.cleanup_file_locks()
+
+        self.assertTrue(os.path.exists(sentinel1))
+        self.assertTrue(os.path.exists(self.lock_file))
+        self.assertFalse(os.path.exists(sentinel2))
+
+    def test_clean_stale_with_threads(self):
+        """verify sentinels for multiple threads are kept, with no lock"""
+
+        # create sentinels for four threads in our process, and a 'dead'
+        # process. no lock.
+        live, dead = self._create_thread_sentinels()
+
+        utils.cleanup_file_locks()
+
+        for sentinel in live:
+            self.assertTrue(os.path.exists(sentinel))
+        self.assertFalse(os.path.exists(self.lock_file))
+        self.assertFalse(os.path.exists(dead))
+
+    def test_clean_stale_with_threads_active(self):
+        """verify sentinels for multiple threads are kept, with a lock"""
+
+        # create sentinels for four threads in our process, and a 'dead'
+        # process; the first live sentinel holds the lock
+        live, dead = self._create_thread_sentinels()
+        os.link(live[0], self.lock_file)
+
+        utils.cleanup_file_locks()
+
+        for sentinel in live:
+            self.assertTrue(os.path.exists(sentinel))
+        self.assertTrue(os.path.exists(self.lock_file))
+        self.assertFalse(os.path.exists(dead))
+
+    def test_clean_bogus_lockfiles(self):
+        """verify unlinked nova lockfiles are cleaned, others are kept"""
+
+        lock1 = self._touch(os.path.join(FLAGS.lock_path,
+                                         'nova-testlock1.lock'))
+        lock2 = self._touch(os.path.join(FLAGS.lock_path,
+                                         'nova-testlock2.lock'))
+        # no 'nova-' prefix, so the cleanup must leave this one alone
+        lock3 = self._touch(os.path.join(FLAGS.lock_path,
+                                         'testlock3.lock'))
+
+        utils.cleanup_file_locks()
+
+        self.assertFalse(os.path.exists(lock1))
+        self.assertFalse(os.path.exists(lock2))
+        self.assertTrue(os.path.exists(lock3))
diff --git a/nova/utils.py b/nova/utils.py
index ec62f87fc..0f3e61897 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -26,7 +26,6 @@ import hashlib
import inspect
import itertools
import json
-import lockfile
import os
import pyclbr
import random
@@ -46,6 +45,7 @@ from eventlet import greenthread
from eventlet import semaphore
from eventlet.green import subprocess
import iso8601
+import lockfile
import netaddr
from nova import exception
@@ -857,6 +857,89 @@ def synchronized(name, external=False):
return wrap
+def cleanup_file_locks():
+    """Clean up stale locks left behind by process failures.
+
+    The lockfile module, used by @synchronized, can leave stale lockfiles
+    behind after process failure. These locks can cause process hangs
+    at startup, when a process deadlocks on a lock which will never
+    be unlocked.
+
+    Intended to be called at service startup.  Does nothing when
+    FLAGS.disable_process_locking is set.  Liveness of a pid is decided
+    by the presence of /proc/<pid>.
+
+    :raises OSError: if FLAGS.lock_path cannot be listed, or a file
+                     cannot be examined or removed for any reason other
+                     than it having already disappeared.
+
+    """
+    # Imported locally so this function does not depend on the module's
+    # import block providing errno.
+    import errno
+
+    # NOTE(mikeyp) this routine incorporates some internal knowledge
+    #              from the lockfile module, and this logic really
+    #              should be part of that module.
+    #
+    # cleanup logic:
+    # 1) look for the lockfile module's 'sentinel' files, of the form
+    #    hostname.[thread-.*]-pid, extract the pid.
+    #    if pid doesn't match a running process, delete the file since
+    #    it's from a dead process.
+    # 2) check for the actual lockfiles. if lockfile exists with linkcount
+    #    of 1, it's bogus, so delete it. A link count >= 2 indicates that
+    #    there are probably sentinels still linked to it from active
+    #    processes.  This check isn't perfect, but there is no way to
+    #    reliably tell which sentinels refer to which lock in the
+    #    lockfile implementation.
+
+    if FLAGS.disable_process_locking:
+        return
+
+    # The hostname must match literally: escape it so the dots of a
+    # fully-qualified name are not treated as regex wildcards.
+    sentinel_re = re.compile(re.escape(socket.gethostname()) +
+                             r'\..*-(\d+)$')
+    # Anchored at the end so that e.g. 'nova-x.lock.bak' is left alone.
+    lockfile_re = re.compile(r'nova-.*\.lock$')
+    files = os.listdir(FLAGS.lock_path)
+
+    # cleanup sentinels
+    for filename in files:
+        match = sentinel_re.match(filename)
+        if match is None:
+            continue
+        pid = match.group(1)
+        info = {'filename': filename, 'pid': pid}
+        # Interpolate *after* _() so the untranslated template is what
+        # gets looked up in the message catalogue.
+        LOG.debug(_('Found sentinel %(filename)s for pid %(pid)s') %
+                  info)
+        if not os.path.exists(os.path.join('/proc', pid)):
+            delete_if_exists(os.path.join(FLAGS.lock_path, filename))
+            LOG.debug(_('Cleaned sentinel %(filename)s for pid %(pid)s') %
+                      info)
+
+    # cleanup lock files; this must run after the sentinel pass so the
+    # link counts no longer include sentinels of dead processes
+    for filename in files:
+        if lockfile_re.match(filename) is None:
+            continue
+        path = os.path.join(FLAGS.lock_path, filename)
+        try:
+            stat_info = os.stat(path)
+        except OSError as exc:
+            if exc.errno == errno.ENOENT:
+                # vanished since the directory was listed
+                continue
+            raise
+        info = {'file': filename, 'count': stat_info.st_nlink}
+        LOG.debug(_('Found lockfile %(file)s with link count %(count)d') %
+                  info)
+        if stat_info.st_nlink == 1:
+            delete_if_exists(path)
+            LOG.debug(_('Cleaned lockfile %(file)s with link count '
+                        '%(count)d') % info)
+
+
+def delete_if_exists(pathname):
+    """Delete a file, but ignore file not found error.
+
+    :param pathname: path of the file to unlink
+    :raises OSError: for any failure other than the path not existing
+                     (ENOENT), e.g. a permission error
+    """
+    # Imported locally so this function does not depend on the module's
+    # import block providing errno.
+    import errno
+
+    try:
+        os.unlink(pathname)
+    except OSError as exc:
+        # An already-missing file is the one outcome callers want ignored.
+        if exc.errno != errno.ENOENT:
+            raise
+
+
def get_from_path(items, path):
"""Returns a list of items matching the specified path.
diff --git a/tools/clean_file_locks.py b/tools/clean_file_locks.py
new file mode 100755
index 000000000..eb21177aa
--- /dev/null
+++ b/tools/clean_file_locks.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright 2012 La Honda Research Center, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""clean_file_locks.py - Cleans stale interprocess locks
+
+This routine can be used to find and delete stale lock files from
+nova's interprocess synchronization. It can be used safely while services
+are running.
+
+"""
+
+import logging
+import optparse
+
+from nova import flags
+from nova import utils
+from nova import log
+
+
+# NOTE(review): presumably 'nova.utils' is the logger name nova.utils
+# itself logs under, so the level set in main() also governs the debug
+# output of utils.cleanup_file_locks() -- confirm.
+LOG = log.getLogger('nova.utils')
+# main() reads FLAGS.lock_path from this object.
+FLAGS = flags.FLAGS
+
+
+def parse_options():
+    """Parse the command line.
+
+    The only option defined is --verbose.  Returns the (options, args)
+    pair produced by optparse.
+    """
+    cli = optparse.OptionParser('usage: %prog [options]')
+    cli.add_option('--verbose', action='store_true',
+                   help='List lock files found and deleted')
+    return cli.parse_args()
+
+
+def main():
+    """Script entry point: pick a log level, then clean stale locks.
+
+    --verbose selects DEBUG logging; otherwise INFO is used.
+    """
+    opts, _unused_args = parse_options()
+
+    level = logging.DEBUG if opts.verbose else logging.INFO
+    LOG.logger.setLevel(level)
+
+    LOG.info('Cleaning stale locks from %s' % FLAGS.lock_path)
+    utils.cleanup_file_locks()
+    LOG.info('Finished')
+
+
+if __name__ == '__main__':
+    main()