From e4377fdb0ef2087a36d2b1fbee96543f735040de Mon Sep 17 00:00:00 2001 From: Michael Still Date: Wed, 12 Dec 2012 17:13:53 +1100 Subject: Stop nbd leaks, remove pid race. With the previous implementation, if a nbd device was found to be in use by something other than nova it was removed from the class scoped list of devices, but never re-added. This meant we "leaked" devices away over time if we were competing with other nbd users on the machine. Instead of tracking our use at all, we should rely on the presence of a user pid in /sys. Resolves bug 1088083. Change-Id: If777e270a0dda12034ea7ef1bc7fd688cadde8a9 --- nova/tests/virt/disk/test_nbd.py | 118 ++++++++++++--------------------------- nova/virt/disk/mount/nbd.py | 78 ++++++++++---------------- 2 files changed, 68 insertions(+), 128 deletions(-) diff --git a/nova/tests/virt/disk/test_nbd.py b/nova/tests/virt/disk/test_nbd.py index 41abace8e..5e08215a5 100644 --- a/nova/tests/virt/disk/test_nbd.py +++ b/nova/tests/virt/disk/test_nbd.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2012 Michael Still +# Copyright 2012 Michael Still and Canonical Inc # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -43,8 +43,6 @@ def _fake_listdir_nbd_devices(path): return ['nbd0', 'nbd1'] return ORIG_LISTDIR(path) -ORIG_EXISTS = os.path.exists - def _fake_exists_no_users(path): if path.startswith('/sys/block/nbd'): @@ -54,26 +52,41 @@ def _fake_exists_no_users(path): return ORIG_EXISTS(path) +def _fake_exists_all_used(path): + if path.startswith('/sys/block/nbd'): + return True + return ORIG_EXISTS(path) + + +def _fake_detect_nbd_devices_none(self): + return [] + + +def _fake_detect_nbd_devices(self): + return ['nbd0', 'nbd1'] + + +def _fake_noop(*args, **kwargs): + return + + class NbdTestCase(test.TestCase): def setUp(self): super(NbdTestCase, self).setUp() - nbd.NbdMount._DEVICES_INITIALIZED = False - nbd.NbdMount._DEVICES = [] self.useFixture(fixtures.MonkeyPatch('os.listdir', _fake_listdir_nbd_devices)) - def test_nbd_initialize(self): + def test_nbd_no_devices(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) - self.assertTrue(n._DEVICES_INITIALIZED) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1')])) + n.detect_nbd_device = _fake_detect_nbd_devices_none + self.assertEquals(None, n._allocate_nbd()) def test_nbd_no_free_devices(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) - nbd.NbdMount._DEVICES = [] + self.useFixture(fixtures.MonkeyPatch('os.path.exists', + _fake_exists_all_used)) self.assertEquals(None, n._allocate_nbd()) def test_nbd_not_loaded(self): @@ -93,44 +106,26 @@ class NbdTestCase(test.TestCase): self.assertEquals(None, n._allocate_nbd()) self.assertEquals('nbd unavailable: module not loaded', n.error) - # And no device should be consumed - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1')])) - - def test_nbd_all_allocated(self): - tempdir = self.useFixture(fixtures.TempDir()).path - n = nbd.NbdMount(None, tempdir) - nbd.NbdMount._DEVICES = [] - self.useFixture(fixtures.MonkeyPatch('os.path.exists', - _fake_exists_no_users)) - - # Allocate a nbd device, fails - self.assertEquals(None, n._allocate_nbd()) - self.assertEquals('No free nbd devices', n.error) - def test_nbd_allocation(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) + n.detect_nbd_device = _fake_detect_nbd_devices self.useFixture(fixtures.MonkeyPatch('os.path.exists', _fake_exists_no_users)) + self.useFixture(fixtures.MonkeyPatch('random.shuffle', _fake_noop)) # Allocate a nbd device - self.assertEquals('/dev/nbd1', n._allocate_nbd()) - self.assertEquals(['nbd0'], nbd.NbdMount._DEVICES) - - # Allocate another self.assertEquals('/dev/nbd0', n._allocate_nbd()) - self.assertEquals([], nbd.NbdMount._DEVICES) def test_nbd_allocation_one_in_use(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) + self.useFixture(fixtures.MonkeyPatch('random.shuffle', _fake_noop)) # Fake out os.path.exists def fake_exists(path): if path.startswith('/sys/block/nbd'): - if path == '/sys/block/nbd1/pid': + if path == '/sys/block/nbd0/pid': return True if path.endswith('pid'): return False @@ -142,36 +137,12 @@ class NbdTestCase(test.TestCase): # TODO(mikal): Note that there is a leak here, as the in use nbd device # is removed from the list, but not returned so it will never be # re-added. I will fix this in a later patch. - self.assertEquals('/dev/nbd0', n._allocate_nbd()) - self.assertEquals([], nbd.NbdMount._DEVICES) - self.assertTrue('nbd0' not in nbd.NbdMount._DEVICES) - - def test_free(self): - tempdir = self.useFixture(fixtures.TempDir()).path - n = nbd.NbdMount(None, tempdir) - d = 'nosuch' - self.assertFalse(d in nbd.NbdMount._DEVICES) - - # Now free it - n._free_nbd(d) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1'), - Equals(d)])) - self.assertTrue(d in nbd.NbdMount._DEVICES) - - # Double free - n._free_nbd(d) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1'), - Equals(d)])) - self.assertTrue(d in nbd.NbdMount._DEVICES) + self.assertEquals('/dev/nbd1', n._allocate_nbd()) def test_get_dev_no_devices(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) - nbd.NbdMount._DEVICES = [] + n.detect_nbd_device = _fake_detect_nbd_devices self.assertFalse(n.get_dev()) def test_get_dev_qemu_fails(self): @@ -188,13 +159,11 @@ class NbdTestCase(test.TestCase): # Error logged, no device consumed self.assertFalse(n.get_dev()) self.assertTrue(n.error.startswith('qemu-nbd error')) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1')])) def test_get_dev_qemu_timeout(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) + n.detect_nbd_device = _fake_detect_nbd_devices self.useFixture(fixtures.MonkeyPatch('os.path.exists', _fake_exists_no_users)) @@ -202,22 +171,18 @@ class NbdTestCase(test.TestCase): def fake_trycmd(*args, **kwargs): return '', '' self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd', fake_trycmd)) - - # We steal time.sleep() as well to speed up this test - def fake_sleep(duration): - return - self.useFixture(fixtures.MonkeyPatch('time.sleep', fake_sleep)) + self.useFixture(fixtures.MonkeyPatch('time.sleep', _fake_noop)) # Error logged, no device consumed self.assertFalse(n.get_dev()) self.assertTrue(n.error.endswith('did not show up')) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1')])) def test_get_dev_works(self): tempdir = self.useFixture(fixtures.TempDir()).path n = nbd.NbdMount(None, tempdir) + n.detect_nbd_device = _fake_detect_nbd_devices + self.useFixture(fixtures.MonkeyPatch('random.shuffle', _fake_noop)) + self.useFixture(fixtures.MonkeyPatch('nova.utils.execute', _fake_noop)) # We need the pid file for the device which is allocated to exist, but # only once it is allocated to us @@ -236,7 +201,7 @@ class NbdTestCase(test.TestCase): def fake_trycmd(*args, **kwargs): def fake_exists_two(path): if path.startswith('/sys/block/nbd'): - if path == '/sys/block/nbd1/pid': + if path == '/sys/block/nbd0/pid': return True if path.endswith('pid'): return False @@ -247,25 +212,16 @@ class NbdTestCase(test.TestCase): return '', '' self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd', fake_trycmd)) - # Ditto execute - def fake_exec(*args, **kwargs): - return - self.useFixture(fixtures.MonkeyPatch('nova.utils.execute', fake_exec)) - # No error logged, device consumed self.assertTrue(n.get_dev()) self.assertTrue(n.linked) self.assertEquals('', n.error) - self.assertEquals(['nbd0'], nbd.NbdMount._DEVICES) - self.assertEquals('/dev/nbd1', n.device) + self.assertEquals('/dev/nbd0', n.device) # Free n.unget_dev() self.assertFalse(n.linked) self.assertEquals('', n.error) - self.assertThat(nbd.NbdMount._DEVICES, - MatchesListwise([Equals('nbd0'), - Equals('nbd1')])) self.assertEquals(None, n.device) def test_unget_dev_simple(self): diff --git a/nova/virt/disk/mount/nbd.py b/nova/virt/disk/mount/nbd.py index 5298a3a8e..90d858a4b 100644 --- a/nova/virt/disk/mount/nbd.py +++ b/nova/virt/disk/mount/nbd.py @@ -16,6 +16,7 @@ """Support for mounting images with qemu-nbd""" import os +import random import re import time @@ -42,82 +43,66 @@ class NbdMount(api.Mount): """qemu-nbd support disk images.""" mode = 'nbd' - # NOTE(padraig): There are two issues with this nbd device handling - # 1. We assume nothing else on the system uses nbd devices - # 2. Multiple workers on a system can race against each other - # A patch has been proposed in Nov 2011, to add add a -f option to - # qemu-nbd, akin to losetup -f. One could test for this by running qemu-nbd - # with just the -f option, where it will fail if not supported, or if there - # are no free devices. Note that patch currently hardcodes 16 devices. - # We might be able to alleviate problem 2. by scanning /proc/partitions - # like the aformentioned patch does. - - _DEVICES_INITIALIZED = False - _DEVICES = None - - def __init__(self, image, mount_dir, partition=None, device=None): - super(NbdMount, self).__init__(image, mount_dir, partition=partition, - device=device) - - # NOTE(mikal): this must be done here, because we need configuration - # to have been parsed before we initialize. Note the scoping to ensure - # we're updating the class scoped variables. - if not self._DEVICES_INITIALIZED: - NbdMount._DEVICES = list(self._detect_nbd_devices()) - NbdMount._DEVICES_INITIALIZED = True + # NOTE(padraig): The remaining issue with this code is that multiple + # workers on a system can race against each other. def _detect_nbd_devices(self): """Detect nbd device files.""" return filter(NBD_DEVICE_RE.match, os.listdir('/sys/block/')) + def _find_unused(self, devices): + for device in devices: + if not os.path.exists(os.path.join('/sys/block/', device, 'pid')): + return device + LOG.warn(_('No free nbd devices')) + return None + def _allocate_nbd(self): - if not os.path.exists("/sys/block/nbd0"): + if not os.path.exists('/sys/block/nbd0'): + LOG.error(_('ndb module not loaded')) self.error = _('nbd unavailable: module not loaded') return None - while True: - if not self._DEVICES: - # really want to log this info, not raise - self.error = _('No free nbd devices') - return None - - device = self._DEVICES.pop() - if not os.path.exists('/sys/block/%s/pid' % device): - break + devices = self._detect_nbd_devices() + random.shuffle(devices) + device = self._find_unused(devices) + if not device: + # really want to log this info, not raise + self.error = _('No free nbd devices') + return None return os.path.join('/dev', device) - def _free_nbd(self, device): - # The device could already be present if unget_dev - # is called right after a nova restart - # (when destroying an LXC container for example). - device = os.path.basename(device) - if not device in self._DEVICES: - self._DEVICES.append(device) + def _read_pid_file(self, pidfile): + # This is for unit test convenience + with open(pidfile) as f: + pid = int(f.readline()) + return pid def get_dev(self): device = self._allocate_nbd() if not device: return False - LOG.debug(_("Get nbd device %(dev)s for %(imgfile)s") % + # NOTE(mikal): qemu-nbd will return an error if the device file is + # already in use. + LOG.debug(_('Get nbd device %(dev)s for %(imgfile)s'), {'dev': device, 'imgfile': self.image}) _out, err = utils.trycmd('qemu-nbd', '-c', device, self.image, run_as_root=True) if err: self.error = _('qemu-nbd error: %s') % err - self._free_nbd(device) return False # NOTE(vish): this forks into another process, so give it a chance - # to set up before continuing + # to set up before continuing + pidfile = "/sys/block/%s/pid" % os.path.basename(device) for _i in range(CONF.timeout_nbd): - if os.path.exists("/sys/block/%s/pid" % os.path.basename(device)): + if os.path.exists(pidfile): self.device = device break time.sleep(1) else: self.error = _('nbd device %s did not show up') % device - self._free_nbd(device) return False self.linked = True @@ -126,8 +111,7 @@ class NbdMount(api.Mount): def unget_dev(self): if not self.linked: return - LOG.debug(_("Release nbd device %s"), self.device) + LOG.debug(_('Release nbd device %s'), self.device) utils.execute('qemu-nbd', '-d', self.device, run_as_root=True) - self._free_nbd(self.device) self.linked = False self.device = None -- cgit