From 6da1dbc8498b4c3143221fd4e1b9f59374134fd3 Mon Sep 17 00:00:00 2001
From: Michael Still
Date: Fri, 14 Dec 2012 10:22:44 +1100
Subject: Retry NBD device allocation.

Mitigates bug 726244.

Change-Id: I2f09f3a69c3eb61ca3af90355d5861373b7ae277
---
 nova/virt/disk/mount/nbd.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'nova/virt')

diff --git a/nova/virt/disk/mount/nbd.py b/nova/virt/disk/mount/nbd.py
index 90d858a4b..1b6cc0778 100644
--- a/nova/virt/disk/mount/nbd.py
+++ b/nova/virt/disk/mount/nbd.py
@@ -37,15 +37,13 @@ CONF = cfg.CONF
 CONF.register_opts(nbd_opts)
 
 NBD_DEVICE_RE = re.compile('nbd[0-9]+')
+MAX_NBD_WAIT = 30
 
 
 class NbdMount(api.Mount):
     """qemu-nbd support disk images."""
     mode = 'nbd'
 
-    # NOTE(padraig): The remaining issue with this code is that multiple
-    # workers on a system can race against each other.
-
     def _detect_nbd_devices(self):
         """Detect nbd device files."""
         return filter(NBD_DEVICE_RE.match, os.listdir('/sys/block/'))
@@ -78,7 +76,7 @@ class NbdMount(api.Mount):
             pid = int(f.readline())
         return pid
 
-    def get_dev(self):
+    def _inner_get_dev(self):
         device = self._allocate_nbd()
         if not device:
             return False
@@ -102,12 +100,33 @@ class NbdMount(api.Mount):
                 break
             time.sleep(1)
         else:
+            _out, err = utils.trycmd('qemu-nbd', '-d', device,
+                                     run_as_root=True)
+            if err:
+                LOG.warn(_('Detaching from erroneous nbd device returned '
+                           'error: %s'), err)
             self.error = _('nbd device %s did not show up') % device
             return False
 
+        self.error = ''
         self.linked = True
         return True
 
+    def get_dev(self):
+        """Retry requests for NBD devices."""
+        start_time = time.time()
+        device = self._inner_get_dev()
+        while not device:
+            LOG.info(_('nbd device allocation failed. Will retry in 2 '
+                       'seconds.'))
+            time.sleep(2)
+            if time.time() - start_time > MAX_NBD_WAIT:
+                LOG.warn(_('nbd device allocation failed after repeated '
+                           'retries.'))
+                return False
+            device = self._inner_get_dev()
+        return True
+
     def unget_dev(self):
         if not self.linked:
             return
-- 
cgit