From fa4c69330585ead1a1dd58b3bec4cc3f0f92082c Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 04:44:57 -0700 Subject: export devices unique --- nova/db/sqlalchemy/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py index 41013f41b..b6a8c134a 100644 --- a/nova/db/sqlalchemy/models.py +++ b/nova/db/sqlalchemy/models.py @@ -25,7 +25,7 @@ import datetime # TODO(vish): clean up these imports from sqlalchemy.orm import relationship, backref, exc, object_mapper -from sqlalchemy import Column, Integer, String +from sqlalchemy import Column, Integer, String, schema from sqlalchemy import ForeignKey, DateTime, Boolean, Text from sqlalchemy.ext.declarative import declarative_base @@ -315,6 +315,7 @@ class Quota(BASE, NovaBase): class ExportDevice(BASE, NovaBase): """Represates a shelf and blade that a volume can be exported on""" __tablename__ = 'export_devices' + __table_args__ = (schema.UniqueConstraint("name", "site"), {'mysql_engine': 'InnoDB'}) id = Column(Integer, primary_key=True) shelf_id = Column(Integer) blade_id = Column(Integer) -- cgit From e77e8a4c368a5c4da1f3e64938bc8940c3603418 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 04:48:49 -0700 Subject: fixed name for unique constraint --- nova/db/sqlalchemy/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py index b6a8c134a..a6c7d83c0 100644 --- a/nova/db/sqlalchemy/models.py +++ b/nova/db/sqlalchemy/models.py @@ -315,7 +315,7 @@ class Quota(BASE, NovaBase): class ExportDevice(BASE, NovaBase): """Represates a shelf and blade that a volume can be exported on""" __tablename__ = 'export_devices' - __table_args__ = (schema.UniqueConstraint("name", "site"), {'mysql_engine': 'InnoDB'}) + __table_args__ = (schema.UniqueConstraint("shelf_id", "blade_id"), {'mysql_engine': 'InnoDB'}) id = Column(Integer, primary_key=True) shelf_id = Column(Integer) blade_id = Column(Integer) -- cgit From b8516a2239658f0734299049648cbf2828b845eb Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 04:57:46 -0700 Subject: allow multiple volumes to run ensure_blades without creating duplicates --- nova/db/api.py | 10 +++++++--- nova/db/sqlalchemy/api.py | 8 +++++--- nova/volume/manager.py | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/nova/db/api.py b/nova/db/api.py index 9f6ff99c3..f78536967 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -413,9 +413,13 @@ def export_device_count(context): return IMPL.export_device_count(context) -def export_device_create(context, values): - """Create an export_device from the values dictionary.""" - return IMPL.export_device_create(context, values) +def export_device_create_safe(context, values): + """Create an export_device from the values dictionary. + + The device is not returned. If the create violates the unique + constraints because the shelf_id and blade_id already exist, + no exception is raised.""" + return IMPL.export_device_create_safe(context, values) ################### diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index d612fe669..c96a97951 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -630,12 +630,14 @@ def export_device_count(_context): return models.ExportDevice.count() -def export_device_create(_context, values): +def export_device_create_safe(_context, values): export_device_ref = models.ExportDevice() for (key, value) in values.iteritems(): export_device_ref[key] = value - export_device_ref.save() - return export_device_ref + try: + export_device_ref.save() + except exc.IntegrityError: + pass ################### diff --git a/nova/volume/manager.py b/nova/volume/manager.py index 37b78fdee..7dbd37623 100644 --- a/nova/volume/manager.py +++ b/nova/volume/manager.py @@ -62,7 +62,7 @@ class AOEManager(manager.Manager): for shelf_id in xrange(FLAGS.num_shelves): for blade_id in xrange(FLAGS.blades_per_shelf): dev = {'shelf_id': shelf_id, 'blade_id': blade_id} - self.db.export_device_create(context, dev) + self.db.export_device_create_safe(context, dev) @defer.inlineCallbacks def create_volume(self, context, volume_id): -- cgit From 84a9e5a9ea3105513bb5a7ae9b30d49e6eb3bd3e Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 05:31:27 -0700 Subject: Integrity error is in a different exc file --- nova/db/sqlalchemy/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index c96a97951..93c80d27c 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -25,6 +25,7 @@ from nova import flags from nova.db.sqlalchemy import models from nova.db.sqlalchemy.session import get_session from sqlalchemy import or_ +from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import joinedload_all from sqlalchemy.sql import func @@ -636,7 +637,7 @@ def export_device_create_safe(_context, values): export_device_ref[key] = value try: export_device_ref.save() - except exc.IntegrityError: + except IntegrityError: pass -- cgit From fb66d1577a7c49b013f619c620c30bd4b11586e7 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 05:46:13 -0700 Subject: re added missing volume update --- nova/volume/manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nova/volume/manager.py b/nova/volume/manager.py index 7dbd37623..a06070471 100644 --- a/nova/volume/manager.py +++ b/nova/volume/manager.py @@ -95,6 +95,9 @@ class AOEManager(manager.Manager): yield self.driver.ensure_exports() now = datetime.datetime.utcnow() + self.db.volume_update(context, + volume_ref['id'], {'status': 'available', + 'launched_at': now}) logging.debug("volume %s: created successfully", volume_id) defer.returnValue(volume_id) -- cgit From 69e30d197dc3c518528bb8d7101c496d753f2122 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 06:05:17 -0700 Subject: deleting is set by cloud --- nova/volume/manager.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nova/volume/manager.py b/nova/volume/manager.py index a06070471..8472ff33b 100644 --- a/nova/volume/manager.py +++ b/nova/volume/manager.py @@ -105,14 +105,11 @@ class AOEManager(manager.Manager): def delete_volume(self, context, volume_id): """Deletes and unexports volume""" volume_ref = self.db.volume_get(context, volume_id) - if volume_ref['status'] != "available": - raise exception.Error("Volume is not available") if volume_ref['attach_status'] == "attached": raise exception.Error("Volume is still attached") if volume_ref['host'] != self.host: raise exception.Error("Volume is not local to this node") logging.debug("Deleting volume with id of: %s", volume_id) - self.db.volume_update(context, volume_id, {'status': 'deleting'}) shelf_id, blade_id = self.db.volume_get_shelf_and_blade(context, volume_id) yield self.driver.remove_export(volume_ref['str_id'], -- cgit From 83a6767ab7be871fd269bf409f819033378e4ea9 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 06:37:08 -0700 Subject: handle exceptions thrown by vblade stop and vblade destroy --- nova/volume/driver.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index 4604b85d5..a05e34e51 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -24,6 +24,7 @@ import logging from twisted.internet import defer +from nova import exception from nova import flags from nova import process @@ -81,16 +82,34 @@ class AOEDriver(object): @defer.inlineCallbacks def remove_export(self, _volume_name, shelf_id, blade_id): """Removes an export for a logical volume""" - yield self._execute( - "sudo vblade-persist stop %s %s" % (shelf_id, blade_id)) - yield self._execute( - "sudo vblade-persist destroy %s %s" % (shelf_id, blade_id)) + # NOTE(vish): These commands can partially fail sometimes, but + # running them a second time on failure will usually + # pick up the remaining tasks even though it also + # raises an exception + try: + yield self._execute("sudo vblade-persist stop %s %s" % + (shelf_id, blade_id)) + except exception.ProcessExecutionError: + logging.exception("vblade stop threw an error, recovering") + yield self._execute("sleep 2") + yield self._execute("sudo vblade-persist stop %s %s" % + (shelf_id, blade_id), + check_exit_code=False) + try: + yield self._execute("sudo vblade-persist destroy %s %s" % + (shelf_id, blade_id)) + except exception.ProcessExecutionError: + logging.exception("vblade destroy threw an error, recovering") + yield self._execute("sleep 2") + yield self._execute("sudo vblade-persist destroy %s %s" % + (shelf_id, blade_id), + check_exit_code=False) @defer.inlineCallbacks def ensure_exports(self): """Runs all existing exports""" # NOTE(ja): wait for blades to appear - yield self._execute("sleep 5") + yield self._execute("sleep 2") yield self._execute("sudo vblade-persist auto all", check_exit_code=False) yield self._execute("sudo vblade-persist start all", -- cgit From f201f562fe79d09b0bbad42c4630ec8e4c76bf06 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 07:04:36 -0700 Subject: more error handling in volume driver code --- nova/volume/driver.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index a05e34e51..e8d11c74d 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -50,17 +50,26 @@ class AOEDriver(object): sizestr = '100M' else: sizestr = '%sG' % size - yield self._execute( - "sudo lvcreate -L %s -n %s %s" % (sizestr, - volume_name, - FLAGS.volume_group)) + yield self._execute("sudo lvcreate -L %s -n %s %s" % + (sizestr, + volume_name, + FLAGS.volume_group)) @defer.inlineCallbacks def delete_volume(self, volume_name): """Deletes a logical volume""" - yield self._execute( - "sudo lvremove -f %s/%s" % (FLAGS.volume_group, - volume_name)) + # NOTE(vish): Sometimes complains that the volume is still + # open, so delay and try again before failing + try: + yield self._execute("sudo lvremove -f %s/%s" % + (FLAGS.volume_group, + volume_name)) + except exception.ProcessExecutionError: + logging.exception("lvremove threw an error, recovering") + yield self._execute("sleep 2") + yield self._execute("sudo lvremove -f %s/%s" % + (FLAGS.volume_group, + volume_name)) @defer.inlineCallbacks def create_export(self, volume_name, shelf_id, blade_id): @@ -85,7 +94,8 @@ class AOEDriver(object): # NOTE(vish): These commands can partially fail sometimes, but # running them a second time on failure will usually # pick up the remaining tasks even though it also - # raises an exception + # raises an exception. We therefore ignore the + # failure on the second try. try: yield self._execute("sudo vblade-persist stop %s %s" % (shelf_id, blade_id)) -- cgit From 517348e33b8cc50e6a0d09f9112b7daab55b132c Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 07:24:31 -0700 Subject: generalized retry into try_execute --- nova/volume/driver.py | 59 +++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index e8d11c74d..a9ea5caa3 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -41,6 +41,19 @@ class AOEDriver(object): def __init__(self, execute=process.simple_execute, *args, **kwargs): self._execute = execute + @defer.inlineCallbacks + def _try_execute(self, command): + # NOTE(vish): Volume commands can partially fail due to timing, but + # running them a second time on failure will usually + # recover nicely. + try: + yield self._execute(command) + except exception.ProcessExecutionError: + logging.exception("Attempting to recover from a failed execute.") + yield self._execute("sleep 2") + yield self._execute(command) + + @defer.inlineCallbacks def create_volume(self, volume_name, size): """Creates a logical volume""" @@ -50,7 +63,7 @@ class AOEDriver(object): sizestr = '100M' else: sizestr = '%sG' % size - yield self._execute("sudo lvcreate -L %s -n %s %s" % + yield self._try_execute("sudo lvcreate -L %s -n %s %s" % (sizestr, volume_name, FLAGS.volume_group)) @@ -58,23 +71,14 @@ class AOEDriver(object): @defer.inlineCallbacks def delete_volume(self, volume_name): """Deletes a logical volume""" - # NOTE(vish): Sometimes complains that the volume is still - # open, so delay and try again before failing - try: - yield self._execute("sudo lvremove -f %s/%s" % - (FLAGS.volume_group, - volume_name)) - except exception.ProcessExecutionError: - logging.exception("lvremove threw an error, recovering") - yield self._execute("sleep 2") - yield self._execute("sudo lvremove -f %s/%s" % + yield self._try_execute("sudo lvremove -f %s/%s" % (FLAGS.volume_group, volume_name)) @defer.inlineCallbacks def create_export(self, volume_name, shelf_id, blade_id): """Creates an export for a logical volume""" - yield self._execute( + yield self._try_execute( "sudo vblade-persist setup %s %s %s /dev/%s/%s" % (shelf_id, blade_id, @@ -91,39 +95,16 @@ class AOEDriver(object): @defer.inlineCallbacks def remove_export(self, _volume_name, shelf_id, blade_id): """Removes an export for a logical volume""" - # NOTE(vish): These commands can partially fail sometimes, but - # running them a second time on failure will usually - # pick up the remaining tasks even though it also - # raises an exception. We therefore ignore the - # failure on the second try. - try: - yield self._execute("sudo vblade-persist stop %s %s" % + yield self._try_execute("sudo vblade-persist stop %s %s" % (shelf_id, blade_id)) - except exception.ProcessExecutionError: - logging.exception("vblade stop threw an error, recovering") - yield self._execute("sleep 2") - yield self._execute("sudo vblade-persist stop %s %s" % - (shelf_id, blade_id), - check_exit_code=False) - try: - yield self._execute("sudo vblade-persist destroy %s %s" % + yield self._try_execute("sudo vblade-persist destroy %s %s" % (shelf_id, blade_id)) - except exception.ProcessExecutionError: - logging.exception("vblade destroy threw an error, recovering") - yield self._execute("sleep 2") - yield self._execute("sudo vblade-persist destroy %s %s" % - (shelf_id, blade_id), - check_exit_code=False) @defer.inlineCallbacks def ensure_exports(self): """Runs all existing exports""" - # NOTE(ja): wait for blades to appear - yield self._execute("sleep 2") - yield self._execute("sudo vblade-persist auto all", - check_exit_code=False) - yield self._execute("sudo vblade-persist start all", - check_exit_code=False) + yield self._try_execute("sudo vblade-persist auto all") + yield self._try_execute("sudo vblade-persist start all") class FakeAOEDriver(AOEDriver): -- cgit From 0fd7cb594e5482d78fed8a026a24c4e1c8dac3bc Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 07:37:03 -0700 Subject: auto all and start all exceptions should be ignored --- nova/volume/driver.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index a9ea5caa3..7d5db4ab0 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -103,8 +103,18 @@ class AOEDriver(object): @defer.inlineCallbacks def ensure_exports(self): """Runs all existing exports""" - yield self._try_execute("sudo vblade-persist auto all") - yield self._try_execute("sudo vblade-persist start all") + # NOTE(vish): The standard _try_execute does not work here + # because these methods throw errors if other + # volumes on this host are in the process of + # being created. The good news is the command + # still works for the other volumes, so we + # just wait a bit for the current volume to + # be ready and ignore any errors. + yield self._execute("sleep 2") + yield self._execute("sudo vblade-persist auto all", + check_exit_code=False) + yield self._execute("sudo vblade-persist start all", + check_exit_code=False) class FakeAOEDriver(AOEDriver): -- cgit From ee766c9c8164ff526a9518c668ba08be4786ac35 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 08:06:44 -0700 Subject: flag for retries on volume commands --- nova/volume/driver.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index 7d5db4ab0..a710ee3d6 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -34,6 +34,8 @@ flags.DEFINE_string('volume_group', 'nova-volumes', 'Name for the VG that will contain exported volumes') flags.DEFINE_string('aoe_eth_dev', 'eth0', 'Which device to export the volumes on') +flags.DEFINE_string('num_shell_tries', 3, + 'number of times to attempt to run flakey shell commands') class AOEDriver(object): @@ -46,12 +48,18 @@ class AOEDriver(object): # NOTE(vish): Volume commands can partially fail due to timing, but # running them a second time on failure will usually # recover nicely. - try: - yield self._execute(command) - except exception.ProcessExecutionError: - logging.exception("Attempting to recover from a failed execute.") - yield self._execute("sleep 2") - yield self._execute(command) + tries = 0 + while True: + try: + yield self._execute(command) + defer.returnValue(True) + except exception.ProcessExecutionError: + tries = tries + 1 + if tries >= FLAGS.num_shell_tries: + raise + logging.exception("Recovering from a failed execute." + "Try number %s", tries) + yield self._execute("sleep %s", tries ** 2) @defer.inlineCallbacks -- cgit From 10be00e16c6428bf3709590f13984246fdfaf14b Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Sun, 12 Sep 2010 08:16:59 -0700 Subject: fixed typo --- nova/volume/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nova/volume/driver.py b/nova/volume/driver.py index a710ee3d6..cca619550 100644 --- a/nova/volume/driver.py +++ b/nova/volume/driver.py @@ -59,7 +59,7 @@ class AOEDriver(object): raise logging.exception("Recovering from a failed execute." "Try number %s", tries) - yield self._execute("sleep %s", tries ** 2) + yield self._execute("sleep %s" % tries ** 2) @defer.inlineCallbacks -- cgit From 7190ad478b5e92a42d5109d01b5f178de2181127 Mon Sep 17 00:00:00 2001 From: Vishvananda Ishaya Date: Tue, 28 Sep 2010 09:38:58 -0700 Subject: return a value if possible from export_device_create_safe --- nova/db/sqlalchemy/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index b6ac87901..8e6aa317b 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -677,8 +677,9 @@ def export_device_create_safe(_context, values): export_device_ref[key] = value try: export_device_ref.save() + return export_device_ref except IntegrityError: - pass + return None ################### -- cgit