From 75545ba974afb1ac76186a8dad359ae0aa751c01 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Thu, 9 May 2013 19:15:25 -0700 Subject: Check cached SSH connection in PowerVM driver The PowerVM operator code caches an SSH connection to the hypervisor which can become invalid if the connection to the hypervisor is not terminated cleanly, e.g. the hypervisor is rebooted while the compute node is connected to it. This code checks an existing SSH connection object to see if it's transport is still alive and if not, re-establishes the connection before attempting to run SSH commands on the hypervisor. Also added some basic unit tests to cover the new check_connection function in nova.virt.powervm.common. Fixes bug 1177104 Change-Id: I08079cf0d9e60e1d8902d32d684d979b06f7f287 --- nova/tests/test_powervm.py | 70 ++++++++++++++++++++++++++++++++++++------- nova/virt/powervm/blockdev.py | 8 +++-- nova/virt/powervm/common.py | 30 ++++++++++++++++++- nova/virt/powervm/operator.py | 8 +++-- 4 files changed, 98 insertions(+), 18 deletions(-) diff --git a/nova/tests/test_powervm.py b/nova/tests/test_powervm.py index 73299f23a..ccfe64734 100644 --- a/nova/tests/test_powervm.py +++ b/nova/tests/test_powervm.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2012 IBM Corp. +# Copyright 2013 IBM Corp. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,6 +19,7 @@ Test suite for PowerVMDriver. """ import contextlib +import paramiko from nova import context from nova import db @@ -47,6 +48,16 @@ def fake_lpar(instance_name): uptime=939395, state='Running') +def fake_ssh_connect(connection): + """Returns a new paramiko.SSHClient object.""" + return paramiko.SSHClient() + + +def raise_(ex): + """Raises the given Exception.""" + raise ex + + class FakePowerVMOperator(powervm_operator.PowerVMOperator): def get_lpar(self, instance_name, resource_type='lpar'): @@ -216,11 +227,6 @@ class PowerVMDriverTestCase(test.TestCase): self.assertEqual(state, power_state.RUNNING) def test_spawn_create_lpar_fail(self): - # Verify on a failed spawn, we get the original exception raised. - # helper function - def raise_(ex): - raise ex - self.flags(powervm_img_local_path='/images/') self.stubs.Set(images, 'fetch', lambda *x, **y: None) self.stubs.Set( @@ -237,11 +243,6 @@ class PowerVMDriverTestCase(test.TestCase): {'id': 'ANY_ID'}, [], 's3cr3t', fake_net_info) def test_spawn_cleanup_on_fail(self): - # Verify on a failed spawn, we get the original exception raised. - # helper function - def raise_(ex): - raise ex - self.flags(powervm_img_local_path='/images/') self.stubs.Set(images, 'fetch', lambda *x, **y: None) self.stubs.Set( @@ -617,3 +618,50 @@ class PowerVMDriverLparTestCase(test.TestCase): self.mox.ReplayAll() fake_op._operator.set_lpar_mac_base_value(inst_name, mac) + + +class PowerVMDriverCommonTestCase(test.TestCase): + """Unit tests for the nova.virt.powervm.common module.""" + + def setUp(self): + super(PowerVMDriverCommonTestCase, self).setUp() + # our fake connection information never changes since we can't + # actually connect to anything for these tests + self.connection = common.Connection('fake_host', 'user', 'password') + + def test_check_connection_ssh_is_none(self): + """ + Passes a null ssh object to the check_connection method. + The method should create a new ssh connection using the + Connection object and return it. + """ + self.stubs.Set(common, 'ssh_connect', fake_ssh_connect) + ssh = common.check_connection(None, self.connection) + self.assertIsNotNone(ssh) + + def test_check_connection_transport_is_dead(self): + """ + Passes an ssh object to the check_connection method which + does not have a transport set. + The method should create a new ssh connection using the + Connection object and return it. + """ + self.stubs.Set(common, 'ssh_connect', fake_ssh_connect) + ssh1 = fake_ssh_connect(self.connection) + ssh2 = common.check_connection(ssh1, self.connection) + self.assertIsNotNone(ssh2) + self.assertNotEqual(ssh1, ssh2) + + def test_check_connection_raise_ssh_exception(self): + """ + Passes an ssh object to the check_connection method which + does not have a transport set. + The method should raise an SSHException. + """ + self.stubs.Set(common, 'ssh_connect', + lambda *x, **y: raise_(paramiko.SSHException( + 'Error connecting to host.'))) + ssh = fake_ssh_connect(self.connection) + self.assertRaises(paramiko.SSHException, + common.check_connection, + ssh, self.connection) diff --git a/nova/virt/powervm/blockdev.py b/nova/virt/powervm/blockdev.py index 657e96f06..33b708cd9 100644 --- a/nova/virt/powervm/blockdev.py +++ b/nova/virt/powervm/blockdev.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2012 IBM Corp. +# Copyright 2013 IBM Corp. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -125,8 +125,10 @@ class PowerVMLocalVolumeAdapter(PowerVMDiskAdapter): self.connection_data = connection def _set_connection(self): - if self._connection is None: - self._connection = common.ssh_connect(self.connection_data) + # create a new connection or verify an existing connection + # and re-establish if the existing connection is dead + self._connection = common.check_connection(self._connection, + self.connection_data) def create_volume(self, size): """Creates a logical volume with a minimum size diff --git a/nova/virt/powervm/common.py b/nova/virt/powervm/common.py index dd6517dcd..0b1dc6a14 100644 --- a/nova/virt/powervm/common.py +++ b/nova/virt/powervm/common.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2012 IBM Corp. +# Copyright 2013 IBM Corp. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -56,12 +56,40 @@ def ssh_connect(connection): port=connection.port, key_filename=connection.keyfile, timeout=constants.POWERVM_CONNECTION_TIMEOUT) + + LOG.debug("SSH connection with %s established successfully." % + connection.host) + + # send TCP keepalive packets every 20 seconds + ssh.get_transport().set_keepalive(20) + return ssh except Exception: LOG.exception(_('Connection error connecting PowerVM manager')) raise exception.PowerVMConnectionFailed() +def check_connection(ssh, connection): + """ + Checks the SSH connection to see if the transport is valid. + If the connection is dead, a new connection is created and returned. + + :param ssh: an existing paramiko.SSHClient connection. + :param connection: a Connection object. + :returns: paramiko.SSHClient -- an active ssh connection. + :raises: PowerVMConnectionFailed -- if the ssh connection fails. + """ + # if the ssh client is not set or the transport is dead, re-connect + if (ssh is None or + ssh.get_transport() is None or + not ssh.get_transport().is_active()): + LOG.debug("Connection to host %s will be established." % + connection.host) + ssh = ssh_connect(connection) + + return ssh + + def ssh_command_as_root(ssh_connection, cmd, check_exit_code=True): """Method to execute remote command as root. diff --git a/nova/virt/powervm/operator.py b/nova/virt/powervm/operator.py index b5ead9064..2539f29b1 100644 --- a/nova/virt/powervm/operator.py +++ b/nova/virt/powervm/operator.py @@ -1,6 +1,6 @@ # vim: tabstop=4 shiftwidth=4 softtabstop=4 -# Copyright 2012 IBM Corp. +# Copyright 2013 IBM Corp. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -465,8 +465,10 @@ class BaseOperator(object): self.connection_data = connection def _set_connection(self): - if self._connection is None: - self._connection = common.ssh_connect(self.connection_data) + # create a new connection or verify an existing connection + # and re-establish if the existing connection is dead + self._connection = common.check_connection(self._connection, + self.connection_data) def get_lpar(self, instance_name, resource_type='lpar'): """Return a LPAR object by its instance name. -- cgit