diff options
author | John Garbutt <john.garbutt@rackspace.com> | 2013-06-07 18:11:09 +0100 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2013-06-19 10:18:19 +0000 |
commit | 834fc60a8ca852be64aaaaeb5ebb3cc0de807fef (patch) | |
tree | 554e3c4c0c94008f0bacb9d22f91d53184ea178f | |
parent | 6808e052dbe9bc51b5836814d0a0331dcc5a6bd2 (diff) | |
download | nova-834fc60a8ca852be64aaaaeb5ebb3cc0de807fef.tar.gz nova-834fc60a8ca852be64aaaaeb5ebb3cc0de807fef.tar.xz nova-834fc60a8ca852be64aaaaeb5ebb3cc0de807fef.zip |
xenapi: revisit error handling around calls to agent
Now we have settings to hint if the agent is present or not,
and by default we do not check for the agent,
if the agent is not responding to our calls for its version,
we can fail the build.
In environments that need the agent to inject the networking,
you really want the agent to fail if it is not present.
If the agent did not inject the networking, the server will
have no networking.
However, we can still leave the agent upgrade to silently
fail, as the agent should be backwards compatible.
fixes bug 1188540
Change-Id: I8acdabd8d2bd24b088dad3cd4abec300d0ada3fb
-rw-r--r-- | nova/exception.py | 13 | ||||
-rw-r--r-- | nova/tests/virt/xenapi/test_xenapi.py | 99 | ||||
-rw-r--r-- | nova/virt/xenapi/agent.py | 174 | ||||
-rw-r--r-- | nova/virt/xenapi/fake.py | 9 |
4 files changed, 185 insertions, 110 deletions
diff --git a/nova/exception.py b/nova/exception.py index c774b56cc..2b3903e1e 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -1241,6 +1241,19 @@ class CoreAPIMissing(NovaException): message = _("Core API extensions are missing: %(missing_apis)s") +class AgentError(NovaException): + message = _('Error during following call to agent: %(method)s') + + +class AgentTimeout(AgentError): + message = _('Unable to contact guest agent. ' + 'The following call timed out: %(method)s') + + +class AgentNotImplemented(AgentError): + message = _('Agent does not support the call: %(method)s') + + class InstanceGroupNotFound(NotFound): message = _("Instance group %(group_uuid)s could not be found.") diff --git a/nova/tests/virt/xenapi/test_xenapi.py b/nova/tests/virt/xenapi/test_xenapi.py index 91d4f0770..f6ff23aba 100644 --- a/nova/tests/virt/xenapi/test_xenapi.py +++ b/nova/tests/virt/xenapi/test_xenapi.py @@ -31,6 +31,7 @@ from nova.compute import power_state from nova.compute import task_states from nova.compute import vm_states from nova import context +from nova import crypto from nova import db from nova import exception from nova.openstack.common import importutils @@ -983,13 +984,14 @@ class XenAPIVMTestCase(stubs.XenAPITestBase): actual_injected_files.append((path, contents)) return jsonutils.dumps({'returncode': '0', 'message': 'success'}) - def noop(*args, **kwargs): - pass - self.stubs.Set(stubs.FakeSessionForVMTests, '_plugin_agent_inject_file', fake_inject_file) - self.stubs.Set(agent.XenAPIBasedAgent, - 'set_admin_password', noop) + + def fake_encrypt_text(sshkey, new_pass): + self.assertEqual("fake_keydata", sshkey) + return "fake" + + self.stubs.Set(crypto, 'ssh_encrypt_text', fake_encrypt_text) expected_data = ('\n# The following ssh key was injected by ' 'Nova\nfake_keydata\n') @@ -1020,6 +1022,93 @@ class XenAPIVMTestCase(stubs.XenAPITestBase): self.check_vm_params_for_linux() self.assertEquals(actual_injected_files, injected_files) + def test_spawn_agent_upgrade(self): + self.flags(xenapi_use_agent_default=True) + actual_injected_files = [] + + def fake_agent_build(_self, *args): + return {"version": "1.1.0", "architecture": "x86-64", + "hypervisor": "xen", "os": "windows", + "url": "url", "md5hash": "asdf"} + + self.stubs.Set(self.conn.virtapi, 'agent_build_get_by_triple', + fake_agent_build) + + self._test_spawn(IMAGE_VHD, None, None, + os_type="linux", architecture="x86-64") + + def test_spawn_agent_upgrade_fails_silently(self): + self.flags(xenapi_use_agent_default=True) + actual_injected_files = [] + + def fake_agent_build(_self, *args): + return {"version": "1.1.0", "architecture": "x86-64", + "hypervisor": "xen", "os": "windows", + "url": "url", "md5hash": "asdf"} + + self.stubs.Set(self.conn.virtapi, 'agent_build_get_by_triple', + fake_agent_build) + + def fake_agent_update(self, method, args): + raise xenapi_fake.Failure(["fake_error"]) + + self.stubs.Set(stubs.FakeSessionForVMTests, + '_plugin_agent_agentupdate', fake_agent_update) + + self._test_spawn(IMAGE_VHD, None, None, + os_type="linux", architecture="x86-64") + + def _test_spawn_fails_with(self, trigger, expected_exception): + self.flags(xenapi_use_agent_default=True) + self.flags(agent_version_timeout=0) + actual_injected_files = [] + + def fake_agent_version(self, method, args): + raise xenapi_fake.Failure([trigger]) + + self.stubs.Set(stubs.FakeSessionForVMTests, + '_plugin_agent_version', fake_agent_version) + + self.assertRaises(expected_exception, self._test_spawn, + IMAGE_VHD, None, None, os_type="linux", architecture="x86-64") + + def test_spawn_fails_with_agent_timeout(self): + self._test_spawn_fails_with("TIMEOUT:fake", exception.AgentTimeout) + + def test_spawn_fails_with_agent_not_implemented(self): + self._test_spawn_fails_with("NOT IMPLEMENTED:fake", + exception.AgentNotImplemented) + + def test_spawn_fails_with_agent_error(self): + self._test_spawn_fails_with("fake_error", exception.AgentError) + + def test_spawn_fails_with_agent_bad_return(self): + self.flags(xenapi_use_agent_default=True) + self.flags(agent_version_timeout=0) + actual_injected_files = [] + + def fake_agent_version(self, method, args): + return xenapi_fake.as_json(returncode='-1', message='fake') + self.stubs.Set(stubs.FakeSessionForVMTests, + '_plugin_agent_version', fake_agent_version) + + self.assertRaises(exception.AgentError, self._test_spawn, + IMAGE_VHD, None, None, os_type="linux", architecture="x86-64") + + def test_spawn_fails_agent_not_implemented(self): + # Test spawning with injected_files. + self.flags(xenapi_use_agent_default=True) + self.flags(agent_version_timeout=0) + actual_injected_files = [] + + def fake_agent_version(self, method, args): + raise xenapi_fake.Failure(["NOT IMPLEMENTED:fake"]) + self.stubs.Set(stubs.FakeSessionForVMTests, + '_plugin_agent_version', fake_agent_version) + + self.assertRaises(exception.AgentNotImplemented, self._test_spawn, + IMAGE_VHD, None, None, os_type="linux", architecture="x86-64") + def test_rescue(self): instance = self._create_instance() session = xenapi_conn.XenAPISession('test_url', 'root', 'test_pass', diff --git a/nova/virt/xenapi/agent.py b/nova/virt/xenapi/agent.py index c9e011856..05a0fae41 100644 --- a/nova/virt/xenapi/agent.py +++ b/nova/virt/xenapi/agent.py @@ -27,6 +27,7 @@ from nova.api.metadata import password from nova.compute import api as compute_api from nova import context from nova import crypto +from nova import exception from nova.openstack.common import jsonutils from nova.openstack.common import log as logging from nova.openstack.common import strutils @@ -77,7 +78,7 @@ CONF.register_opts(xenapi_agent_opts) def _call_agent(session, instance, vm_ref, method, addl_args=None, - timeout=None): + timeout=None, success_code='0'): """Abstracts out the interaction with the agent xenapi plugin.""" if addl_args is None: addl_args = {} @@ -101,43 +102,39 @@ def _call_agent(session, instance, vm_ref, method, addl_args=None, LOG.error(_('TIMEOUT: The call to %(method)s timed out. ' 'args=%(args)r'), {'method': method, 'args': args}, instance=instance) - return {'returncode': 'timeout', 'message': err_msg} + raise exception.AgentTimeout(method=method) elif 'NOT IMPLEMENTED:' in err_msg: - LOG.error(_('NOT IMPLEMENTED: The call to %(method)s is not' - ' supported by the agent. args=%(args)r'), + LOG.error(_('NOT IMPLEMENTED: The call to %(method)s is not ' + 'supported by the agent. args=%(args)r'), {'method': method, 'args': args}, instance=instance) - return {'returncode': 'notimplemented', 'message': err_msg} + raise exception.AgentNotImplemented(method=method) else: LOG.error(_('The call to %(method)s returned an error: %(e)s. ' 'args=%(args)r'), {'method': method, 'args': args, 'e': e}, instance=instance) - return {'returncode': 'error', 'message': err_msg} - return None + raise exception.AgentError(method=method) - if isinstance(ret, dict): - return ret - try: - return jsonutils.loads(ret) - except TypeError: - LOG.error(_('The agent call to %(method)s returned an invalid ' - 'response: %(ret)r. args=%(args)r'), + if not isinstance(ret, dict): + try: + ret = jsonutils.loads(ret) + except TypeError: + LOG.error(_('The agent call to %(method)s returned an invalid ' + 'response: %(ret)r. args=%(args)r'), + {'method': method, 'ret': ret, 'args': args}, + instance=instance) + raise exception.AgentError(method=method) + + if ret['returncode'] != success_code: + LOG.error(_('The agent call to %(method)s returned an ' + 'an error: %(ret)r. args=%(args)r'), {'method': method, 'ret': ret, 'args': args}, instance=instance) - return {'returncode': 'error', - 'message': 'unable to deserialize response'} - - -def _get_agent_version(session, instance, vm_ref): - resp = _call_agent(session, instance, vm_ref, 'version') - if resp['returncode'] != '0': - LOG.error(_('Failed to query agent version: %r'), - resp, instance=instance) - return None + raise exception.AgentError(method=method) # Some old versions of the Windows agent have a trailing \\r\\n # (ie CRLF escaped) for some reason. Strip that off. - return resp['message'].replace('\\r\\n', '') + return ret['message'].replace('\\r\\n', '') class XenAPIBasedAgent(object): @@ -147,6 +144,11 @@ class XenAPIBasedAgent(object): self.instance = instance self.vm_ref = vm_ref + def _call_agent(self, method, addl_args=None, timeout=None, + success_code='0'): + return _call_agent(self.session, self.instance, self.vm_ref, + method, addl_args, timeout, success_code) + def get_agent_version(self): """Get the version of the agent running on the VM instance.""" @@ -159,31 +161,47 @@ class XenAPIBasedAgent(object): # normal as well as watch for domid changes expiration = time.time() + CONF.agent_version_timeout - while time.time() < expiration: - ret = _get_agent_version(self.session, self.instance, self.vm_ref) - if ret: - return ret - - LOG.info(_('Reached maximum time attempting to query agent version'), - instance=self.instance) - - return None + while True: + try: + return self._call_agent('version') + except exception.AgentTimeout: + if time.time() > expiration: + raise def agent_update(self, agent_build): """Update agent on the VM instance.""" - LOG.info(_('Updating agent to %s'), agent_build['version'], - instance=self.instance) + LOG.debug(_('Updating agent to %s'), agent_build['version'], + instance=self.instance) # Send the encrypted password args = {'url': agent_build['url'], 'md5sum': agent_build['md5hash']} - resp = _call_agent( - self.session, self.instance, self.vm_ref, 'agentupdate', args) - if resp['returncode'] != '0': - LOG.error(_('Failed to update agent: %r'), resp, - instance=self.instance) - return None - return resp['message'] + try: + self._call_agent('agentupdate', args) + except exception.AgentError as exc: + # Silently fail for agent upgrades + LOG.warning(_("Unable to update the agent due " + "to: %(exc)s") % dict(exc=exc), + instance=self.instance) + + def _exchange_key_with_agent(self): + dh = SimpleDH() + args = {'pub': str(dh.get_public())} + resp = self._call_agent('key_init', args, success_code='D0') + agent_pub = int(resp) + dh.compute_shared(agent_pub) + return dh + + def _save_instance_password_if_sshkey_present(self, new_pass): + sshkey = self.instance.get('key_data') + if sshkey: + ctxt = context.get_admin_context() + enc = crypto.ssh_encrypt_text(sshkey, new_pass) + sys_meta = utils.metadata_to_dict(self.instance['system_metadata']) + sys_meta.update(password.convert_password(ctxt, + base64.b64encode(enc))) + self.virtapi.instance_update(ctxt, self.instance['uuid'], + {'system_metadata': sys_meta}) def set_admin_password(self, new_pass): """Set the root/admin password on the VM instance. @@ -196,59 +214,24 @@ class XenAPIBasedAgent(object): """ LOG.debug(_('Setting admin password'), instance=self.instance) - dh = SimpleDH() - - # Exchange keys - args = {'pub': str(dh.get_public())} - resp = _call_agent( - self.session, self.instance, self.vm_ref, 'key_init', args) - - # Successful return code from key_init is 'D0' - if resp['returncode'] != 'D0': - msg = _('Failed to exchange keys: %r') % resp - LOG.error(msg, instance=self.instance) - raise NotImplementedError(msg) - - # Some old versions of the Windows agent have a trailing \\r\\n - # (ie CRLF escaped) for some reason. Strip that off. - agent_pub = int(resp['message'].replace('\\r\\n', '')) - dh.compute_shared(agent_pub) - + dh = self._exchange_key_with_agent() # Some old versions of Linux and Windows agent expect trailing \n # on password to work correctly. enc_pass = dh.encrypt(new_pass + '\n') - # Send the encrypted password args = {'enc_pass': enc_pass} - resp = _call_agent( - self.session, self.instance, self.vm_ref, 'password', args) - - # Successful return code from password is '0' - if resp['returncode'] != '0': - msg = _('Failed to exchange keys: %r') % resp - LOG.error(msg, instance=self.instance) - raise NotImplementedError(msg) - - sshkey = self.instance.get('key_data') - if sshkey: - ctxt = context.get_admin_context() - enc = crypto.ssh_encrypt_text(sshkey, new_pass) - sys_meta = utils.metadata_to_dict(self.instance['system_metadata']) - sys_meta.update(password.convert_password(ctxt, - base64.b64encode(enc))) - self.virtapi.instance_update(ctxt, self.instance['uuid'], - {'system_metadata': sys_meta}) - - return resp['message'] + self._call_agent('password', args) + self._save_instance_password_if_sshkey_present(new_pass) def inject_ssh_key(self): sshkey = self.instance.get('key_data') if not sshkey: return if self.instance['os_type'] == 'windows': - LOG.warning(_("Skipping setting of ssh key for Windows."), - instance=self.instance) + LOG.debug(_("Skipping setting of ssh key for Windows."), + instance=self.instance) return + sshkey = str(sshkey) keyfile = '/root/.ssh/authorized_keys' key_data = ''.join([ @@ -268,30 +251,13 @@ class XenAPIBasedAgent(object): b64_contents = base64.b64encode(contents) args = {'b64_path': b64_path, 'b64_contents': b64_contents} - - # If the agent doesn't support file injection, a NotImplementedError - # will be raised with the appropriate message. - resp = _call_agent( - self.session, self.instance, self.vm_ref, 'inject_file', args) - if resp['returncode'] != '0': - LOG.error(_('Failed to inject file: %r'), resp, - instance=self.instance) - return None - - return resp['message'] + return self._call_agent('inject_file', args) def resetnetwork(self): LOG.debug(_('Resetting network'), instance=self.instance) - resp = _call_agent( - self.session, self.instance, self.vm_ref, 'resetnetwork', - timeout=CONF.agent_resetnetwork_timeout) - if resp['returncode'] != '0': - LOG.error(_('Failed to reset network: %r'), resp, - instance=self.instance) - return None - - return resp['message'] + return self._call_agent('resetnetwork', + timeout=CONF.agent_resetnetwork_timeout) def find_guest_agent(base_dir): diff --git a/nova/virt/xenapi/fake.py b/nova/virt/xenapi/fake.py index 2dd9765d1..f4eac3887 100644 --- a/nova/virt/xenapi/fake.py +++ b/nova/virt/xenapi/fake.py @@ -565,7 +565,7 @@ class SessionBase(object): return 12 * 1024 * 1024 * 1024 def _plugin_agent_version(self, method, args): - return as_json(returncode='0', message='1.0') + return as_json(returncode='0', message='1.0\\r\\n') def _plugin_agent_key_init(self, method, args): return as_json(returncode='D0', message='1') @@ -579,6 +579,13 @@ class SessionBase(object): def _plugin_agent_resetnetwork(self, method, args): return as_json(returncode='0', message='success') + def _plugin_agent_agentupdate(self, method, args): + url = args["url"] + md5 = args["md5sum"] + message = "success with %(url)s and hash:%(md5)s" % dict(url=url, + md5=md5) + return as_json(returncode='0', message=message) + def _plugin_noop(self, method, args): return '' |