| author | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
|---|---|---|
| committer | Sandy Walsh <sandy.walsh@rackspace.com> | 2011-03-17 18:54:16 -0700 |
| commit | 23efe8d14973a7c94de167562340938ba00d043b | |
| tree | 4e383662f4d11763684901e454025ec9c9297543 /nova/scheduler | |
| parent | 609a912fa8a816c1f47140489dcc1131356cd67c | |
| parent | abc6c82449dfc46a33dcd8190840e51f44b5b930 | |
refactored out middleware, now it's a decorator on service.api
Diffstat (limited to 'nova/scheduler')
| -rw-r--r-- | nova/scheduler/api.py | 79 |
|---|---|---|
| -rw-r--r-- | nova/scheduler/driver.py | 237 |
| -rw-r--r-- | nova/scheduler/manager.py | 52 |
3 files changed, 348 insertions(+), 20 deletions(-)
diff --git a/nova/scheduler/api.py b/nova/scheduler/api.py
index f0b645c09..8c9fd2298 100644
--- a/nova/scheduler/api.py
+++ b/nova/scheduler/api.py
@@ -86,27 +86,66 @@ def _wrap_method(function, self):
     return _wrap
 
 
-def _process(self, zone):
+def _process(func, zone):
     """Worker stub for green thread pool"""
     nova = client.OpenStackClient(zone.username, zone.password,
             zone.api_url)
     nova.authenticate()
-    return self.process(nova, zone)
-
-
-class ChildZoneHelper(object):
-    """Delegate a call to a set of Child Zones and wait for their
-    responses. Could be used for Zone Redirect or by the Scheduler
-    plug-ins to query the children."""
-
-    def start(self, zone_list):
-        """Spawn a green thread for each child zone, calling the
-        derived classes process() method as the worker. Returns
-        a list of HTTP Responses. 1 per child."""
-        self.green_pool = greenpool.GreenPool()
-        return [result for result in self.green_pool.imap(
-                _wrap_method(_process, self), zone_list)]
-
-    def process(self, client, zone):
-        """Worker Method. Derived class must override."""
-        pass
+    return func(nova, zone)
+
+
+def child_zone_helper(zone_list, func):
+    """Spawn a green thread for each child zone, calling func()
+    as the worker. Returns a list of responses, one per child."""
+    green_pool = greenpool.GreenPool()
+    return [result for result in green_pool.imap(
+            _wrap_method(_process, func), zone_list)]
+
+
+def _issue_novaclient_command(nova, zone, method_name, instance_id):
+    """Look the instance up in a child zone and call method_name on it."""
+    manager = nova.servers  # the zone client's server manager
+    server = None
+    try:
+        if isinstance(instance_id, int) or instance_id.isdigit():
+            server = manager.get(int(instance_id))
+        else:
+            server = manager.find(name=instance_id)
+    except novaclient.NotFound:
+        url = zone.api_url
+        LOG.debug(_("Instance %(instance_id)s not found on '%(url)s'") %
+                locals())
+        return
+
+    return getattr(server, method_name)()
+
+
+def wrap_novaclient_function(f, method_name, instance_id):
+    """Pre-bind method_name/instance_id so f fits the worker signature."""
+    def inner(nova, zone):
+        return f(nova, zone, method_name, instance_id)
+
+    return inner
+
+
+class reroute_if_not_found(object):
+    """Decorator used to indicate that the method should
+    delegate the call to the child zones if the db query
+    can't find anything.
+    """
+    def __init__(self, method_name):
+        self.method_name = method_name
+
+    def __call__(self, f):
+        def wrapped_f(*args, **kwargs):
+            context = args[1]
+            instance_id = args[2]
+            try:
+                return f(*args, **kwargs)
+            except exception.InstanceNotFound, e:
+                LOG.debug(_("Instance %(instance_id)s not found "
+                            "locally: '%(e)s'") % locals())
+                zones = db.zone_get_all(context)
+                return child_zone_helper(zones,
+                        wrap_novaclient_function(_issue_novaclient_command,
+                                self.method_name, instance_id))
+        return wrapped_f
diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index 317a039cc..ce05d9f6a 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -26,10 +26,14 @@ import datetime
 from nova import db
 from nova import exception
 from nova import flags
+from nova import log as logging
+from nova import rpc
+from nova.compute import power_state
 
 FLAGS = flags.FLAGS
 flags.DEFINE_integer('service_down_time', 60,
                      'maximum time since last checkin for up service')
+flags.DECLARE('instances_path', 'nova.compute.manager')
 
 
 class NoValidHost(exception.Error):
@@ -71,3 +75,236 @@ class Scheduler(object):
     def schedule(self, context, topic, *_args, **_kwargs):
         """Must override at least this method for scheduler to work."""
         raise NotImplementedError(_("Must implement a fallback schedule"))
+
+    def schedule_live_migration(self, context, instance_id, dest):
+        """Live migration scheduling method.
+
+        :param context: security context
+        :param instance_id: instance id
+        :param dest: destination host
+        :return: the host where the instance currently runs;
+                 the scheduler then sends the request to that host.
+
+        """
+        # Check that the instance exists and is running.
+        instance_ref = db.instance_get(context, instance_id)
+
+        # Check the instance.
+        self._live_migration_src_check(context, instance_ref)
+
+        # Check the destination host.
+        self._live_migration_dest_check(context, instance_ref, dest)
+
+        # Common checks.
+        self._live_migration_common_check(context, instance_ref, dest)
+
+        # Change the instance state.
+        db.instance_set_state(context,
+                              instance_id,
+                              power_state.PAUSED,
+                              'migrating')
+
+        # Change the volume state.
+        for volume_ref in instance_ref['volumes']:
+            db.volume_update(context,
+                             volume_ref['id'],
+                             {'status': 'migrating'})
+
+        # The return value is needed to send the request to the source
+        # host; see _schedule() for details.
+        src = instance_ref['host']
+        return src
+
+    def _live_migration_src_check(self, context, instance_ref):
+        """Live migration check routine (for the source host).
+
+        :param context: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance object
+
+        """
+        # Check that the instance is running.
+        if (power_state.RUNNING != instance_ref['state'] or
+                'running' != instance_ref['state_description']):
+            ec2_id = instance_ref['hostname']
+            raise exception.Invalid(_('Instance(%s) is not running') % ec2_id)
+
+        # Check that the volume node is running when any volumes are
+        # mounted to the instance.
+        if len(instance_ref['volumes']) != 0:
+            services = db.service_get_all_by_topic(context, 'volume')
+            if len(services) < 1 or not self.service_is_up(services[0]):
+                raise exception.Invalid(_("volume node is not alive "
+                                          "(time synchronization problem?)"))
+
+        # Check that the source host exists and is a compute node.
+        src = instance_ref['host']
+        services = db.service_get_all_compute_by_host(context, src)
+
+        # Check that the source host is alive.
+        if not self.service_is_up(services[0]):
+            raise exception.Invalid(_("%s is not alive (time "
+                                      "synchronization problem?)") % src)
+
+    def _live_migration_dest_check(self, context, instance_ref, dest):
+        """Live migration check routine (for the destination host).
+
+        :param context: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance object
+        :param dest: destination host
+
+        """
+        # Check that the destination host exists and is a compute node.
+        dservice_refs = db.service_get_all_compute_by_host(context, dest)
+        dservice_ref = dservice_refs[0]
+
+        # Check that the destination host is alive.
+        if not self.service_is_up(dservice_ref):
+            raise exception.Invalid(_("%s is not alive (time "
+                                      "synchronization problem?)") % dest)
+
+        # Check that the destination is not the host where the instance
+        # is running now.
+        src = instance_ref['host']
+        if dest == src:
+            ec2_id = instance_ref['hostname']
+            raise exception.Invalid(_("%(dest)s is where %(ec2_id)s is "
+                                      "running now; choose another host.")
+                                      % locals())
+
+        # Check that the destination host still has enough capacity.
+        self.assert_compute_node_has_enough_resources(context,
+                                                      instance_ref,
+                                                      dest)
+
+    def _live_migration_common_check(self, context, instance_ref, dest):
+        """Live migration common check routine.
+
+        The checks below follow
+        http://wiki.libvirt.org/page/TodoPreMigrationChecks
+
+        :param context: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance object
+        :param dest: destination host
+
+        """
+        # Check shared storage connectivity.
+        self.mounted_on_same_shared_storage(context, instance_ref, dest)
+
+        # Check that the destination host exists.
+        dservice_refs = db.service_get_all_compute_by_host(context, dest)
+        dservice_ref = dservice_refs[0]['compute_node'][0]
+
+        # Check that the original host (where the instance was launched)
+        # exists.
+        try:
+            oservice_refs = db.service_get_all_compute_by_host(context,
+                                            instance_ref['launched_on'])
+        except exception.NotFound:
+            raise exception.Invalid(_("host %s where instance was launched "
+                                      "does not exist.")
+                                      % instance_ref['launched_on'])
+        oservice_ref = oservice_refs[0]['compute_node'][0]
+
+        # Check that the hypervisor type is the same.
+        orig_hypervisor = oservice_ref['hypervisor_type']
+        dest_hypervisor = dservice_ref['hypervisor_type']
+        if orig_hypervisor != dest_hypervisor:
+            raise exception.Invalid(_("Different hypervisor type "
+                                      "(%(orig_hypervisor)s->"
+                                      "%(dest_hypervisor)s)") % locals())
+
+        # Check the hypervisor version.
+        orig_hypervisor = oservice_ref['hypervisor_version']
+        dest_hypervisor = dservice_ref['hypervisor_version']
+        if orig_hypervisor > dest_hypervisor:
+            raise exception.Invalid(_("Older hypervisor version "
+                                      "(%(orig_hypervisor)s->"
+                                      "%(dest_hypervisor)s)") % locals())
+
+        # Check cpuinfo.
+        try:
+            rpc.call(context,
+                     db.queue_get_for(context, FLAGS.compute_topic, dest),
+                     {"method": 'compare_cpu',
+                      "args": {'cpu_info': oservice_ref['cpu_info']}})
+
+        except rpc.RemoteError:
+            src = instance_ref['host']
+            logging.exception(_("host %(dest)s is not compatible with "
+                                "original host %(src)s.") % locals())
+            raise
+
+    def assert_compute_node_has_enough_resources(self, context,
+                                                 instance_ref, dest):
+        """Check that the destination host has enough resources
+        for live migration.
+
+        Currently, only memory is checked. If block migration
+        (live migration without any shared storage) becomes available,
+        local storage must be checked as well.
+
+        :param context: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance object
+        :param dest: destination host
+
+        """
+        # Get instance information.
+        ec2_id = instance_ref['hostname']
+
+        # Get host information.
+        service_refs = db.service_get_all_compute_by_host(context, dest)
+        compute_node_ref = service_refs[0]['compute_node'][0]
+
+        mem_total = int(compute_node_ref['memory_mb'])
+        mem_used = int(compute_node_ref['memory_mb_used'])
+        mem_avail = mem_total - mem_used
+        mem_inst = instance_ref['memory_mb']
+        if mem_avail <= mem_inst:
+            raise exception.NotEmpty(_("Unable to migrate %(ec2_id)s "
+                                       "to destination: %(dest)s "
+                                       "(host:%(mem_avail)s "
+                                       "<= instance:%(mem_inst)s)")
+                                       % locals())
+
+    def mounted_on_same_shared_storage(self, context, instance_ref, dest):
+        """Check that the source and destination hosts mount the same
+        shared storage.
+
+        The destination host creates a temporary file, and the source
+        host can see it if both mount the same shared storage. The
+        source host then erases it.
+
+        :param context: security context
+        :param instance_ref: nova.db.sqlalchemy.models.Instance object
+        :param dest: destination host
+
+        """
+        src = instance_ref['host']
+        dst_t = db.queue_get_for(context, FLAGS.compute_topic, dest)
+        src_t = db.queue_get_for(context, FLAGS.compute_topic, src)
+
+        try:
+            # Create a tmpfile on the destination host.
+            filename = rpc.call(context, dst_t,
+                                {"method": 'create_shared_storage_test_file'})
+
+            # Confirm that it exists on the source host.
+            rpc.call(context, src_t,
+                     {"method": 'check_shared_storage_test_file',
+                      "args": {'filename': filename}})
+
+        except rpc.RemoteError:
+            ipath = FLAGS.instances_path
+            logging.error(_("Cannot confirm tmpfile at %(ipath)s is on "
+                            "same shared storage between %(src)s "
+                            "and %(dest)s.") % locals())
+            raise
+
+        finally:
+            rpc.call(context, dst_t,
+                     {"method": 'cleanup_shared_storage_test_file',
+                      "args": {'filename': filename}})
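The capacity test in `assert_compute_node_has_enough_resources` reduces to memory-headroom arithmetic over the compute-node record. A standalone sketch of that rule, assuming only the field names shown in the diff; the sample numbers are invented:

```python
def has_enough_memory(compute_node, instance):
    # Mirrors the scheduler's rule: free memory must be strictly
    # greater than the instance's memory (note the <= in the diff).
    mem_avail = (int(compute_node['memory_mb']) -
                 int(compute_node['memory_mb_used']))
    return mem_avail > instance['memory_mb']

# Invented sample: 8192 - 6144 = 2048 MB free is NOT strictly greater
# than a 2048 MB instance, so this migration would be refused.
print(has_enough_memory({'memory_mb': 8192, 'memory_mb_used': 6144},
                        {'memory_mb': 2048}))  # False
```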
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index d3d338943..7d62cfc4e 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -89,3 +89,55 @@ class SchedulerManager(manager.Manager):
                  {"method": method,
                   "args": kwargs})
         LOG.debug(_("Casting to %(topic)s %(host)s for %(method)s") % locals())
+
+    # NOTE(masumotok): This method should be moved to nova.api.ec2.admin.
+    # Based on the Bexar design summit discussion it is left here for
+    # the Bexar release.
+    def show_host_resources(self, context, host, *args):
+        """Show the physical/usage resources of the given host.
+
+        :param context: security context
+        :param host: hostname
+        :returns: example format is below:
+            {'resource': D, 'usage': {proj_id1: D, proj_id2: D}}
+            D: {'vcpus': 3, 'memory_mb': 2048, 'local_gb': 2048}
+
+        """
+        compute_ref = db.service_get_all_compute_by_host(context, host)
+        compute_ref = compute_ref[0]
+
+        # Get physical resource information.
+        compute_node_ref = compute_ref['compute_node'][0]
+        resource = {'vcpus': compute_node_ref['vcpus'],
+                    'memory_mb': compute_node_ref['memory_mb'],
+                    'local_gb': compute_node_ref['local_gb'],
+                    'vcpus_used': compute_node_ref['vcpus_used'],
+                    'memory_mb_used': compute_node_ref['memory_mb_used'],
+                    'local_gb_used': compute_node_ref['local_gb_used']}
+
+        # Get usage resource information.
+        usage = {}
+        instance_refs = db.instance_get_all_by_host(context,
+                                                    compute_ref['host'])
+        if not instance_refs:
+            return {'resource': resource, 'usage': usage}
+
+        project_ids = [i['project_id'] for i in instance_refs]
+        project_ids = list(set(project_ids))
+        for project_id in project_ids:
+            vcpus = db.instance_get_vcpu_sum_by_host_and_project(context,
+                                                                 host,
+                                                                 project_id)
+            mem = db.instance_get_memory_sum_by_host_and_project(context,
+                                                                 host,
+                                                                 project_id)
+            hdd = db.instance_get_disk_sum_by_host_and_project(context,
+                                                               host,
+                                                               project_id)
+            usage[project_id] = {'vcpus': int(vcpus),
+                                 'memory_mb': int(mem),
+                                 'local_gb': int(hdd)}
+
+        return {'resource': resource, 'usage': usage}
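Callers get back the nested `{'resource': ..., 'usage': ...}` dict documented in the docstring. A short sketch of consuming it; the payload values are invented to match that format:

```python
# Sketch: walking a show_host_resources() result. Values are invented.
result = {
    'resource': {'vcpus': 16, 'memory_mb': 32768, 'local_gb': 1024,
                 'vcpus_used': 6, 'memory_mb_used': 12288,
                 'local_gb_used': 200},
    'usage': {'proj-a': {'vcpus': 4, 'memory_mb': 8192, 'local_gb': 120},
              'proj-b': {'vcpus': 2, 'memory_mb': 4096, 'local_gb': 80}},
}

res = result['resource']
print("host: %d/%d vcpus, %d/%d MB, %d/%d GB used"
      % (res['vcpus_used'], res['vcpus'],
         res['memory_mb_used'], res['memory_mb'],
         res['local_gb_used'], res['local_gb']))
for project_id, used in sorted(result['usage'].items()):
    print("  %s: vcpus=%d memory_mb=%d local_gb=%d"
          % (project_id, used['vcpus'], used['memory_mb'],
             used['local_gb']))
```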
