diff options
| author | Brian Elliott <brian.elliott@rackspace.com> | 2012-06-21 04:25:24 +0000 |
|---|---|---|
| committer | Brian Elliott <brian.elliott@rackspace.com> | 2012-08-20 15:46:53 +0000 |
| commit | c7d812a35bf4ef42907366c3f674fd623cd46905 (patch) | |
| tree | cc0cb8ac745d1fae51dafe7c24f499d5b0b27ea8 /nova/db | |
| parent | 740e93aae891d6c20f38b091ad9f54d71db0d7f7 (diff) | |
| download | nova-c7d812a35bf4ef42907366c3f674fd623cd46905.tar.gz nova-c7d812a35bf4ef42907366c3f674fd623cd46905.tar.xz nova-c7d812a35bf4ef42907366c3f674fd623cd46905.zip | |
Keep the ComputeNode model updated with usage
Keep the compute host's ComputeNode model in sync with the
level of resource usage. This enables the ComputeNode
model to be used as a basis for scheduling decisions
rather than forcing the scheduler to calculate free
resources from an instance_get_all on each request.
Resources like memory and disk are claimed as instances are built
or deleted. There is also support for configurable compute node
stats (a generic key/value store) for extensible advertising of other
usage stats that may be useful for a particular scheduler
implementation. Additionally, there is a periodic task on the
compute host that audits actual resource consumption at the virt
layer to ensure that the database stays in sync.
This change partially implements blueprint: scheduler-resource-race
This patch complements:
https://review.openstack.org/#/c/9540/ (build re-scheduling support)
Change-Id: Ibbe3839a054f8b80664b413d47f766ca8d68e3f2
Diffstat (limited to 'nova/db')
| -rw-r--r-- | nova/db/api.py | 17 | ||||
| -rw-r--r-- | nova/db/sqlalchemy/api.py | 153 | ||||
| -rw-r--r-- | nova/db/sqlalchemy/migrate_repo/versions/117_add_compute_node_stats.py | 61 | ||||
| -rw-r--r-- | nova/db/sqlalchemy/models.py | 20 |
4 files changed, 136 insertions, 115 deletions
diff --git a/nova/db/api.py b/nova/db/api.py index 94fdd8ce2..c36606a5c 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -208,31 +208,18 @@ def compute_node_create(context, values): return IMPL.compute_node_create(context, values) -def compute_node_update(context, compute_id, values, auto_adjust=True): +def compute_node_update(context, compute_id, values, prune_stats=False): """Set the given properties on a computeNode and update it. Raises NotFound if computeNode does not exist. """ - return IMPL.compute_node_update(context, compute_id, values, auto_adjust) + return IMPL.compute_node_update(context, compute_id, values, prune_stats) def compute_node_get_by_host(context, host): return IMPL.compute_node_get_by_host(context, host) -def compute_node_utilization_update(context, host, free_ram_mb_delta=0, - free_disk_gb_delta=0, work_delta=0, vm_delta=0): - return IMPL.compute_node_utilization_update(context, host, - free_ram_mb_delta, free_disk_gb_delta, work_delta, - vm_delta) - - -def compute_node_utilization_set(context, host, free_ram_mb=None, - free_disk_gb=None, work=None, vms=None): - return IMPL.compute_node_utilization_set(context, host, free_ram_mb, - free_disk_gb, work, vms) - - def compute_node_statistics(context): return IMPL.compute_node_statistics(context) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index d15b7b353..c85dfc723 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -485,6 +485,7 @@ def service_update(context, service_id, values): def compute_node_get(context, compute_id, session=None): result = model_query(context, models.ComputeNode, session=session).\ filter_by(id=compute_id).\ + options(joinedload('stats')).\ first() if not result: @@ -497,6 +498,7 @@ def compute_node_get(context, compute_id, session=None): def compute_node_get_all(context, session=None): return model_query(context, models.ComputeNode, session=session).\ options(joinedload('service')).\ + options(joinedload('stats')).\ all() 
@@ -509,42 +511,27 @@ def compute_node_search_by_hypervisor(context, hypervisor_match): all() -def _get_host_utilization(context, host, ram_mb, disk_gb): - """Compute the current utilization of a given host.""" - instances = instance_get_all_by_host(context, host) - vms = len(instances) - free_ram_mb = ram_mb - FLAGS.reserved_host_memory_mb - free_disk_gb = disk_gb - (FLAGS.reserved_host_disk_mb * 1024) - - work = 0 - for instance in instances: - free_ram_mb -= instance.memory_mb - free_disk_gb -= instance.root_gb - free_disk_gb -= instance.ephemeral_gb - if instance.task_state is not None: - work += 1 - return dict(free_ram_mb=free_ram_mb, - free_disk_gb=free_disk_gb, - current_workload=work, - running_vms=vms) - - -def _adjust_compute_node_values_for_utilization(context, values, session): - service_ref = service_get(context, values['service_id'], session=session) - host = service_ref['host'] - ram_mb = values['memory_mb'] - disk_gb = values['local_gb'] - values.update(_get_host_utilization(context, host, ram_mb, disk_gb)) +def _prep_stats_dict(values): + """Make list of ComputeNodeStats""" + stats = [] + d = values.get('stats', {}) + for k, v in d.iteritems(): + stat = models.ComputeNodeStat() + stat['key'] = k + stat['value'] = v + stats.append(stat) + values['stats'] = stats @require_admin_context def compute_node_create(context, values, session=None): """Creates a new ComputeNode and populates the capacity fields with the most recent data.""" + _prep_stats_dict(values) + if not session: session = get_session() - _adjust_compute_node_values_for_utilization(context, values, session) with session.begin(subtransactions=True): compute_node_ref = models.ComputeNode() session.add(compute_node_ref) @@ -552,17 +539,52 @@ def compute_node_create(context, values, session=None): return compute_node_ref +def _update_stats(context, new_stats, compute_id, session, prune_stats=False): + + existing = model_query(context, models.ComputeNodeStat, session=session, + 
read_deleted="no").filter_by(compute_node_id=compute_id).all() + statmap = {} + for stat in existing: + key = stat['key'] + statmap[key] = stat + + stats = [] + for k, v in new_stats.iteritems(): + old_stat = statmap.pop(k, None) + if old_stat: + # update existing value: + old_stat.update({'value': v}) + stats.append(old_stat) + else: + # add new stat: + stat = models.ComputeNodeStat() + stat['compute_node_id'] = compute_id + stat['key'] = k + stat['value'] = v + stats.append(stat) + + if prune_stats: + # prune un-touched old stats: + for stat in statmap.values(): + session.add(stat) + stat.update({'deleted': True}) + + # add new and updated stats + for stat in stats: + session.add(stat) + + @require_admin_context -def compute_node_update(context, compute_id, values, auto_adjust): - """Creates a new ComputeNode and populates the capacity fields - with the most recent data.""" +def compute_node_update(context, compute_id, values, prune_stats=False): + """Updates the ComputeNode record with the most recent data""" + stats = values.pop('stats', {}) + session = get_session() - if auto_adjust: - _adjust_compute_node_values_for_utilization(context, values, session) with session.begin(subtransactions=True): + _update_stats(context, stats, compute_id, session, prune_stats) compute_ref = compute_node_get(context, compute_id, session=session) compute_ref.update(values) - compute_ref.save(session=session) + return compute_ref def compute_node_get_by_host(context, host): @@ -576,71 +598,6 @@ def compute_node_get_by_host(context, host): return node.first() -def compute_node_utilization_update(context, host, free_ram_mb_delta=0, - free_disk_gb_delta=0, work_delta=0, vm_delta=0): - """Update a specific ComputeNode entry by a series of deltas. - Do this as a single atomic action and lock the row for the - duration of the operation. 
Requires that ComputeNode record exist.""" - session = get_session() - compute_node = None - with session.begin(subtransactions=True): - compute_node = session.query(models.ComputeNode).\ - options(joinedload('service')).\ - filter(models.Service.host == host).\ - filter_by(deleted=False).\ - with_lockmode('update').\ - first() - if compute_node is None: - raise exception.NotFound(_("No ComputeNode for %(host)s") % - locals()) - - # This table thingy is how we get atomic UPDATE x = x + 1 - # semantics. - table = models.ComputeNode.__table__ - if free_ram_mb_delta != 0: - compute_node.free_ram_mb = table.c.free_ram_mb + free_ram_mb_delta - if free_disk_gb_delta != 0: - compute_node.free_disk_gb = (table.c.free_disk_gb + - free_disk_gb_delta) - if work_delta != 0: - compute_node.current_workload = (table.c.current_workload + - work_delta) - if vm_delta != 0: - compute_node.running_vms = table.c.running_vms + vm_delta - return compute_node - - -def compute_node_utilization_set(context, host, free_ram_mb=None, - free_disk_gb=None, work=None, vms=None): - """Like compute_node_utilization_update() modify a specific host - entry. But this function will set the metrics absolutely - (vs. a delta update). 
- """ - session = get_session() - compute_node = None - with session.begin(subtransactions=True): - compute_node = session.query(models.ComputeNode).\ - options(joinedload('service')).\ - filter(models.Service.host == host).\ - filter_by(deleted=False).\ - with_lockmode('update').\ - first() - if compute_node is None: - raise exception.NotFound(_("No ComputeNode for %(host)s") % - locals()) - - if free_ram_mb is not None: - compute_node.free_ram_mb = free_ram_mb - if free_disk_gb is not None: - compute_node.free_disk_gb = free_disk_gb - if work is not None: - compute_node.current_workload = work - if vms is not None: - compute_node.running_vms = vms - - return compute_node - - def compute_node_statistics(context): """Compute statistics over all compute nodes.""" result = model_query(context, diff --git a/nova/db/sqlalchemy/migrate_repo/versions/117_add_compute_node_stats.py b/nova/db/sqlalchemy/migrate_repo/versions/117_add_compute_node_stats.py new file mode 100644 index 000000000..5b0e19660 --- /dev/null +++ b/nova/db/sqlalchemy/migrate_repo/versions/117_add_compute_node_stats.py @@ -0,0 +1,61 @@ +# Copyright (c) 2012 OpenStack, LLC. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from sqlalchemy import Boolean, Column, DateTime, Integer +from sqlalchemy import Index, MetaData, String, Table +from nova.openstack.common import log as logging + +LOG = logging.getLogger(__name__) + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + # load tables for fk + compute_nodes = Table('compute_nodes', meta, autoload=True) + + # create new table + compute_node_stats = Table('compute_node_stats', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', Integer(), primary_key=True, nullable=False, + autoincrement=True), + Column('compute_node_id', Integer, index=True, nullable=False), + Column('key', String(length=255, convert_unicode=True, + assert_unicode=None, unicode_error=None, + _warn_on_bytestring=False), nullable=False), + Column('value', String(length=255, convert_unicode=True, + assert_unicode=None, unicode_error=None, + _warn_on_bytestring=False)), + mysql_engine='InnoDB') + try: + compute_node_stats.create() + except Exception: + LOG.exception("Exception while creating table 'compute_node_stats'") + raise + + +def downgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + # load tables for fk + compute_nodes = Table('compute_nodes', meta, autoload=True) + + compute_node_stats = Table('compute_node_stats', meta, autoload=True) + compute_node_stats.drop() diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py index 7d4435a7a..a37b1c215 100644 --- a/nova/db/sqlalchemy/models.py +++ b/nova/db/sqlalchemy/models.py @@ -144,8 +144,6 @@ class ComputeNode(BASE, NovaBase): # Free Ram, amount of activity (resize, migration, boot, etc) and # the number of running VM's are a good starting point for what's # important when making scheduling decisions. 
- # - # NOTE(sandy): We'll need to make this extensible for other schedulers. free_ram_mb = Column(Integer) free_disk_gb = Column(Integer) current_workload = Column(Integer) @@ -165,6 +163,24 @@ class ComputeNode(BASE, NovaBase): disk_available_least = Column(Integer) +class ComputeNodeStat(BASE, NovaBase): + """Stats related to the current workload of a compute host that are + intended to aid in making scheduler decisions.""" + __tablename__ = 'compute_node_stats' + id = Column(Integer, primary_key=True) + key = Column(String(511)) + value = Column(String(255)) + compute_node_id = Column(Integer, ForeignKey('compute_nodes.id')) + + primary_join = ('and_(ComputeNodeStat.compute_node_id == ' + 'ComputeNode.id, ComputeNodeStat.deleted == False)') + stats = relationship("ComputeNode", backref="stats", + primaryjoin=primary_join) + + def __str__(self): + return "{%d: %s = %s}" % (self.compute_node_id, self.key, self.value) + + class Certificate(BASE, NovaBase): """Represents a x509 certificate""" __tablename__ = 'certificates' |
