From 8887f10c66bca248f289db8f834ae8f36f9a03a1 Mon Sep 17 00:00:00 2001
From: Monsyne Dragon <mdragon@rackspace.com>
Date: Mon, 24 Sep 2012 19:38:09 +0000
Subject: Collect more accurate bandwidth data for XenServer

This changes the method used to poll xenserver for bandwidth data.
The recommended way of collecting such data from xenserver (namely the
RRD files provided by the hosts) does not seem to be reliable: the
figures will sometimes be correct, often will be significantly under
(> 10%), and occasionally will show artifacts, such as phantom 4gb
bandwidth 'spikes'.

This patch changes that to use the much simpler method of polling the
byte counters on the VIF network devices on the host. (We have old non-nova
code that does that on xenserver, and that method is known to work).

This should also make it much easier for hypervisors other than
xenserver to implement bandwidth polling, as polling the counters is a rather
more universal method.

Fixes bug 1055737

Change-Id: I6a280d8bbfcc74914f888b11bc09349a270a5f58
---
 nova/virt/driver.py          |  4 ++--
 nova/virt/fake.py            |  8 ++++----
 nova/virt/xenapi/driver.py   | 28 ++++++++++------------------
 nova/virt/xenapi/vm_utils.py |  9 ++++++++-
 nova/virt/xenapi/vmops.py    | 41 +++++++++++++++++++----------------------
 5 files changed, 43 insertions(+), 47 deletions(-)

(limited to 'nova/virt')

diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 41df132fc..d741524b0 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -221,8 +221,8 @@ class ComputeDriver(object):
         # TODO(Vek): Need to pass context in for access to auth_token
         raise NotImplementedError()
 
-    def get_all_bw_usage(self, instances, start_time, stop_time=None):
-        """Return bandwidth usage info for each interface on each
+    def get_all_bw_counters(self, instances):
+        """Return bandwidth usage counters for each interface on each
            running VM"""
         raise NotImplementedError()
 
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index a6476f9d9..959ab174c 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -186,11 +186,11 @@ class FakeDriver(driver.ComputeDriver):
     def get_diagnostics(self, instance_name):
         return 'FAKE_DIAGNOSTICS'
 
-    def get_all_bw_usage(self, instances, start_time, stop_time=None):
-        """Return bandwidth usage info for each interface on each
+    def get_all_bw_counters(self, instances):
+        """Return bandwidth usage counters for each interface on each
            running VM"""
-        bwusage = []
-        return bwusage
+        bw = []
+        return bw
 
     def block_stats(self, instance_name, disk_id):
         return [0L, 0L, 0L, 0L, None]
diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py
index 3425c64f8..ad2d64a38 100644
--- a/nova/virt/xenapi/driver.py
+++ b/nova/virt/xenapi/driver.py
@@ -298,35 +298,27 @@ class XenAPIDriver(driver.ComputeDriver):
         """Return data about VM diagnostics"""
         return self._vmops.get_diagnostics(instance)
 
-    def get_all_bw_usage(self, instances, start_time, stop_time=None):
-        """Return bandwidth usage info for each interface on each
+    def get_all_bw_counters(self, instances):
+        """Return bandwidth usage counters for each interface on each
            running VM"""
 
         # we only care about VMs that correspond to a nova-managed
         # instance:
         imap = dict([(inst.name, inst.uuid) for inst in instances])
-
-        bwusage = []
-        start_time = time.mktime(start_time.timetuple())
-        if stop_time:
-            stop_time = time.mktime(stop_time.timetuple())
+        bwcounters = []
 
         # get a dictionary of instance names.  values are dictionaries
-        # of mac addresses with values that are the bw stats:
+        # of mac addresses with values that are the bw counters:
         # e.g. {'instance-001' : { 12:34:56:78:90:12 : {'bw_in': 0, ....}}
-        iusages = self._vmops.get_all_bw_usage(start_time, stop_time)
-        for instance_name in iusages:
+        all_counters = self._vmops.get_all_bw_counters()
+        for instance_name, counters in all_counters.iteritems():
             if instance_name in imap:
                 # yes these are stats for a nova-managed vm
                 # correlate the stats with the nova instance uuid:
-                iusage = iusages[instance_name]
-
-                for macaddr, usage in iusage.iteritems():
-                    bwusage.append(dict(mac_address=macaddr,
-                                        uuid=imap[instance_name],
-                                        bw_in=usage['bw_in'],
-                                        bw_out=usage['bw_out']))
-        return bwusage
+                for vif_counter in counters.values():
+                    vif_counter['uuid'] = imap[instance_name]
+                    bwcounters.append(vif_counter)
+        return bwcounters
 
     def get_console_output(self, instance):
         """Return snapshot of console"""
diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py
index a9adb4575..2dc358f0f 100644
--- a/nova/virt/xenapi/vm_utils.py
+++ b/nova/virt/xenapi/vm_utils.py
@@ -1351,9 +1351,16 @@ def compile_diagnostics(record):
         return {"Unable to retrieve diagnostics": e}
 
 
+def fetch_bandwidth(session):
+    bw = session.call_plugin_serialized('bandwidth', 'fetch_all_bandwidth')
+    return bw
+
+
 def compile_metrics(start_time, stop_time=None):
     """Compile bandwidth usage, cpu, and disk metrics for all VMs on
-       this host"""
+       this host.
+       Note that some stats, like bandwidth, do not seem to be very
+       accurate in some of the data from XenServer (mdragon). """
     start_time = int(start_time)
 
     xml = _get_rrd_updates(_get_rrd_server(), start_time)
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index 52cb9b17b..ad6f0d38c 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -1208,34 +1208,31 @@ class VMOps(object):
         vm_rec = self._session.call_xenapi("VM.get_record", vm_ref)
         return vm_utils.compile_diagnostics(vm_rec)
 
-    def get_all_bw_usage(self, start_time, stop_time=None):
-        """Return bandwidth usage info for each interface on each
+    def _get_vif_device_map(self, vm_rec):
+        vif_map = {}
+        for vif in [self._session.call_xenapi("VIF.get_record", vrec)
+                    for vrec in vm_rec['VIFs']]:
+            vif_map[vif['device']] = vif['MAC']
+        return vif_map
+
+    def get_all_bw_counters(self):
+        """Return running bandwidth counter for each interface on each
            running VM"""
-        try:
-            metrics = vm_utils.compile_metrics(start_time, stop_time)
-        except exception.CouldNotFetchMetrics:
-            LOG.exception(_("Could not get bandwidth info."))
-            return {}
+        counters = vm_utils.fetch_bandwidth(self._session)
         bw = {}
-        for uuid, data in metrics.iteritems():
-            vm_ref = self._session.call_xenapi("VM.get_by_uuid", uuid)
-            vm_rec = self._session.call_xenapi("VM.get_record", vm_ref)
-            vif_map = {}
-            for vif in [self._session.call_xenapi("VIF.get_record", vrec)
-                        for vrec in vm_rec['VIFs']]:
-                vif_map[vif['device']] = vif['MAC']
+        for vm_ref, vm_rec in vm_utils.list_vms(self._session):
+            vif_map = self._get_vif_device_map(vm_rec)
             name = vm_rec['name_label']
             if 'nova_uuid' not in vm_rec['other_config']:
                 continue
+            dom = vm_rec.get('domid')
+            if dom is None or dom not in counters:
+                continue
             vifs_bw = bw.setdefault(name, {})
-            for key, val in data.iteritems():
-                if key.startswith('vif_'):
-                    vname = key.split('_')[1]
-                    vif_bw = vifs_bw.setdefault(vif_map[vname], {})
-                    if key.endswith('tx'):
-                        vif_bw['bw_out'] = int(val)
-                    if key.endswith('rx'):
-                        vif_bw['bw_in'] = int(val)
+            for vif_num, vif_data in counters[dom].iteritems():
+                mac = vif_map[vif_num]
+                vif_data['mac_address'] = mac
+                vifs_bw[mac] = vif_data
         return bw
 
     def get_console_output(self, instance):
-- 
cgit