#!/usr/bin/python -tt
#
#   rteval - script for evaluating platform suitability for RT Linux
#
#           This program is used to determine the suitability of
#           a system for use in a Real Time Linux environment.
#           It starts up various system loads and measures event
#           latency while the loads are running. A report is generated
#           to show the latencies encountered during the run.
#
#   Copyright 2009,2010,2011,2012   Clark Williams
#   Copyright 2009,2010,2011,2012   David Sommerseth
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#   For the avoidance of doubt the "preferred form" of this code is one which
#   is in an open unpatent encumbered format. Where cryptographic key signing
#   forms part of the process of creating an executable the information
#   including keys needed to generate an equivalently functional executable
#   are deemed to be part of the source code.
#

import sys
import os
import os.path
import time
import string
import threading
import subprocess
import socket
import optparse
import tempfile
import statvfs
import shutil
import signal
import rtevalclient
import ethtool
import xmlrpclib
import platform
import fnmatch
import glob
from datetime import datetime
from distutils import sysconfig

# put local path at start of list to override installed methods
sys.path.insert(0, "./rteval")
import util
import load
import cyclictest
import xmlout
import dmi
import rtevalConfig
import rtevalMailer
from cputopology import CPUtopology

# Cache of already resolved commands: command name -> full path
pathSave = {}


def getcmdpath(which):
    """Return the full path of the executable `which`.

    Every directory listed in the PATH environment variable (os.defpath
    when PATH is unset) is searched for a regular, executable file with
    that name.  Successful lookups are cached in pathSave, so a command
    is only searched for once.

    Raises RuntimeError when no such executable is found.
    """
    if which not in pathSave:
        for path in os.environ.get('PATH', os.defpath).split(os.pathsep):
            cmdfile = os.path.join(path, which)
            if os.path.isfile(cmdfile) and os.access(cmdfile, os.X_OK):
                pathSave[which] = cmdfile
                break
        else:
            # The search ran to completion without finding anything.
            # Looking the name up in the cache here would raise KeyError
            # and hide the real problem from the caller.
            raise RuntimeError("Command '%s' is unknown on this system" % which)
    return pathSave[which]


# Set by sigint_handler(); polled by the measurement loop so that a run
# interrupted with Ctrl-C is stopped in an orderly way.
sigint_received = False


def sigint_handler(signum, frame):
    """SIGINT handler: only record that the signal arrived."""
    global sigint_received
    sigint_received = True
    print("*** SIGINT received - stopping rteval run ***")


def sigterm_handler(signum, frame):
    """SIGTERM handler: abort whatever is running with a RuntimeError."""
    raise RuntimeError("SIGTERM received!")
class RtEval(object):
    """One rteval run: configuration, load/measurement control and reporting.

    The constructor merges the built-in defaults, the ini file and the
    command line into self.config.  rteval() is the entry point that
    performs the run (or only summarizes existing reports).
    """

    def __init__(self, cmdargs):
        """Build the configuration from defaults, ini file and `cmdargs`.

        cmdargs -- list of command line arguments (without the program name)

        Raises RuntimeError if the XSL report template cannot be found.
        Exits the process with status 2 when an XML-RPC server was
        requested, cannot be reached and --xmlrpc-no-abort was not given.
        """
        self.version = "1.37"
        self.load_modules = []
        self.workdir = os.getcwd()
        self.reportdir = os.getcwd()
        self.inifile = None
        self.cmd_options = {}
        self.start = datetime.now()
        self.init = 'unknown'

        default_config = {
            'rteval': {
                'verbose'    : False,
                'keepdata'   : True,
                'debugging'  : False,
                'duration'   : '60',
                'sysreport'  : False,
                'reportdir'  : None,
                'reportfile' : None,
                'installdir' : '/usr/share/rteval',
                'srcdir'     : '/usr/share/rteval/loadsource',
                'xmlrpc'     : None,
                'xslt_report': '/usr/share/rteval/rteval_text.xsl',
                'report_interval': '600',
                'logging'    : False,
                },
            'loads' : {
                'kcompile'   : 'module',
                'hackbench'  : 'module',
                },
            'kcompile' : {
                'source'     : 'linux-2.6.39.tar.bz2',
                'jobspercore': '2',
                },
            'hackbench' : {
                'source'     : 'hackbench.tar.bz2',
                'jobspercore': '5',
                },
            'cyclictest' : {
                'interval' : '100',
                'buckets'  : '2000',
                }
            }

        # setup initial configuration
        self.config = rtevalConfig.rtevalConfig(default_config, logfunc=self.info)

        # parse command line options
        self.parse_options(cmdargs)

        # read in config file info
        self.inifile = self.config.Load(self.cmd_options.inifile)

        # copy the command line options into the rteval config section
        # (cmd line overrides config file values)
        self.config.AppendConfig('rteval', self.cmd_options)

        # Per-module command line overrides; an option which was not given
        # on the command line is None and leaves the config value alone.
        overrides = (
            ('cyclictest', 'interval',    self.cmd_options.cyclictest_interval),
            ('cyclictest', 'distance',    self.cmd_options.cyclictest_distance),
            ('cyclictest', 'buckets',     self.cmd_options.cyclictest_buckets),
            ('cyclictest', 'priority',    self.cmd_options.cyclictest_priority),
            ('hackbench',  'jobspercore', self.cmd_options.hackbench_jobspercore),
            ('kcompile',   'jobspercore', self.cmd_options.kcompile_jobspercore),
            )
        for (section, key, value) in overrides:
            if value is not None:
                self.config.AppendConfig(section, {key: value})

        self.debug("workdir: %s" % self.workdir)

        # prepare a mailer, if that's configured
        if self.config.HasSection('smtp'):
            self.mailer = rtevalMailer.rtevalMailer(self.config.GetSection('smtp'))
        else:
            self.mailer = None

        self.loads = []
        self.cputopology = None
        self.numcores = None
        self.memsize = None
        self.current_clocksource = None
        self.available_clocksource = None
        self.services = None
        self.kthreads = None
        self.xml = None
        self.xmlreport = None   # set by genxml()
        self.baseos = "unknown"
        self.annotate = self.cmd_options.annotate

        if not self.config.xslt_report.startswith(self.config.installdir):
            self.config.xslt_report = os.path.join(self.config.installdir, "rteval_text.xsl")

        if not os.path.exists(self.config.xslt_report):
            raise RuntimeError("can't find XSL template (%s)!" % self.config.xslt_report)

        # Add rteval directory into module search path
        sys.path.insert(0, '%s/rteval' % sysconfig.get_python_lib())

        # generate a set of "junk" characters to use for filtering later
        self.junk = "".join([chr(c) for c in range(0, 0xff)
                             if chr(c) not in string.printable])
        self.transtable = string.maketrans("", "")

        # If --xmlrpc-submit is given, check that we can access the server
        if self.config.xmlrpc:
            self._ping_xmlrpc_server()

    def _ping_xmlrpc_server(self):
        """Check that the configured XML-RPC server answers.

        Up to six connection attempts are made, sleeping attempt*15
        seconds between them.  If the server stays unreachable the process
        exits with status 2, unless --xmlrpc-no-abort was given.
        """
        self.debug("Checking if XML-RPC server '%s' is reachable" % self.config.xmlrpc)
        res = None
        attempt = 0
        warning_sent = False
        ping_failed = False
        while attempt < 6:
            try:
                client = rtevalclient.rtevalclient("http://%s/rteval/API1/" % self.config.xmlrpc)
                res = client.Hello()
                attempt = 10
                ping_failed = False
            except xmlrpclib.ProtocolError:
                # Server do not support Hello(), but is reachable
                self.info("Got XML-RPC connection with %s but it did not support Hello()"
                          % self.config.xmlrpc)
                res = None
                # The server is reachable, so the check is done.  Without
                # leaving the loop here the attempt counter never advances
                # and the server would be polled forever.
                ping_failed = False
                break
            except socket.error:
                err = sys.exc_info()[1]
                self.info("Could not establish XML-RPC contact with %s\n%s"
                          % (self.config.xmlrpc, str(err)))

                if (self.mailer is not None) and (not warning_sent):
                    self.mailer.SendMessage("[RTEVAL:WARNING] Failed to ping XML-RPC server",
                                            "Server %s did not respond. Not giving up yet."
                                            % self.config.xmlrpc)
                    warning_sent = True

                # Do attempts handling
                attempt += 1
                if attempt > 5:
                    break # To avoid sleeping before we abort

                print("Failed pinging XML-RPC server. Doing another attempt(%i) " % attempt)
                time.sleep(attempt*15) # Incremental sleep - sleep attempts*15 seconds
                ping_failed = True

        if ping_failed:
            if not self.cmd_options.xmlrpc_noabort:
                print("ERROR: Could not reach XML-RPC server '%s'. Aborting." % self.config.xmlrpc)
                sys.exit(2)
            else:
                print("WARNING: Could not ping the XML-RPC server. Will continue anyway.")

        if res:
            self.info("Verified XML-RPC connection with %s (XML-RPC API version: %i)"
                      % (res["server"], res["APIversion"]))
            self.debug("Recieved greeting: %s" % res["greeting"])

    def get_cpu_topology(self):
        ''' figure out how many processors we have available

        Stores the number of online cores in self.numcores and returns the
        XML data produced by the CPUtopology object.
        '''
        topology = CPUtopology()
        topology.parse()
        self.numcores = topology.getCPUcores(True)
        self.debug("counted %d cores (%d online) and %d sockets" %
                   (topology.getCPUcores(False), self.numcores,
                    topology.getCPUsockets()))
        return topology.getXMLdata()

    def __get_services_sysvinit(self):
        """Return {service name: 'running' | 'not running' | 'unknown'}.

        Every executable file in the init.d directory which is not on the
        reject list is examined.  Scripts which contain a 'status)' case
        label are run with the 'status' argument; all others are reported
        as 'unknown'.

        Raises RuntimeError when no init.d directory exists.
        """
        reject = ('functions', 'halt', 'killall', 'single', 'linuxconf', 'kudzu',
                  'skeleton', 'README', '*.dpkg-dist', '*.dpkg-old', 'rc', 'rcS',
                  'single', 'reboot', 'bootclean.sh')

        servicesdir = None
        for sdir in ('/etc/init.d', '/etc/rc.d/init.d'):
            if os.path.isdir(sdir):
                servicesdir = sdir
                break
        if not servicesdir:
            raise RuntimeError("No services dir (init.d) found on your system")

        self.debug("Services located in %s, going through each service file to check status"
                   % servicesdir)

        # Environment handed to the init scripts.  The values are passed
        # to env(1) verbatim (no shell is involved), so they must not be
        # wrapped in quotes; variables missing from our own environment
        # are simply left out.
        status_env = ['%s=%s' % (var, os.environ[var])
                      for var in ('LANG', 'PATH', 'TERM') if var in os.environ]

        ret_services = {}
        for service in glob.glob(os.path.join(servicesdir, '*')):
            servicename = os.path.basename(service)
            rejected = [1 for p in reject if fnmatch.fnmatch(servicename, p)]
            if rejected or not os.access(service, os.X_OK):
                continue

            # Does the script have a "status)" case label at all?
            has_status = subprocess.call([getcmdpath('grep'), '-qs',
                                          r'\(^\|\W\)status)', service])
            if has_status != 0:
                ret_services[servicename] = 'unknown'
                continue

            cmd = ['env', '-i'] + status_env + [service, 'status']
            c = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting, which a plain
            # wait() does not do
            (out, err) = c.communicate()
            if c.returncode == 0 and (out or err):
                ret_services[servicename] = 'running'
            else:
                ret_services[servicename] = 'not running'
        return ret_services

    def __get_services_systemd(self):
        """Return {service name: state} as listed by 'systemctl list-unit-files'."""
        ret_services = {}
        cmd = [getcmdpath('systemctl'), 'list-unit-files', '-t', 'service', '--no-legend']
        self.debug("cmd: %s" % " ".join(cmd))
        c = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (out, err) = c.communicate()
        for p in out.splitlines():
            # p are lines like "servicename.service status"
            v = p.strip().split()
            if len(v) < 2:
                continue    # blank or malformed line
            ret_services[v[0].split('.')[0]] = v[1]
        return ret_services

    def get_services(self):
        """Detect the init system (stored in self.init) and return the
        service status dictionary for it.

        Raises RuntimeError when PID 1 is neither 'systemd' nor 'init'.
        """
        cmd = [getcmdpath('ps'), '-ocomm=', '1']
        c = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        self.init = c.communicate()[0].strip()
        if self.init == 'systemd':
            self.debug("Using systemd to get services status")
            return self.__get_services_systemd()
        elif self.init == 'init':
            self.init = 'sysvinit'
            self.debug("Using sysvinit to get services status")
            return self.__get_services_sysvinit()
        raise RuntimeError("Unknown init system (%s)" % self.init)

    def get_kthreads(self):
        """Return {pid: {'policy', 'priority', 'name'}} for kernel threads.

        Kernel threads are recognised by a command column of the form
        "[name]" in the ps output.
        """
        policies = {'FF':'fifo', 'RR':'rrobin', 'TS':'other', '?':'unknown' }
        ret_kthreads = {}
        self.debug("getting kthread status")
        cmd = [getcmdpath('ps'), '-eocommand,pid,policy,rtprio,comm']
        self.debug("cmd: %s" % " ".join(cmd))
        c = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        for p in c.communicate()[0].splitlines():
            v = p.strip().split()
            try:
                kcmd = v.pop(0)
                if int(v[0]) > 0 and kcmd.startswith('[') and kcmd.endswith(']'):
                    ret_kthreads[v[0]] = {'policy' : policies.get(v[1], 'unknown'),
                                          'priority' : v[2],
                                          'name' : v[3] }
            except (ValueError, IndexError):
                # Ignore the header, blank lines and commands whose
                # arguments shift the columns
                pass
        return ret_kthreads

    def get_modules(self):
        """Return a list with one dictionary per line of /proc/modules.

        Keys: modname, modsize, numusers, usedby, modstate.
        """
        modlist = []
        fp = open('/proc/modules', 'r')
        try:
            for line in fp.readlines():
                mod = line.split()
                modlist.append({"modname": mod[0],
                                "modsize": mod[1],
                                "numusers": mod[2],
                                "usedby": mod[3],
                                "modstate": mod[4]})
        finally:
            fp.close()
        return modlist

    def parse_options(self, cmdargs):
        '''parse the command line arguments

        The parsed options end up in self.cmd_options and the positional
        arguments in self.cmd_arguments.  The duration accepts an optional
        s/m/h/d suffix and is converted to seconds (float).  self.workdir
        is set to the absolute path of the --workdir value.
        '''
        parser = optparse.OptionParser()
        parser.add_option("-d", "--duration", dest="duration",
                          type="string", default=self.config.duration,
                          help="specify length of test run (default: %default)")
        parser.add_option("-v", "--verbose", dest="verbose",
                          action="store_true", default=self.config.verbose,
                          help="turn on verbose prints (default: %default)")
        parser.add_option("-w", "--workdir", dest="workdir",
                          type="string", default=self.workdir,
                          help="top directory for rteval data (default: %default)")
        parser.add_option("-l", "--loaddir", dest="srcdir",
                          type="string", default=self.config.srcdir,
                          help="directory for load source tarballs (default: %default)")
        parser.add_option("-i", "--installdir", dest="installdir",
                          type="string", default=self.config.installdir,
                          help="place to locate installed templates (default: %default)")
        parser.add_option("-s", "--sysreport", dest="sysreport",
                          action="store_true", default=self.config.sysreport,
                          help='run sysreport to collect system data (default: %default)')
        parser.add_option("-D", '--debug', dest='debugging',
                          action='store_true', default=self.config.debugging,
                          help='turn on debug prints (default: %default)')
        parser.add_option("-X", '--xmlrpc-submit', dest='xmlrpc',
                          action='store', default=self.config.xmlrpc, metavar='HOST',
                          help='Hostname to XML-RPC server to submit reports')
        parser.add_option("-P", "--xmlrpc-no-abort", dest="xmlrpc_noabort",
                          action='store_true', default=False,
                          help="Do not abort if XML-RPC server do not respond to ping request")
        parser.add_option("-Z", '--summarize', dest='summarize',
                          action='store_true', default=False,
                          help='summarize an already existing XML report')
        parser.add_option("-H", '--raw-histogram', dest='rawhistogram',
                          action='store_true', default=False,
                          help='Generate raw histogram data for an already existing XML report')
        parser.add_option("-f", "--inifile", dest="inifile",
                          type='string', default=None,
                          help="initialization file for configuring loads and behavior")
        parser.add_option("-a", "--annotate", dest="annotate",
                          type="string", default=None,
                          help="Add a little annotation which is stored in the report")
        parser.add_option("-L", "--logging", dest="logging",
                          action='store_true', default=False,
                          help='log the output of the loads in the report directory')
        parser.add_option("-O", "--onlyload", dest="onlyload",
                          action='store_true', default=False,
                          help="only run the loads (don't run measurement threads)")

        # module options
        parser.add_option("--cyclictest-interval", dest="cyclictest_interval",
                          action="store", type="int",
                          help="cyclictest measurement interval in microseconds")
        parser.add_option("--cyclictest-distance", dest="cyclictest_distance",
                          action="store", type="int",
                          help="cyclictest measurement interval increment in microseconds")
        parser.add_option("--cyclictest-buckets", dest="cyclictest_buckets",
                          action="store", type="int",
                          help="number of cyclictest 1 microsecond histogram buckets")
        parser.add_option("--cyclictest-priority", dest="cyclictest_priority",
                          action="store", type="int",
                          help="SCHED_FIFO priority of measurement threads")
        parser.add_option("--hackbench-jobspercore", dest="hackbench_jobspercore",
                          action="store", type="int",
                          help="number of hackbench jobs per-core")
        parser.add_option("--kcompile-jobspercore", dest="kcompile_jobspercore",
                          action="store", type="int",
                          help="number of kernel compile jobs per-core")

        (self.cmd_options, self.cmd_arguments) = parser.parse_args(args = cmdargs)

        if self.cmd_options.duration:
            # seconds per unit suffix; no suffix means seconds
            units = {'s': 1.0, 'm': 60.0, 'h': 3600.0, 'd': 3600.0 * 24.0}
            v = self.cmd_options.duration.lower()
            mult = 1.0
            if v[-1:] in units:
                mult = units[v[-1:]]
                v = v[:-1]
            self.cmd_options.duration = float(v) * mult
        self.workdir = os.path.abspath(self.cmd_options.workdir)

    def debug(self, str):
        """Print `str` prefixed with 'rteval: ' when debugging is enabled."""
        if self.config.debugging is True:
            print("rteval: %s" % str)

    def info(self, str):
        """Print `str` when verbose output is enabled."""
        if self.config.verbose is True:
            print(str)

    def run_sysreport(self):
        """Run sosreport (or sysreport) and move its output from /tmp into
        the report directory.

        Raises RuntimeError when neither tool is installed.
        """
        if os.path.exists('/usr/sbin/sosreport'):
            exe = '/usr/sbin/sosreport'
        elif os.path.exists('/usr/sbin/sysreport'):
            exe = '/usr/sbin/sysreport'
        else:
            raise RuntimeError("Can't find sosreport/sysreport")

        self.debug("report tool: %s" % exe)
        options = ['-k', 'rpm.rpmva=off',
                   '--name=rteval',
                   '--batch',
                   '--no-progressbar']

        self.info("Generating SOS report")
        self.info("using command %s" % " ".join([exe]+options))
        subprocess.call([exe] + options)
        for s in glob.glob('/tmp/s?sreport-rteval-*'):
            self.debug("moving %s to %s" % (s, self.reportdir))
            shutil.move(s, self.reportdir)

    def genxml(self, duration, accum, samples, xslt = None):
        """Build the XML report and write it out.

        duration -- timedelta covering the whole run
        accum    -- sum of all load average samples
        samples  -- number of load average samples (may be 0 on very
                    short runs; the load average is then reported as 0.0)
        xslt     -- optional XSLT template applied when writing

        The report is written to self.xml, or to stdout when self.xml is
        not set.  It stays available in self.xmlreport afterwards.
        """
        (hours, seconds) = divmod(duration.seconds, 3600)
        (minutes, seconds) = divmod(seconds, 60)
        # the first uname field is not used; don't name it 'sys', that
        # would hide the sys module inside this method
        (sysname, node, release, ver, machine) = os.uname()

        # Start new XML report
        self.xmlreport = xmlout.XMLOut('rteval', self.version)
        self.xmlreport.NewReport()

        self.xmlreport.openblock('run_info', {'days': duration.days,
                                              'hours': hours,
                                              'minutes': minutes,
                                              'seconds': seconds})
        self.xmlreport.taggedvalue('date', self.start.strftime('%Y-%m-%d'))
        self.xmlreport.taggedvalue('time', self.start.strftime('%H:%M:%S'))
        if self.annotate:
            self.xmlreport.taggedvalue('annotate', self.annotate)
        self.xmlreport.closeblock()

        self.xmlreport.openblock('uname')
        self.xmlreport.taggedvalue('node', node)
        isrt = 1
        if ver.find(' RT ') == -1:
            isrt = 0
        self.xmlreport.taggedvalue('kernel', release, {'is_RT':isrt})
        self.xmlreport.taggedvalue('arch', machine)
        self.xmlreport.taggedvalue('baseos', self.baseos)
        self.xmlreport.closeblock()

        self.xmlreport.openblock("clocksource")
        self.xmlreport.taggedvalue('current', self.current_clocksource)
        self.xmlreport.taggedvalue('available', self.available_clocksource)
        self.xmlreport.closeblock()

        self.xmlreport.openblock('hardware')
        self.xmlreport.AppendXMLnodes(self.cputopology)
        self.xmlreport.taggedvalue('numa_nodes', self.numanodes)
        self.xmlreport.taggedvalue('memory_size', "%.3f" % self.memsize[0],
                                   {"unit": self.memsize[1]})
        self.xmlreport.closeblock()

        self.xmlreport.openblock('services', {'init': self.init})
        for s in self.services:
            self.xmlreport.taggedvalue("service", self.services[s], {"name": s})
        self.xmlreport.closeblock()

        keys = sorted(self.kthreads.keys())
        if len(keys):
            self.xmlreport.openblock('kthreads')
            for pid in keys:
                self.xmlreport.taggedvalue('thread', self.kthreads[pid]['name'],
                                           { 'policy' : self.kthreads[pid]['policy'],
                                             'priority' : self.kthreads[pid]['priority'],
                                             })
            self.xmlreport.closeblock()

        modlist = util.get_modules()
        if len(modlist):
            self.xmlreport.openblock('kernelmodules')
            for mod in modlist:
                self.xmlreport.openblock('module')
                self.xmlreport.taggedvalue('info', mod['modname'],
                                           {'size': mod['modsize'],
                                            'state': mod['modstate'],
                                            'numusers': mod['numusers']})
                if mod['usedby'] != '-':
                    self.xmlreport.openblock('usedby')
                    for ub in mod['usedby'].split(','):
                        if len(ub):
                            self.xmlreport.taggedvalue('module', ub, None)
                    self.xmlreport.closeblock()
                self.xmlreport.closeblock()
            self.xmlreport.closeblock()

        #
        # Retrieve configured IP addresses
        #
        self.xmlreport.openblock('network_config')

        # Get the interface name for the IPv4 default gw
        defgw4 = None
        route = open('/proc/net/route')
        try:
            routes = route.readlines()
        finally:
            route.close()
        for rl in routes[1:]:       # the first line is the column header
            splt = rl.split("\t")
            # Only catch default route
            if len(splt) > 2 and splt[2] != '00000000' and splt[1] == '00000000':
                defgw4 = splt[0]
                break

        # Make an interface tag for each device found
        if hasattr(ethtool, 'get_interfaces_info'):
            # Using the newer python-ethtool API (version >= 0.4)
            for dev in ethtool.get_interfaces_info(ethtool.get_devices()):
                if dev.device == 'lo':
                    continue

                self.xmlreport.openblock('interface',
                                         {'device': dev.device,
                                          'hwaddr': dev.mac_address}
                                         )

                # Protcol configurations
                if dev.ipv4_address:
                    isdefgw = '0'
                    if defgw4 == dev.device:
                        isdefgw = '1'
                    self.xmlreport.openblock('IPv4',
                                             {'ipaddr': dev.ipv4_address,
                                              'netmask': dev.ipv4_netmask,
                                              'broadcast': dev.ipv4_broadcast,
                                              'defaultgw': isdefgw}
                                             )
                    self.xmlreport.closeblock()

                for ip6 in dev.get_ipv6_addresses():
                    self.xmlreport.openblock('IPv6',
                                             {'ipaddr': ip6.address,
                                              'netmask': ip6.netmask,
                                              'scope': ip6.scope}
                                             )
                    self.xmlreport.closeblock()
                self.xmlreport.closeblock()
        else: # Fall back to older python-ethtool API (version < 0.4)
            ifdevs = ethtool.get_active_devices()
            if 'lo' in ifdevs:      # the loopback device may be down
                ifdevs.remove('lo')
            ifdevs.sort()

            for dev in ifdevs:
                isdefgw = '0'
                if defgw4 == dev:
                    isdefgw = '1'
                self.xmlreport.openblock('interface',
                                         {'device': dev,
                                          'hwaddr': ethtool.get_hwaddr(dev)}
                                         )
                self.xmlreport.openblock('IPv4',
                                         {'ipaddr': ethtool.get_ipaddr(dev),
                                          'netmask': ethtool.get_netmask(dev),
                                          'defaultgw': isdefgw}
                                         )
                self.xmlreport.closeblock()
                self.xmlreport.closeblock()
        self.xmlreport.closeblock()

        # A run shorter than the sampling period has no samples at all
        load_average = 0.0
        if samples:
            load_average = accum / samples
        self.xmlreport.openblock('loads', {'load_average':str(load_average)})
        for ld in self.loads:
            ld.genxml(self.xmlreport)
        self.xmlreport.closeblock()
        self.cyclictest.genxml(self.xmlreport)

        # now generate the dmidecode data for this host
        d = dmi.DMIinfo(self.config.GetSection('rteval'))
        d.genxml(self.xmlreport)

        # Close the report - prepare for return the result
        self.xmlreport.close()

        # Write XML (or write XSLT parsed XML if xslt != None)
        if self.xml is not None:
            self.xmlreport.Write(self.xml, xslt)
        else:
            # If no file is set, use stdout
            self.xmlreport.Write("-", xslt) # libxml2 defines a filename as "-" to be stdout

    def report(self):
        "Create a screen report, based on a predefined XSLT template"
        self.xmlreport.Write("-", self.config.xslt_report)

    def XMLreport(self):
        "Retrieves the complete rteval XML report as a libxml2.xmlDoc object"
        return self.xmlreport.GetXMLdocument()

    def show_report(self, xmlfile, xsltfile):
        '''summarize a previously generated xml file

        xsltfile is looked up in the configured install directory;
        RuntimeError is raised when it does not exist there.
        '''
        print("Loading %s for summarizing" % xmlfile)

        xsltfullpath = os.path.join(self.config.installdir, xsltfile)
        if not os.path.exists(xsltfullpath):
            raise RuntimeError("can't find XSL template (%s)!" % xsltfullpath)

        xmlreport = xmlout.XMLOut('rteval', self.version)
        xmlreport.LoadReport(xmlfile)
        xmlreport.Write('-', xsltfullpath)
        del xmlreport

    def start_loads(self):
        """Start all load threads and wait until each reports ready.

        Raises RuntimeError if there are no loads or a load dies while
        starting up.
        """
        if len(self.loads) == 0:
            raise RuntimeError("start_loads: No loads defined!")
        self.info ("starting loads:")
        for l in self.loads:
            l.start()
        # now wait until they're all ready
        self.info("waiting for ready from all loads")
        ready = False
        while not ready:
            busy = 0
            for l in self.loads:
                if not l.isAlive():
                    raise RuntimeError("%s died" % l.name)
                if not l.isReady():
                    busy += 1
                    self.debug("waiting for %s" % l.name)
            if busy:
                time.sleep(1.0)
            else:
                ready = True

    def stop_loads(self):
        """Signal every load to stop and give each two seconds to finish.

        Raises RuntimeError if there are no loads.
        """
        if len(self.loads) == 0:
            raise RuntimeError("stop_loads: No loads defined!")
        self.info("stopping loads: ")
        for l in self.loads:
            self.info("\t%s" % l.name)
            l.stopevent.set()
            l.join(2.0)

    def make_report_dir(self):
        """Create <workdir>/rteval-YYYYMMDD-N (first unused N, starting
        at 1) with a 'logs' sub directory; store the path in
        self.reportdir and return it.
        """
        t = self.start
        i = 1
        self.reportdir = os.path.join(self.workdir, t.strftime("rteval-%Y%m%d-"+str(i)))
        while os.path.exists(self.reportdir):
            i += 1
            self.reportdir = os.path.join(self.workdir, t.strftime('rteval-%Y%m%d-'+str(i)))
        if not os.path.isdir(self.reportdir):
            os.mkdir(self.reportdir)
            os.mkdir(os.path.join(self.reportdir, "logs"))
        return self.reportdir

    def get_dmesg(self):
        """Copy /var/log/dmesg into the report directory, if it exists."""
        dpath = "/var/log/dmesg"
        if not os.path.exists(dpath):
            print("dmesg file not found at %s" % dpath)
            return
        shutil.copyfile(dpath, os.path.join(self.reportdir, "dmesg"))

    def show_remaining_time(self, remaining):
        """Print `remaining` (seconds) as days, hours, minutes and seconds."""
        (days, r) = divmod(int(remaining), 86400)
        (hours, r) = divmod(r, 3600)
        (minutes, r) = divmod(r, 60)
        print("rteval time remaining: %d days, %d hours, %d minutes, %d seconds"
              % (days, hours, minutes, r))

    def measure(self):
        """Perform the run: collect system information, start the loads
        (and cyclictest unless --onlyload), wait for the duration to pass
        or SIGINT to arrive, stop everything and generate the reports.

        Raises RuntimeError if a load or the cyclictest thread dies, or
        when SIGTERM is received during the run.
        """
        # Collect misc system info
        self.baseos = util.get_base_os()
        self.cputopology = self.get_cpu_topology()
        self.numanodes = util.get_num_nodes()
        self.memsize = util.get_memory_size()
        (self.current_clocksource, self.available_clocksource) = util.get_clocksources()
        self.services = self.get_services()
        self.kthreads = self.get_kthreads()

        onlyload = self.cmd_options.onlyload

        builddir = os.path.join(self.workdir, 'rteval-build')
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        self.reportfile = os.path.join(self.reportdir, "summary.rpt")
        self.xml = os.path.join(self.reportdir, "summary.xml")

        # read in loads from the ini file
        self.load_modules = []
        loads = self.config.GetSection("loads")
        for l in loads:
            # hope to eventually have different kinds but module is only on
            # for now (jcw)
            if l[1].lower() == 'module':
                self.info("importing load module %s" % l[0])
                self.load_modules.append(__import__(l[0]))

        self.info("setting up loads")
        self.loads = []
        params = {'workdir':self.workdir,
                  'reportdir':self.reportdir,
                  'builddir':builddir,
                  'srcdir':self.config.srcdir,
                  'verbose': self.config.verbose,
                  'debugging': self.config.debugging,
                  'numcores':self.numcores,
                  'logging':self.config.logging,
                  'memsize':self.memsize,
                  'numanodes':self.numanodes,
                  'duration':self.config.duration,
                  }

        for m in self.load_modules:
            self.config.AppendConfig(m.__name__, params)
            self.info("creating load instance for %s" % m.__name__)
            self.loads.append(m.create(self.config.GetSection(m.__name__)))

        if not onlyload:
            self.config.AppendConfig('cyclictest', params)
            self.info("setting up cyclictest")
            self.cyclictest = cyclictest.Cyclictest(params=self.config.GetSection('cyclictest'))

        nthreads = 0
        try:
            # start the loads
            self.start_loads()

            print("rteval run on %s started at %s" % (os.uname()[2], time.asctime()))
            banner = "started %d loads on %d cores" % (len(self.loads), self.numcores)
            if self.numanodes > 1:
                banner += " with %d numa nodes" % self.numanodes
            print(banner)
            print("Run duration: %d seconds" % self.config.duration)

            start = datetime.now()

            if not onlyload:
                # start the cyclictest thread
                self.info("starting cyclictest")
                self.cyclictest.start()

            # turn loose the loads
            self.info("sending start event to all loads")
            for l in self.loads:
                l.startevent.set()
                nthreads += 1

            accum = 0.0
            samples = 0

            report_interval = int(self.config.GetSection('rteval').report_interval)

            # wait for time to expire or thread to die
            signal.signal(signal.SIGINT, sigint_handler)
            signal.signal(signal.SIGTERM, sigterm_handler)
            self.info("waiting for duration (%f)" % self.config.duration)
            stoptime = (time.time() + self.config.duration)
            currtime = time.time()
            rpttime = currtime + report_interval
            loadcount = 5   # iterations left until the next loadavg sample
            while (currtime <= stoptime) and not sigint_received:
                time.sleep(1.0)
                if not onlyload and not self.cyclictest.isAlive():
                    raise RuntimeError("cyclictest thread died!")
                if len(threading.enumerate()) < nthreads:
                    raise RuntimeError("load thread died!")
                if not loadcount:
                    # open the loadavg /proc entry
                    p = open("/proc/loadavg")
                    try:
                        loadavg = float(p.readline().split()[0])
                    finally:
                        p.close()
                    accum += loadavg
                    samples += 1
                    loadcount = 5
                else:
                    loadcount -= 1
                if currtime >= rpttime:
                    left_to_run = stoptime - currtime
                    self.show_remaining_time(left_to_run)
                    rpttime = currtime + report_interval
                    # no sample may have been taken yet when the report
                    # interval is shorter than the sampling period
                    average = 0.0
                    if samples:
                        average = accum / samples
                    print("load average: %.2f" % average)
                currtime = time.time()
            self.debug("out of measurement loop")
            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)

        except RuntimeError:
            print("Runtime error during measurement: %s" % sys.exc_info()[1])
            raise

        finally:
            if not onlyload:
                # stop cyclictest
                self.cyclictest.stopevent.set()

            # stop the loads
            self.stop_loads()

        print("stopping run at %s" % time.asctime())
        if not onlyload:
            # wait for cyclictest to finish calculating stats
            self.cyclictest.finished.wait()
            self.genxml(datetime.now() - start, accum, samples)
            self.report()
            if self.config.sysreport:
                self.run_sysreport()

    def XMLRPC_Send(self):
        "Sends the report to a given XML-RPC host. Returns 0 on success or 2 on submission failure."

        if not self.config.xmlrpc:
            return 2

        if getattr(self, 'xmlreport', None) is None:
            # No report has been generated (e.g. --onlyload), so there is
            # nothing which could be submitted
            return 2

        url = "http://%s/rteval/API1/" % self.config.xmlrpc
        attempt = 0
        exitcode = 2   # Presume failure
        warning_sent = False
        while attempt < 6:
            try:
                client = rtevalclient.rtevalclient(url)
                print("Submitting report to %s" % url)
                rterid = client.SendReport(self.xmlreport.GetXMLdocument())
                print("Report registered with submission id %i" % rterid)
                attempt = 10
                exitcode = 0 # Success
            except socket.error:
                if (self.mailer is not None) and (not warning_sent):
                    self.mailer.SendMessage("[RTEVAL:WARNING] Failed to submit report to XML-RPC server",
                                            "Server %s did not respond. Not giving up yet."
                                            % self.config.xmlrpc)
                    warning_sent = True

                attempt += 1
                if attempt > 5:
                    break # To avoid sleeping before we abort

                print("Failed sending report. Doing another attempt(%i) " % attempt)
                time.sleep(attempt*5*60) # Incremental sleep - sleep attempts*5 minutes

        if (self.mailer is not None):
            # Send final result messages
            if exitcode == 2:
                self.mailer.SendMessage("[RTEVAL:FAILURE] Failed to submit report to XML-RPC server",
                                        "Server %s did not respond at all after %i attempts."
                                        % (self.config.xmlrpc, attempt - 1))
            elif (exitcode == 0) and warning_sent:
                self.mailer.SendMessage("[RTEVAL:SUCCESS] XML-RPC server available again",
                                        "Succeeded to submit the report to %s in the end."
                                        % (self.config.xmlrpc))
        return exitcode

    def tar_results(self):
        """Pack the report directory into <reportdir>.tar.bz2 next to it.

        The working directory of the process is left untouched.  A failure
        to write the archive is reported as a warning and does not abort
        the run.

        Raises RuntimeError if the report directory does not exist.
        """
        if not os.path.isdir(self.reportdir):
            raise RuntimeError("no such directory: %s" % self.reportdir)

        import tarfile
        dirname = os.path.dirname(self.reportdir)
        rptdir = os.path.basename(self.reportdir)
        tarball = os.path.join(dirname, rptdir + ".tar.bz2")
        try:
            t = tarfile.open(tarball, "w:bz2")
            try:
                # arcname keeps the member names relative, exactly as if
                # the archive had been created from inside dirname
                t.add(self.reportdir, arcname=rptdir)
            finally:
                t.close()
        except (EnvironmentError, tarfile.TarError):
            print("WARNING: could not create %s: %s" % (tarball, sys.exc_info()[1]))

    def _extract_summary(self, archive):
        """Copy the first member of the tar file `archive` whose name
        contains 'summary.xml' into a new temporary file.

        Returns the path of the temporary file, or None (after printing
        the reason) when the archive cannot be read or has no such member.
        """
        import tarfile
        try:
            t = tarfile.open(archive)
        except (EnvironmentError, EOFError, tarfile.TarError):
            print("Don't know how to summarize %s (tarfile open failed)" % archive)
            return None

        try:
            element = None
            for f in t.getnames():
                if f.find('summary.xml') != -1:
                    element = f
                    break
            member = None
            if element is not None:
                # extractfile() hands out the contents only, so the member
                # name is never used as a path on our file system
                member = t.extractfile(element)
            if member is None:
                print("No summary.xml found in tar archive %s" % archive)
                return None

            self.debug("extracting %s from %s for summarizing" % (element, archive))
            (fd, summary) = tempfile.mkstemp(prefix='rteval-', suffix='-summary.xml')
            out = os.fdopen(fd, 'wb')
            try:
                try:
                    shutil.copyfileobj(member, out)
                finally:
                    out.close()
            except:
                # don't leave a half written file behind
                os.unlink(summary)
                raise
            return summary
        finally:
            t.close()

    def summarize(self, file):
        """Print a text summary of `file`, which is either an XML report
        or a .tar.bz2 archive containing a summary.xml.
        """
        if not file.endswith(".tar.bz2"):
            self.show_report(file, 'rteval_text.xsl')
            return

        summary = self._extract_summary(file)
        if summary is None:
            return
        try:
            self.show_report(summary, 'rteval_text.xsl')
        finally:
            os.unlink(summary)

    def rteval(self):
        ''' main function for rteval

        Returns the exit code for the process: 0 on success, or the
        result of XMLRPC_Send() when a report was to be submitted.  Exits
        directly after --summarize / --raw-histogram, when not run as
        root and when the report directory cannot be created.
        '''
        retval = 0

        # Parse initial DMI decoding errors
        dmi.ProcessWarnings()

        # if --summarize was specified then just parse the XML, print it and exit
        if self.cmd_options.summarize or self.cmd_options.rawhistogram:
            if len(self.cmd_arguments) < 1:
                raise RuntimeError("Must specify at least one XML file with --summarize!")

            for x in self.cmd_arguments:
                if self.cmd_options.summarize:
                    self.summarize(x)
                elif self.cmd_options.rawhistogram:
                    self.show_report(x, 'rteval_histogram_raw.xsl')

            sys.exit(0)

        if os.getuid() != 0:
            print("Must be root to run rteval!")
            sys.exit(-1)

        self.debug('''rteval options:
          workdir: %s
          loaddir: %s
          reportdir: %s
          verbose: %s
          debugging: %s
          logging: %s
          duration: %f
          sysreport: %s
          inifile: %s''' % (self.workdir, self.config.srcdir, self.reportdir,
                            self.config.verbose, self.config.debugging,
                            self.config.logging, self.config.duration,
                            self.config.sysreport, self.inifile))

        if not os.path.isdir(self.workdir):
            raise RuntimeError("work directory %s does not exist" % self.workdir)

        # create our report directory
        try:
            self.make_report_dir()
        except EnvironmentError:
            print("Cannot create the report dir!")
            print("(is this an NFS filesystem with rootsquash turned on?)")
            sys.exit(-1)

        self.measure()

        # if --xmlrpc-submit | -X was given, send our report to this host
        if self.config.xmlrpc:
            retval = self.XMLRPC_Send()

        self.get_dmesg()
        self.tar_results()
        return retval


if __name__ == '__main__':
    import pwd
    import grp

    try:
        # Parse initial DMI decoding errors
        dmi.ProcessWarnings()

        rteval = RtEval(sys.argv[1:])
        ec = rteval.rteval()
        rteval.debug("exiting with exit code: %d" % ec)
        sys.exit(ec)
    except KeyboardInterrupt:
        sys.exit(0)