#!/usr/bin/python -tt """ Rsyncd log parsing module for Epylog """ ## # Copyright (C) 2003 by Duke University # Written by Seth Vidal # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA. # import sys import re ## # This is for testing purposes, so you can invoke this from the # modules directory. See also the testing notes at the end of the # file. # sys.path.insert(0, '../py/') from epylog import Result, InternalModule class rsyncd_mod(InternalModule): ## # opts: is a map with extra options set in # [conf] section of the module config, or on the # command line using -o flag to the module. # logger: A logging object. API: # logger.put(loglvl, 'Message') # Only critical stuff needs to go onto lvl 0. # Common output goes to lvl 1. # Others are debug levels. # def __init__(self, opts, logger): ## # Do a "super-init" so the class we are subclassing gets # instantiated. # InternalModule.__init__(self) self.logger = logger ## # Convenience # rc = re.compile self.regex_map = { rc('rsyncd\[\d+\]: rsync on'): self.rsync_hosts, rc('rsyncd\[\d+\]: (?:sent|wrote)\s\S*\sbytes'): self.rsync_results } self.topcount = int(opts.get('report_top', 5)) #get report_top, default to 5 if not set ig_s = opts.get('ignore_hosts', '') ig_s.replace(',',' ') self.ignore_hosts = ig_s.split(' ') # dict to store all of our data self.rsync_pid_bytes = {} self.rsync_pid_host = {} self.rsync_host_loc = rc('rsyncd\[(\d+)\]: rsync\son\s(\S*)\sfrom\s.*\((\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\)') self.rsync_bytes = rc('rsyncd\[(\d+)\]: (?:sent|wrote)\s(\d+) bytes (?:read|received)\s(\d+) bytes total size (\d+)') def rsync_hosts(self, linemap): (sys, msg, multi) = self.get_smm(linemap) pid, loc, ip = self.rsync_host_loc.search(msg).groups() host = self.gethost(ip) if host not in self.ignore_hosts: self.rsync_pid_host[pid] = (host, loc) return {(loc, host): 1} def rsync_results(self, linemap): (sys, msg, multi) = self.get_smm(linemap) pid, wbytes, rbytes, tbytes = self.rsync_bytes.search(msg).groups() self.rsync_pid_bytes[pid] = (wbytes, rbytes, tbytes) return {(pid, wbytes): 1} def _uniq(self, s): """Return a list of the elements in s, but without duplicates. For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], unique("abcabc") some permutation of ["a", "b", "c"], and unique(([1, 2], [2, 3], [1, 2])) some permutation of [[2, 3], [1, 2]]. For best speed, all sequence elements should be hashable. Then unique() will usually work in linear time. If not possible, the sequence elements should enjoy a total ordering, and if list(s).sort() doesn't raise TypeError it's assumed that they do enjoy a total ordering. Then unique() will usually work in O(N*log2(N)) time. If that's not possible either, the sequence elements must support equality-testing. Then unique() will usually work in quadratic time. """ n = len(s) if n == 0: return [] # Try using a dict first, as that's the fastest and will usually # work. If it doesn't work, it will usually fail quickly, so it # usually doesn't cost much to *try* it. It requires that all the # sequence elements be hashable, and support equality comparison. u = {} try: for x in s: u[x] = 1 except TypeError: del u # move on to the next method else: return u.keys() # We can't hash all the elements. Second fastest is to sort, # which brings the equal elements together; then duplicates are # easy to weed out in a single pass. # NOTE: Python's list.sort() was designed to be efficient in the # presence of many duplicate elements. This isn't true of all # sort functions in all languages or libraries, so this approach # is more effective in Python than it may be elsewhere. try: t = list(s) t.sort() except TypeError: del t # move on to the next method else: assert n > 0 last = t[0] lasti = i = 1 while i < n: if t[i] != last: t[lasti] = last = t[i] lasti += 1 i += 1 return t[:lasti] # Brute force is all that's left. u = [] for x in s: if x not in u: u.append(x) return u def _sortByVal(self, dict, reverse=0): if type(dict) is not type({}): return [] keys = dict.keys() s = map(lambda k: (dict[k], k), keys) s.sort() if reverse: s.reverse() return s def finalize(self, resultset): ## # A resultset is a dictionary of all values returned by your # handler functions -- except they are unique and show how many # times each tuple occurs. # See epylog.Result for some convenience methods to use when # processing and analyzing the results. # hostloc = {} # key = host, val = [loc, loc, loc] hosttotal = {} # key = host val = totalwbytes foo = "\n\t\n" for pid in self.rsync_pid_host.keys(): (host, loc) = self.rsync_pid_host[pid] if self.rsync_pid_bytes.has_key(pid): if not hostloc.has_key(host): hostloc[host] = [] if not hosttotal.has_key(host): hosttotal[host] = 0L hostloc[host].append(loc) bytes = long(self.rsync_pid_bytes[pid][0]) hosttotal[host] += bytes for host in hostloc.keys(): hostloc[host] = self._uniq(hostloc[host]) hosts = self._sortByVal(hosttotal, 1) count = 0L for (tot,host) in hosts[:self.topcount]: if count % 2: bgcolor = "#dddddd" else: bgcolor = "#ffffff" count+=1 line = '\t\t\n' % (bgcolor, host) line = line + '\t\t\n' size, marker = self.mk_size_unit(hosttotal[host]) line = line + '\t\t\n' % (bgcolor, size, marker) line = line + '\t\n' foo = foo + line foo = foo + '
%s\n' % bgcolor for loc in hostloc[host]: line = line + '\t\t\t%s
\n' % loc line = line + '\t\t
%s%s
\n' return foo ## # This is useful when testing your module out. # Invoke without command-line parameters to learn about the proper # invocation. # if __name__ == '__main__': from epylog.helpers import ModuleTest ModuleTest(rsyncd_mod, sys.argv)