summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author jistone <jistone> 2006-02-28 02:27:27 +0000
committer jistone <jistone> 2006-02-28 02:27:27 +0000
commit 5425ac506de8c6e2ba7294c4ee03c9eba5a845cf (patch)
tree 2621dd8369092fd7cbfa7fc25b31ba3e8d5b1b46
parent 16c1c80843a7115253b5f2844c3cda9089a72232 (diff)
downloadsystemtap-steved-5425ac506de8c6e2ba7294c4ee03c9eba5a845cf.tar.gz
systemtap-steved-5425ac506de8c6e2ba7294c4ee03c9eba5a845cf.tar.xz
systemtap-steved-5425ac506de8c6e2ba7294c4ee03c9eba5a845cf.zip
2006-02-27 Josh Stone <joshua.i.stone@intel.com>
* safety/*: Add a static safety checker.
-rw-r--r-- safety/README 10
-rw-r--r-- safety/data/opcodes-i686 107
-rw-r--r-- safety/data/opcodes-x86_64 104
-rw-r--r-- safety/data/references 82
-rwxr-xr-x safety/safety.py 232
5 files changed, 535 insertions, 0 deletions
diff --git a/safety/README b/safety/README
new file mode 100644
index 00000000..61eda4a2
--- /dev/null
+++ b/safety/README
@@ -0,0 +1,10 @@
+This is a static safety-checker for SystemTap modules. It attempts to
+validate modules by checking the opcodes used and the external references
+against a whitelist.
+
+The script relies on external data files to provide the whitelists, which by
+default are in the <script-dir>/data directory. The 'references' file
+provides a plain list of allowed references. The 'opcodes' file provides a
+list of regular expressions that match allowed opcodes. Either data file may
+have an optional kernel and/or architecture suffix, as in 'opcodes-i686' or
+'references-2.6.9-32.ELsmp-x86_64'.
diff --git a/safety/data/opcodes-i686 b/safety/data/opcodes-i686
new file mode 100644
index 00000000..123fa2b0
--- /dev/null
+++ b/safety/data/opcodes-i686
@@ -0,0 +1,107 @@
+aaa
+aad
+aam
+aas
+adc[bwl]?
+add[bwl]?
+and[bwl]?
+bound[wl]?
+bsf[wl]?
+bsr[wl]?
+bswapl?
+btc[wl]?
+btr[wl]?
+bts[wl]?
+bt[wl]?
+call
+cbtw
+cbw
+cdq
+clc
+cld
+cli
+cltd
+cmc
+cmovn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)[wl]?
+cmp[bwl]?
+cmps[bwl]?
+cmpxchg8b
+cmpxchg[bwl]?
+cpuid
+cwd
+cwde
+cwtd
+cwtl
+daa
+das
+dec[bwl]?
+div[bwl]?
+enter
+idiv[bwl]?
+imul[bwl]?
+inc[bwl]?
+je?cxz
+jmp
+jn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)
+lcall
+lds[wl]?
+leave
+lea[wl]?
+les[wl]?
+lfence
+lfs[wl]?
+lgs[wl]?
+lods[bwl]?
+loopn?[ze]?
+lret
+lss[wl]?
+mfence
+movaps
+mov[bwl]?
+movs[bwl]?
+movsb[wl]?
+movswl?
+movzb[wl]?
+movzwl?
+mul[bwl]?
+neg[bwl]?
+nop
+not[bwl]?
+or[bwl]?
+pause
+popa[wl]?
+popf[wl]?
+pop[wl]?
+prefetch(?:t[012]|nta)
+pusha[wl]?
+pushf[wl]?
+push[wl]?
+rcl[bwl]?
+rcr[bwl]?
+rdmsr
+rdtsc
+ret
+rol[bwl]?
+ror[bwl]?
+sahf
+sal[bwl]?
+sar[bwl]?
+sbb[bwl]?
+scas[bwl]?
+setn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)
+shl[bwl]?
+shld[bwl]?
+shr[bwl]?
+shrd[bwl]?
+smov[lw]?
+stc
+std
+sti
+stos[bwl]?
+sub[bwl]?
+test[bwl]?
+xadd[bwl]?
+xchg[bwl]?
+xlat
+xlatb
+xor[bwl]?
diff --git a/safety/data/opcodes-x86_64 b/safety/data/opcodes-x86_64
new file mode 100644
index 00000000..b89df879
--- /dev/null
+++ b/safety/data/opcodes-x86_64
@@ -0,0 +1,104 @@
+adc[bwlq]?
+add[bwlq]?
+and[bwlq]?
+boundl?
+bsf[wlq]?
+bsr[wlq]?
+bswap[lq]?
+btc[wlq]?
+btr[wlq]?
+bts[wlq]?
+bt[wlq]?
+callq?
+cbtw
+cbw
+cdq
+cdqe
+clc
+cld
+cli
+cltd
+cltq
+cmc
+cmovn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)[wlq]?
+cmp[bwlq]?
+cmps[bwlq]?
+cmpxchg16b
+cmpxchg8b
+cmpxchg[bwlq]?
+cpuid
+cqo
+cqtd
+cqto
+cwd
+cwde
+cwtd
+cwtl
+dec[bwlq]?
+div[bwlq]?
+enterq?
+idiv[bwlq]?
+imul[bwlq]?
+inc[bwlq]?
+jcxz
+jmpq?
+jn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)
+lcallq?
+leaveq?
+lea[wlq]?
+lfence
+lfs[wl]?
+lgs[wl]?
+lods[bwlq]?
+loopn?[ze]?
+lretq?
+lss[wl]?
+mfence
+movaps
+mov[bwlq]?
+movs[bwlq]?
+movsb[wlq]?
+movslq?
+movsw[lq]?
+movzb[wlq]?
+movzw[lq]?
+mul[bwlq]?
+neg[bwlq]?
+nop
+not[bwlq]?
+or[bwlq]?
+pause
+popf[wlq]?
+pop[wlq]?
+prefetch(?:t[012]|nta)
+pushf[wlq]?
+push[wlq]?
+rcl[bwlq]?
+rcr[bwlq]?
+rdmsr
+rdtsc
+retq?
+rol[bwlq]?
+ror[bwlq]?
+sahf
+sal[bwlq]?
+sar[bwlq]?
+sbb[bwlq]?
+scas[bwlq]?
+setn?(?:a|ae|b|be|c|e|g|ge|l|le|o|p|pe|po|s|z)
+shl[bwlq]?
+shld[bwlq]?
+shr[bwlq]?
+shrd[bwlq]?
+smov[lw]?
+stc
+std
+sti
+stos[bwlq]?
+sub[bwlq]?
+test[bwlq]?
+xadd[bwlq]?
+xchg[bwlq]?
+xlat
+xlatb
+xor[bwlq]?
diff --git a/safety/data/references b/safety/data/references
new file mode 100644
index 00000000..dc674417
--- /dev/null
+++ b/safety/data/references
@@ -0,0 +1,82 @@
+__alloc_percpu
+autoremove_wake_function
+__bitmap_weight
+cond_resched
+__const_udelay
+copy_from_user
+__copy_from_user_ll
+copy_to_user
+copy_user_generic
+cpu_callout_map
+cpu_online_map
+cpu_possible_map
+cpu_to_node
+create_proc_entry
+del_timer_sync
+do_gettimeofday
+__down_failed
+find_next_bit
+finish_wait
+free_percpu
+__get_user_4
+init_timer
+__init_timer_base
+jiffies
+kallsyms_lookup_name
+kfree
+__kmalloc
+kmalloc_node
+kmem_cache_alloc
+malloc_sizes
+memcmp
+memset
+__might_sleep
+__mod_timer
+mod_timer
+msleep
+node_online_map
+param_get_int
+param_get_long
+param_get_string
+param_set_copystring
+param_set_int
+param_set_long
+prepare_to_wait
+printk
+proc_mkdir
+proc_root
+_read_lock
+_read_trylock
+_read_unlock
+register_kprobe
+register_kretprobe
+register_profile_notifier
+register_timer_hook
+remove_proc_entry
+schedule
+schedule_delayed_work
+scnprintf
+simple_strtol
+snprintf
+_spin_lock
+_spin_lock_irqsave
+_spin_trylock
+_spin_unlock
+_spin_unlock_irqrestore
+sprintf
+strcmp
+strlcat
+strlcpy
+strncmp
+strncpy
+strsep
+unregister_kprobe
+unregister_kretprobe
+unregister_profile_notifier
+unregister_timer_hook
+__up_wakeup
+vscnprintf
+vsnprintf
+__wake_up
+_write_trylock
+_write_unlock
diff --git a/safety/safety.py b/safety/safety.py
new file mode 100755
index 00000000..4a2094f9
--- /dev/null
+++ b/safety/safety.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim: noet sw=4 ts=4 enc=utf-8
+"A static safety-checker for SystemTap modules."
+
+# in python 2.4, set & frozenset are builtins
+# in python 2.3, the equivalents live in the 'sets' module
+from sys import hexversion as __hexversion
+if __hexversion < 0x020400f0:
+ from sets import Set as set, ImmutableSet as frozenset
+
+
def main(argv):
    """
    Command-line front end for the SystemTap static safety-checker.

    'argv' is the full argument vector, program name included; everything
    after the program name is handed to the option parser.  Each remaining
    positional argument is treated as a module and checked in the order
    given.  Run with '-h' or '--help' to see the supported options.

    Returns the number of modules that failed the check.
    """
    (options, modules) = __parse_args(argv[1:])
    checker = StaticSafety(options.arch, options.release, options.datapath)
    # Every module is checked, even after an earlier one has failed.
    failed = [name for name in modules if not checker.check_module(name)]
    return len(failed)
+
+
def __parse_args(argv):
    """
    Parse the command-line arguments (program name already removed).

    Returns optparse's (options, args) pair: 'options' carries the
    'datapath', 'arch' and 'release' attributes (None when not given) and
    'args' is the list of remaining positional arguments.
    """
    from optparse import OptionParser

    # (flags, keyword settings) for each option, in help-output order.
    option_table = (
        (('--data-path',),
         dict(dest='datapath', metavar='PATH',
              help='specify the whitelist data files [default: <script-dir>/data]')),
        (('-m', '--machine', '--architecture'),
         dict(dest='arch',
              help='specify the machine architecture of the target')),
        (('-r', '--kernel-release'),
         dict(dest='release',
              help='specify the kernel release running on the target')),
    )

    cli = OptionParser(description=__doc__,
                       usage="usage: %prog [options] [module]...")
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)
    return cli.parse_args(argv)
+
+
class StaticSafety:
    """
    Manage a safety-checking session.

    On construction the whitelists are loaded from the data directory:
    'references*' files list allowed external symbols, one per line, and
    'opcodes*' files list regular expressions matching allowed opcodes.
    Data files are looked up with an optional kernel-release and/or
    architecture suffix (for example 'opcodes-i686').  check_module() then
    validates one module at a time against those whitelists.

    Raises StandardError from the constructor when the data directory is
    missing or when either whitelist turns out to be empty.
    """

    def __init__(self, arch=None, release=None, datapath=None):
        """
        Load the whitelists for the given target.

        arch     -- machine architecture; defaults to uname()[4]
        release  -- kernel release; defaults to uname()[2]
        datapath -- data directory; defaults to '<script-dir>/data'
        """
        from os import uname
        self.__arch = arch or uname()[4]
        self.__release = release or uname()[2]
        self.__build_data_path(datapath)
        self.__build_search_suffixes()
        self.__load_allowed_references()
        self.__load_allowed_opcodes()

    def __build_data_path(self, datapath):
        "Determine where the data directory resides."
        from sys import argv
        from os.path import dirname, isdir, join, realpath
        if datapath is None:
            # Default to the 'data' directory next to the running script.
            datapath = join(dirname(realpath(argv[0])), 'data')
        self.__data_path = datapath

        if not isdir(self.__data_path):
            raise StandardError(
                "Can't find the data directory! (looking in %s)"
                % self.__data_path)

    def __build_search_suffixes(self):
        """
        Construct arch & kernel-versioning search suffixes.

        For release '2.6.9-32.ELsmp' and arch 'x86_64' the suffixes are '',
        '-x86_64', the full release, the version '-2.6.9', and the shortened
        versions '-2.6' and '-2', each release/version form both with and
        without the architecture appended.
        """
        ss = set()

        # add empty string
        ss.add('')

        # add architecture search path
        archsfx = '-%s' % self.__arch
        ss.add(archsfx)

        # add full kernel-version-release (2.6.NN-FOOBAR) + arch
        relsfx = '-%s' % self.__release
        ss.add(relsfx)
        ss.add(relsfx + archsfx)

        # add kernel version (2.6.NN) + arch
        # relsfx always starts with '-', so index 0 means the release itself
        # contains no dash and the whole string is already the bare version.
        dash_i = relsfx.rfind('-')
        if dash_i > 0:
            ss.add(relsfx[:dash_i])
            ss.add(relsfx[:dash_i] + archsfx)
            version_end = dash_i
        else:
            version_end = len(relsfx)

        # start dropping decimals
        dot_i = relsfx.rfind('.', 0, version_end)
        while dot_i > 0:
            ss.add(relsfx[:dot_i])
            ss.add(relsfx[:dot_i] + archsfx)
            dot_i = relsfx.rfind('.', 0, dot_i)

        self.__search_suffixes = frozenset(ss)

    def __read_data_lines(self, basename):
        """
        Collect the lines of every existing '<basename><suffix>' data file.

        Trailing whitespace is stripped and blank lines are dropped.  Files
        that are missing or unreadable are skipped silently, since most
        suffix combinations are not expected to exist.
        """
        lines = []
        for sfx in self.__search_suffixes:
            try:
                data = open(self.__data_path + '/' + basename + sfx)
            except IOError:
                continue
            # try/finally is nested separately from try/except so that this
            # still parses on the Python 2.3/2.4 interpreters the script
            # supports.
            try:
                try:
                    for line in data:
                        line = line.rstrip()
                        if line:
                            lines.append(line)
                except IOError:
                    # Best effort: keep what was read before the failure.
                    pass
            finally:
                data.close()
        return lines

    def __shell_quote(self, text):
        "Quote 'text' so a POSIX shell passes it through as one literal word."
        # Inside single quotes nothing is special; a literal single quote is
        # written by closing the quotes, adding \' and reopening them.
        return "'" + text.replace("'", "'\\''") + "'"

    def __load_allowed_references(self):
        "Build the list of allowed external references from the data files."
        wr = self.__read_data_lines('references')
        if not wr:
            raise StandardError("No whitelisted references found!")
        self.__white_references = frozenset(wr)

    def __load_allowed_opcodes(self):
        "Build the regular expression matcher for allowed opcodes from the data files."
        from re import compile
        wo = self.__read_data_lines('opcodes')
        if not wo:
            raise StandardError("No whitelisted opcodes found!")
        # One anchored alternation, so an opcode must match a pattern fully.
        self.__white_opcodes_re = compile(r'^(?:' + r'|'.join(wo) + r')$')

    def __check_references(self, module):
        """
        Check that all unresolved references in the module are allowed.

        Runs 'nm' on the module and returns True only when every undefined
        symbol is whitelisted and nm itself exits successfully.  Lines that
        don't look like an undefined-symbol entry produce a warning but do
        not fail the check.
        """
        from os import popen
        from re import compile

        sym_re = compile(r'^([\w@.]+) [Uw]\s+$')

        def check(line):
            m = sym_re.match(line)
            if not m:
                print('WARNING: Unmatched line:\n %s' % repr(line))
                return True
            ref = m.group(1)
            if ref in self.__white_references:
                return True
            print('ERROR: Invalid reference to %s' % ref)
            return False

        command = ('nm --format=posix --no-sort --undefined-only '
                   + self.__shell_quote(module))
        ok = True
        nm = popen(command)
        for line in nm:
            # Keep going after a failure so every bad reference is reported.
            ok &= check(line)
        # close() returns a non-zero exit status when nm itself failed.
        if nm.close():
            ok = False
        return ok

    def __check_opcodes(self, module):
        """
        Check that all disassembled opcodes in the module are allowed.

        Runs 'objdump' on the module and returns True only when every
        recognized opcode matches the whitelist and objdump itself exits
        successfully.  Unrecognized output lines produce a warning but do
        not fail the check.
        """
        from os import popen
        from re import compile

        # One-element list so the nested function can update the count.
        skip_ud2a = [0]

        ignore_re = compile(r'^$|^\s+\.{3}$|^.*Disassembly of section|^.*file format')
        opc_pat = r'(?:(?:lock )|(?:repn?[ze]? )|(?:rex\w+ ))*(\w+)\b'
        opc_re = compile(r'^[A-Fa-f\d]+ <([^>]+)> %s' % opc_pat)

        def check(line):
            if ignore_re.match(line):
                return True
            m = opc_re.match(line)
            if not m:
                print('WARNING: Unmatched line:\n %s' % repr(line))
                return True
            loc, opc = m.groups()
            if opc == 'ud2a':
                # The kernel abuses ud2a for BUG checks by following it
                # directly with __LINE__ and __FILE__.  Objdump doesn't
                # know this though, so it tries to interpret the data as
                # real instructions.  Because x86(-64) instructions are
                # variable-length, it's hard to tell when objdump is synced
                # up again.  We'll fast-forward to the next function
                # boundary and hope things are better there.
                for skip in objdump:
                    mskip = opc_re.match(skip)
                    # a loc without an offset marks a new function
                    if mskip and '+' not in mskip.group(1):
                        return check(skip)
                    skip_ud2a[0] += 1
                return True
            if self.__white_opcodes_re.match(opc):
                return True
            print("ERROR: Invalid opcode '%s' at <%s>" % (opc, loc))
            return False

        command = ('objdump --disassemble --prefix-addresses '
                   + self.__shell_quote(module))
        ok = True
        objdump = popen(command)
        for line in objdump:
            # Keep going after a failure so every bad opcode is reported.
            ok &= check(line)
        # close() returns a non-zero exit status when objdump itself failed.
        if objdump.close():
            ok = False

        # skip_ud2a[0] now holds the number of lines skipped after ud2a;
        # it is tracked for debugging only and deliberately not reported.

        return ok

    def check_module(self, module):
        """
        Check a module for exclusively safe opcodes and external references.

        Prints a PASS or FAIL line for the module and returns the verdict.
        The opcode check is skipped when the reference check already failed.
        """
        from os.path import isfile
        if not isfile(module):
            print('ERROR: %s is not a file!' % repr(module))
            return False
        res = self.__check_references(module) and self.__check_opcodes(module)
        if res:
            print('PASS: %s' % module)
        else:
            print('FAIL: %s' % module)
        return res
+
+
if __name__ == '__main__':
    from sys import exit, argv
    # main() returns the number of failed modules.  Exit statuses are
    # truncated to 8 bits, so clamp the count: otherwise exactly 256
    # failing modules would be reported to the caller as success.
    exit(min(main(argv), 255))
+