diff options
author | David Malcolm <dmalcolm@redhat.com> | 2009-11-13 20:03:58 -0500 |
---|---|---|
committer | David Malcolm <dmalcolm@redhat.com> | 2009-11-13 20:03:58 -0500 |
commit | bca47923dcb4a31b53c60afbe8ccc1b9a82eec48 (patch) | |
tree | 7d807ca507e9d467592eb34cde8bb774b3390c7d | |
parent | 7e870a7a0ef5b871fef56d51bc9d8eb47518f4d8 (diff) | |
download | check-cpython-bca47923dcb4a31b53c60afbe8ccc1b9a82eec48.tar.gz check-cpython-bca47923dcb4a31b53c60afbe8ccc1b9a82eec48.tar.xz check-cpython-bca47923dcb4a31b53c60afbe8ccc1b9a82eec48.zip |
Count errors and exit with the count; cleanup spatch hooks; don't be fussy about const; implement more of getargs.c
-rw-r--r-- | pyarg-parsetuple.cocci | 87 | ||||
-rw-r--r-- | validate.py | 143 |
2 files changed, 189 insertions, 41 deletions
diff --git a/pyarg-parsetuple.cocci b/pyarg-parsetuple.cocci index 706a489..a33e678 100644 --- a/pyarg-parsetuple.cocci +++ b/pyarg-parsetuple.cocci @@ -1,38 +1,85 @@ +@initialize:python@ +""" +Analyze format strings, compare to vararg types actually passed + +FIXME: generalize this to varargs +""" +import sys +sys.path.append('.') +from validate import validate_types +num_errors = 0 + @ParseTuple_1@ -position p1; +position pos; expression args; expression fmt; type t1; t1 e1; @@ -PyArg_ParseTuple(args, fmt@p1, e1) - -@initialize:python@ -import sys -sys.path.append('.') -from validate import validate_types +PyArg_ParseTuple@pos(args, fmt, e1) @script:python@ +pos << ParseTuple_1.pos; args << ParseTuple_1.args; fmt << ParseTuple_1.fmt; t1 << ParseTuple_1.t1; -p1 << ParseTuple_1.p1; @@ -""" -Analyze format strings, compare to vararg types actually passed - -FIXME: generalize this to varargs -""" - -#print "args: %s" % args -#print "fmt: %s" % fmt -#print "var1: %s" % t1 -#print get_types(fmt.expr) - # For some reason, locations are coming as a 1-tuple containing a Location (from # coccilibs.elems), rather than the location itself # Hence we use p1[0], not p1 -validate_types(p1[0], fmt.expr, [t1]) +num_errors += validate_types(pos[0], fmt.expr, [t1]) + + + +@ParseTuple_2@ +position pos; +expression args; +expression fmt; +type t1; +t1 e1; +type t2; +t2 e2; +@@ + +PyArg_ParseTuple(args@pos, fmt, e1, e2) + +@script:python@ +args << ParseTuple_2.args; +fmt << ParseTuple_2.fmt; +pos << ParseTuple_2.pos; +t1 << ParseTuple_2.t1; +t2 << ParseTuple_2.t2; +@@ +num_errors += validate_types(pos[0], fmt.expr, [t1, t2]) + + + +@ParseTuple_3@ +position pos; +expression args; +expression fmt; +type t1; t1 e1; +type t2; t2 e2; +type t3; t3 e3; +@@ + +PyArg_ParseTuple(args@pos, fmt, e1, e2, e3) + +@script:python@ +pos << ParseTuple_3.pos; +args << ParseTuple_3.args; +fmt << ParseTuple_3.fmt; +pos << ParseTuple_3.pos; +t1 << ParseTuple_3.t1; +t2 << ParseTuple_3.t2; +t3 << ParseTuple_3.t3; +@@ +num_errors += validate_types(pos[0], fmt.expr, [t1, t2, t3]) + + +@script:python @ +@@ +sys.exit(num_errors) diff --git a/validate.py b/validate.py index 950e0e7..0c6860d 100644 --- a/validate.py +++ b/validate.py @@ -1,36 +1,117 @@ """ Hooks for validating CPython extension source code """ -def get_types(strfmt): +class CExtensionError(Exception): + # Base class for errors discovered by static analysis in C extension code + def __init__(self, location): + self.location = location + + def __str__(self): + return '%s:%s: %s' % (self.location.file, + self.location.line, + self._get_desc()) + + def _get_desc(self): + raise NotImplementedError + +class UnknownFormatChar(CExtensionError): + def __init__(self, location, ch): + CExtensionError.__init__(self, location) + self.ch = ch + + def _get_desc(self): + return "unknown format char: '%s'" % self.ch + +class UnhandledCode(UnknownFormatChar): + def _get_desc(self): + return "unhandled format code: '%s' (FIXME)" % self.ch + + +def get_types(location, strfmt): """ Generate a list of C type names from a PyArg_ParseTuple format string Compare to Python/getargs.c:vgetargs1 + FIXME: only implements a very small subset of the various cases; no tuples, etc """ result = [] i = 0 while i < len(strfmt): c = strfmt[i] - simple = {'i':'int', - 's':'char *'} - if c in simple: - result.append(simple[c] + ' *') + i += 1 + if i < len(strfmt): + next = strfmt[i] + else: + next = None + + # FIXME: '(', ')' + if c in [':', ';']: break - i += 1 - return result -class CExtensionError(Exception): - # Base class for errors discovered by static analysis in C extension code - def __init__(self, location): - self.location = location + if c =='|': + continue - def __str__(self): - return '%s:%s: %s' % (self.location.file, - self.location.line, - self._get_desc()) + # From convertsimple: + simple = {'b':'char', + 'B':'char', + 'h':'short', + 'H':'short', + 'i':'int', + 'I':'int', + 'n':'Py_ssize_t', + 'l':'long', + 'k':'unsigned long', + # L, K: FIXME + 'f':'float', + 'd':'double', + # D: FIXME, + 'c':'char', + } + if c in simple: + result.append(simple[c] + ' *') - def _get_desc(self): - raise NotImplementedError + elif c in ['s', 'z']: # string, possibly NULL/None + if next == '#': + result += ['const char * *', 'int *'] + i += 1 + elif next == '*': + result.append('Py_buffer *') + i += 1 + else: + result.append('const char * *') + # FIXME: seeing lots of (const char**) versus (char**) mismatches here + # do we care? + + elif c == 'e': + if next in ['s', 't']: + result += ['const char *', 'char * *'] + i += 1 + if i < len(strfmt): + if strfmt[i] == '#': + result.append('int *') + elif c == 'S': + result.append('PyObject * *') + elif c == 'U': + result.append('PyObject * *') + elif c == 'O': # object + if next == '!': + result += ['PyTypeObject * *', 'PyObject * *'] + i += 1 + elif next == '?': + raise UnhandledCode(location, c + next) # FIXME + elif next == '&': + raise UnhandledCode(location, c + next) # FIXME + else: + result.append('PyObject * *') + elif c == 'w': + raise UnhandledCode(location, c) # FIXME + elif c == 't': + if next == '#': + result += ['char * *', 'int *'] + i += 1 + else: + raise UnknownFormatChar(location, c) + return result class WrongNumberOfVars(CExtensionError): @@ -68,16 +149,36 @@ class MismatchingType(CExtensionError): self.exp_type, self.actual_type) + +def type_equality(t1, t2): + if t1 == t2: + return True + if t1.startswith('const char *'): + if t1 == 'const '+t2: + return True + if t2.startswith('const char *'): + if 'const '+t1 == t2: + return True + return False + def validate_types(location, format_string, actual_types): + if False: + print 'validate_types(%s, %s, %s)' % ( + repr(location), repr(format_string), repr(actual_types)) try: - exp_types = get_types(format_string) + exp_types = get_types(location, format_string[1:-1]) # strip leading and trailing " chars if len(actual_types) < len(exp_types): - raise NotEnoughVars(location, actual_types, exp_types) + raise NotEnoughVars(location, exp_types, actual_types) if len(actual_types) > len(exp_types): - raise TooManyVars(location, actual_types, exp_types) + raise TooManyVars(location, exp_types, actual_types) for i, (exp, actual) in enumerate(zip(exp_types, actual_types)): - if exp != actual: + if not type_equality(exp, actual): raise MismatchingType(location, i+1, exp, actual) except CExtensionError, err: print err + if True: + print 'validate_types(%s, %s, %s)' % ( + repr(location), repr(format_string), repr(actual_types)) + return 1 + return 0 |