# -*- coding: utf-8 -*-
#
# Copyright © 2009 Red Hat, Inc.
#
# This software is licensed to you under the GNU Lesser General Public
# License, version 2.1 (LGPLv2.1). There is NO WARRANTY for this software,
# express or implied, including the implied warranties of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. You should have received a copy of
# LGPLv2.1 along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
#
# Red Hat trademarks are not licensed under LGPLv2.1. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated in
# this software or its documentation.
#
# Red Hat Author(s): David Hugh Malcolm
"""
Hooks for validating CPython extension source code
"""

import unittest


class CExtensionError(Exception):
    """Base class for errors discovered by static analysis in C extension code.

    ``location`` must expose ``file``, ``line`` and ``current_element``
    attributes; they are only read when the error is converted to a string.
    """

    def __init__(self, location):
        self.location = location

    def __str__(self):
        return '%s:%s:%s:%s' % (self.location.file,
                                self.location.line,
                                self.location.current_element,
                                self._get_desc())

    def _get_desc(self):
        """Hook for additional descriptive text about the error."""
        raise NotImplementedError


class FormatStringError(CExtensionError):
    """An error that relates to a specific PyArg_ParseTuple format string."""

    def __init__(self, location, format_string):
        CExtensionError.__init__(self, location)
        self.format_string = format_string


class UnknownFormatChar(FormatStringError):
    """The format string contains a code that get_types() does not recognise."""

    def __init__(self, location, format_string, ch):
        FormatStringError.__init__(self, location, format_string)
        self.ch = ch

    def _get_desc(self):
        return "unknown format char in \"%s\": '%s'" % (self.format_string, self.ch)


class UnhandledCode(UnknownFormatChar):
    """The format string contains a code that get_types() does not support yet."""

    def _get_desc(self):
        return "unhandled format code in \"%s\": '%s' (FIXME)" % (self.format_string, self.ch)


# Format codes that map directly onto a single C scalar type
# (from convertsimple in Python/getargs.c).  Built once at import time
# rather than once per character of every format string.
_SIMPLE_FORMAT_TYPES = {
    'b': 'char',
    'B': 'char',
    'h': 'short',
    'H': 'short',
    'i': 'int',
    'I': 'int',
    'n': 'Py_ssize_t',
    'l': 'long',
    'k': 'unsigned long',
    'L': 'PY_LONG_LONG',
    'K': 'unsigned PY_LONG_LONG',
    'f': 'float',
    'd': 'double',
    'D': 'Py_complex',
    'c': 'char',
}


def get_types(location, strfmt):
    """
    Generate a list of C type names from a PyArg_ParseTuple format string

    Compare to Python/getargs.c:vgetargs1

    FIXME: only implements a subset of the various cases; no tuples yet etc

    location -- where the format string was found; only stored on any
                error that gets raised
    strfmt   -- the format string, without its surrounding quote characters

    Returns a list with one C type name (as a string) per variadic argument
    that the format string consumes.

    Raises UnknownFormatChar for a code that is not recognised (including
    an 'e' that is not followed by 's'/'t' and a 't' that is not followed
    by '#'), and UnhandledCode for "O?".
    """
    result = []
    i = 0
    while i < len(strfmt):
        c = strfmt[i]
        i += 1
        # One character of lookahead for the two-character codes
        if i < len(strfmt):
            next_ch = strfmt[i]
        else:
            next_ch = None

        if c in ('(', ')', '|'):
            # Grouping and the optional-arguments marker consume no argument
            continue
        if c in (':', ';'):
            # Start of the function name / error message: no more codes
            break

        if c in _SIMPLE_FORMAT_TYPES:
            result.append(_SIMPLE_FORMAT_TYPES[c] + ' *')

        elif c in ('s', 'z'):  # string, possibly NULL/None
            if next_ch == '#':
                # FIXME: is PY_SSIZE_T_CLEAN defined?  If it is not, the
                # length argument is an 'int *' rather than 'Py_ssize_t *'
                result += ['const char * *', 'Py_ssize_t *']
                i += 1
            elif next_ch == '*':
                result.append('Py_buffer *')
                i += 1
            else:
                result.append('const char * *')
                # FIXME: seeing lots of (const char**) versus (char**)
                # mismatches here; do we care?

        elif c == 'e':
            if next_ch not in ('s', 't'):
                # Silently consuming nothing here would later be reported
                # as a misleading "too many arguments" error
                raise UnknownFormatChar(location, strfmt, c)
            result += ['const char *', 'char * *']
            i += 1
            if i < len(strfmt) and strfmt[i] == '#':
                result.append('int *')
                i += 1

        elif c in ('S', 'U'):
            result.append('PyObject * *')

        elif c == 'O':  # object
            if next_ch == '!':
                result += ['PyTypeObject *', 'PyObject * *']
                i += 1
            elif next_ch == '?':
                raise UnhandledCode(location, strfmt, c + next_ch)  # FIXME
            elif next_ch == '&':
                # FIXME: can't really handle this case as is, fixing for
                # fcntmodule.c
                result += ['int ( PyObject * object , int * target )',  # converter
                           'int *']  # FIXME, anything
                i += 1
            else:
                result.append('PyObject * *')

        elif c == 'w':
            if next_ch == '#':
                result += ['char * *', 'Py_ssize_t *']
                i += 1
            elif next_ch == '*':
                result.append('Py_buffer *')
                i += 1
            else:
                result.append('char * *')

        elif c == 't':
            if next_ch != '#':
                # Only the "t#" form is handled; see the 'e' case above for
                # why this must not be silently ignored
                raise UnknownFormatChar(location, strfmt, c)
            result += ['char * *', 'int *']
            i += 1

        else:
            raise UnknownFormatChar(location, strfmt, c)

    return result


class WrongNumberOfVars(FormatStringError):
    """The number of arguments passed differs from what the format needs."""

    def __init__(self, location, format_string, exp_types, num_args):
        FormatStringError.__init__(self, location, format_string)
        self.exp_types = exp_types
        self.num_args = num_args


class NotEnoughVars(WrongNumberOfVars):
    """Fewer arguments were passed than the format string consumes."""

    def _get_desc(self):
        return 'Not enough arguments in "%s" : expected %i (%s), but got %i' % (
            self.format_string, len(self.exp_types), self.exp_types, self.num_args)


class TooManyVars(WrongNumberOfVars):
    """More arguments were passed than the format string consumes."""

    def _get_desc(self):
        return 'Too many arguments in "%s": expected %i (%s), but got %i' % (
            self.format_string, len(self.exp_types), self.exp_types, self.num_args)


class MismatchingType(FormatStringError):
    """An argument's C type differs from the type its format code expects."""

    def __init__(self, location, format_string, arg_num, exp_type, actual_type):
        # Name the base class explicitly: super(self.__class__, self) would
        # recurse without end as soon as this class is subclassed
        FormatStringError.__init__(self, location, format_string)
        self.arg_num = arg_num
        self.exp_type = exp_type
        self.actual_type = actual_type

    def _get_desc(self):
        return 'Mismatching type of argument %i in "%s": expected "%s" but got "%s"' % (
            self.arg_num, self.format_string, self.exp_type, self.actual_type)


def strip_prefix(t, prefix):
    """Return t without the leading prefix, or t unchanged if it is absent."""
    if t.startswith(prefix):
        return t[len(prefix):]
    else:
        return t


def simplify_type(t):
    """Drop leading 'signed '/'unsigned '/'const ' qualifiers from a type name.

    The qualifiers are stripped repeatedly until none is left at the front,
    so the result does not depend on the order in which they were written.
    """
    # do we really care about char/const char mismatches?:
    while True:
        simplified = t
        for prefix in ('signed ', 'unsigned ', 'const '):
            simplified = strip_prefix(simplified, prefix)
        if simplified == t:
            return t
        t = simplified


def type_equality(t1, t2):
    """Return True if two C type names are equal once simplified."""
    return simplify_type(t1) == simplify_type(t2)


def validate_type(location, format_string, index, actual_num_args, actual_type):
    """Check one argument of a call against its format string.

    location        -- where the call was found; used when reporting errors
    format_string   -- the format string, including its surrounding quotes
    index           -- zero-based position of the argument being checked
    actual_num_args -- how many variadic arguments the call passes
    actual_type     -- C type name of the argument at ``index``

    Any problem found is printed.  Returns 1 if a problem was found and 0
    otherwise.
    """
    try:
        # strip leading and trailing " chars
        exp_types = get_types(location, format_string[1:-1])
        if actual_num_args < len(exp_types):
            raise NotEnoughVars(location, format_string, exp_types, actual_num_args)
        if actual_num_args > len(exp_types):
            raise TooManyVars(location, format_string, exp_types, actual_num_args)
        exp_type = exp_types[index]
        if not type_equality(exp_type, actual_type):
            raise MismatchingType(location, format_string, index + 1,
                                  exp_type, actual_type)
    except CExtensionError as err:
        # Single-argument print(...) behaves identically on Python 2 and 3
        print(err)
        return 1
    return 0


class TestArgParsing(unittest.TestCase):
    """Checks get_types() against format strings taken from CPython modules."""

    def assert_args(self, arg_str, exp_result):
        result = get_types(None, arg_str)
        self.assertEqual(result, exp_result)

    def test_simple_cases(self):
        self.assert_args('c', ['char *'])

    def test_socketmodule_socket_htons(self):
        self.assert_args('i:htons', ['int *'])

    def test_fcntlmodule_fcntl_flock(self):
        # FIXME: somewhat broken, we can't know what the converter callback is
        self.assert_args("O&i:flock",
                         ['int ( PyObject * object , int * target )',
                          'int *',
                          'int *'])

    def test_posixmodule_listdir(self):
        self.assert_args("et#:listdir",
                         ['const char *', 'char * *', 'int *'])

    def test_bsddb_DBSequence_set_range(self):
        self.assert_args("(LL):set_range",
                         ['PY_LONG_LONG *', 'PY_LONG_LONG *'])


if __name__ == '__main__':
    unittest.main()