diff options
Diffstat (limited to 'libpython.py')
-rw-r--r-- | libpython.py | 277 |
1 files changed, 191 insertions, 86 deletions
diff --git a/libpython.py b/libpython.py index 1ede863..e994af2 100644 --- a/libpython.py +++ b/libpython.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # -# Copyright © 2010 Red Hat, Inc. +# Copyright © 2010 David Hugh Malcolm # # This software is licensed to you under the GNU Lesser General Public # License, version 2.1 (LGPLv2.1). There is NO WARRANTY for this software, @@ -10,11 +10,7 @@ # LGPLv2.1 along with this software; if not, see # http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt # -# Red Hat trademarks are not licensed under LGPLv2.1. No permission is -# granted to use or replicate Red Hat trademarks that are incorporated in -# this software or its documentation. -# -# Red Hat Author(s): David Hugh Malcolm <dmalcolm@redhat.com> +# Author: Dave Malcolm <dmalcolm@redhat.com> ''' From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb to be extended with Python code e.g. for library-specific data visualizations, @@ -49,30 +45,43 @@ TODO: better handling of "instance" import gdb +# Look up the gdb.Type for some standard types: +_type_char_ptr = gdb.lookup_type('char').pointer() # char* +_type_void_ptr = gdb.lookup_type('void').pointer() # void* +_type_size_t = gdb.lookup_type('size_t') + +SIZEOF_VOID_P = _type_void_ptr.sizeof + + +Py_TPFLAGS_HEAPTYPE = (1L << 9) + +Py_TPFLAGS_INT_SUBCLASS = (1L << 23) +Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) +Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) +Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) +Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) +Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) +Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) +Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) + + class NullPyObjectPtr(RuntimeError): pass + def safety_limit(val): # Given a integer value from the process being debugged, limit it to some # safety threshold so that arbitrary breakage within said process doesn't # break the gdb process too much (e.g. sizes of iterations, sizes of lists) return min(val, 100) + def safe_range(val): # As per range, but don't trust the value too much: cap it to a safety # threshold in case the data was corrupted return xrange(safety_limit(val)) -def is_py3k(): - # This code assumes that a libpython's DWARF data has actually been - # loaded by the point that this function is called - sym = gdb.lookup_symbol('PyBytes_Type')[0] - if sym: - #...then PyBytes_Type exists, assume this is libpython3.* - return True - else: - #...then PyBytes_Type doesn't exist, assume this is libpython2.* - return False class PyObjectPtr(object): """ @@ -87,7 +96,7 @@ class PyObjectPtr(object): """ _typename = 'PyObject' - def __init__(self, gdbval, cast_to = None): + def __init__(self, gdbval, cast_to=None): if cast_to: self._gdbval = gdbval.cast(cast_to) else: @@ -100,10 +109,10 @@ class PyObjectPtr(object): Various libpython types are defined using the "PyObject_HEAD" and "PyObject_VAR_HEAD" macros. - - In Python 2, this these are defined so that "ob_type" and (for a var + + In Python 2, this these are defined so that "ob_type" and (for a var object) "ob_size" are fields of the type in question. - + In Python 3, this is defined as an embedded PyVarObject type thus: PyVarObject ob_base; so that the "ob_size" field is located insize the "ob_base" field, and @@ -113,8 +122,9 @@ class PyObjectPtr(object): raise NullPyObjectPtr(self) if name == 'ob_type': - return self._gdbval.cast(PyObjectPtr.get_gdb_type()).dereference()[name] - + pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] + if name == 'ob_size': try: # Python 2: @@ -122,7 +132,7 @@ class PyObjectPtr(object): except RuntimeError: # Python 3: return self._gdbval.dereference()['ob_base'][name] - + # General case: look it up inside the object: return self._gdbval.dereference()[name] @@ -132,6 +142,16 @@ class PyObjectPtr(object): def is_null(self): return 0 == long(self._gdbval) + def safe_tp_name(self): + try: + return self.type().field('tp_name').string() + except NullPyObjectPtr: + # NULL tp_name? + return 'unknown' + except RuntimeError: + # Can't even read the object at all? + return 'unknown' + def proxyval(self): ''' Scrape a value from the inferior process, and try to represent it @@ -143,12 +163,14 @@ class PyObjectPtr(object): For example, a PyIntObject* with ob_ival 42 in the inferior process should result in an int(42) in this process. ''' + class FakeRepr(object): """ Class representing a non-descript PyObject* value in the inferior process for when we don't have a custom scraper, intended to have a sane repr(). """ + def __init__(self, tp_name, address): self.tp_name = tp_name self.address = address @@ -156,43 +178,70 @@ class PyObjectPtr(object): def __repr__(self): return '<%s at remote 0x%x>' % (self.tp_name, self.address) - try: - tp_name = self.type().field('tp_name').string() - except NullPyObjectPtr: - # NULL tp_name? - tp_name = 'unknown' - except RuntimeError: - # Can't even read the object at all? - tp_name = 'unknown' - return FakeRepr(tp_name, + return FakeRepr(self.safe_tp_name(), long(self._gdbval)) @classmethod - def subclass_for_tp_name(cls, tp_name): - if tp_name == 'str': - if is_py3k(): - return PyUnicodeObjectPtr - else: - return PyStringObjectPtr - if tp_name == 'int': - if is_py3k(): - return PyLongObjectPtr - else: - return PyIntObjectPtr - - name_map = {'bool' : PyBoolObjectPtr, + def subclass_from_type(cls, t): + ''' + Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a + (PyTypeObject*), determine the corresponding subclass of PyObjectPtr + to use + + Ideally, we would look up the symbols for the global types, but that + isn't working yet: + (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value + Traceback (most recent call last): + File "<string>", line 1, in <module> + NotImplementedError: Symbol type not yet supported in Python scripts. + Error while executing Python code. + + For now, we use tp_flags, after doing some string comparisons on the + tp_name for some special-cases that don't seem to be visible through + flags + ''' + try: + tp_name = t.field('tp_name').string() + tp_flags = int(t.field('tp_flags')) + except RuntimeError: + # Handle any kind of error e.g. NULL ptrs by simply using the base + # class + return cls + + #print 'tp_flags = 0x%08x' % tp_flags + #print 'tp_name = %r' % tp_name + + name_map = {'bool': PyBoolObjectPtr, 'classobj': PyClassObjectPtr, - 'dict': PyDictObjectPtr, 'instance': PyInstanceObjectPtr, - 'list': PyListObjectPtr, - 'long': PyLongObjectPtr, 'NoneType': PyNoneStructPtr, - 'tuple': PyTupleObjectPtr, 'frame': PyFrameObjectPtr, - 'unicode': PyUnicodeObjectPtr, } if tp_name in name_map: return name_map[tp_name] + + if tp_flags & Py_TPFLAGS_HEAPTYPE: + return HeapTypeObjectPtr + + if tp_flags & Py_TPFLAGS_INT_SUBCLASS: + return PyIntObjectPtr + if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: + return PyLongObjectPtr + if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: + return PyListObjectPtr + if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: + return PyTupleObjectPtr + if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: + return PyStringObjectPtr + if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: + return PyUnicodeObjectPtr + if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: + return PyDictObjectPtr + #if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: + # return something + #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: + # return PyTypeObjectPtr + # Use the base class: return cls @@ -200,26 +249,11 @@ class PyObjectPtr(object): def from_pyobject_ptr(cls, gdbval): ''' Try to locate the appropriate derived class dynamically, and cast - the pointer accordingly: - For now, we just do string comparison on the tp_name - Other approaches: - (i) look up the symbols for the global types, but that isn't working yet: - (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value - Traceback (most recent call last): - File "<string>", line 1, in <module> - NotImplementedError: Symbol type not yet supported in Python scripts. - Error while executing Python code. - (ii) look at tp_flags, looking e.g. for Py_TPFLAGS_LIST_SUBCLASS however - this would rely on the values of those flags. - - So we go with the simple approach of looking at tp_name + the pointer accordingly. ''' - # try: p = PyObjectPtr(gdbval) - t = p.type() - tp_name = t.field('tp_name').string() - cls = cls.subclass_for_tp_name(tp_name) + cls = cls.subclass_from_type(p.type()) return cls(gdbval, cast_to=cls.get_gdb_type()) except RuntimeError: # Handle any kind of error e.g. NULL ptrs by simply using the base @@ -231,6 +265,68 @@ class PyObjectPtr(object): def get_gdb_type(cls): return gdb.lookup_type(cls._typename).pointer() + +class InstanceProxy(object): + + def __init__(self, cl_name, attrdict, address): + self.cl_name = cl_name + self.attrdict = attrdict + self.address = address + + def __repr__(self): + kwargs = ', '.join(["%s=%r" % (arg, val) + for arg, val in self.attrdict.iteritems()]) + return '<%s(%s) at remote 0x%x>' % (self.cl_name, + kwargs, self.address) + + +def _PyObject_VAR_SIZE(typeobj, nitems): + return ( ( typeobj.field('tp_basicsize') + + nitems * typeobj.field('tp_itemsize') + + (SIZEOF_VOID_P - 1) + ) & ~(SIZEOF_VOID_P - 1) + ).cast(_type_size_t) + +class HeapTypeObjectPtr(PyObjectPtr): + _typename = 'PyObject' + + def proxyval(self): + ''' + Support for new-style classes. + + Currently we just locate the dictionary using _PyObject_GetDictPtr, + ignoring descriptors + ''' + attr_dict = {} + + try: + typeobj = self.type() + dictoffset = int_from_int(typeobj.field('tp_dictoffset')) + if dictoffset != 0: + if dictoffset < 0: + type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() + tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) + if tsize < 0: + tsize = -tsize + size = _PyObject_VAR_SIZE(typeobj, tsize) + dictoffset += size + assert dictoffset > 0 + assert dictoffset % SIZEOF_VOID_P == 0 + + dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset + PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() + dictptr = dictptr.cast(PyObjectPtrPtr) + attr_dict = PyObjectPtr.from_pyobject_ptr(dictptr.dereference()).proxyval() + except RuntimeError: + # Corrupt data somewhere; fail safe + pass + + tp_name = self.safe_tp_name() + + # New-style class: + return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) + + class PyBoolObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two @@ -244,6 +340,7 @@ class PyBoolObjectPtr(PyObjectPtr): else: return False + class PyClassObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> @@ -251,6 +348,7 @@ class PyClassObjectPtr(PyObjectPtr): """ _typename = 'PyClassObject' + class PyCodeObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance @@ -258,6 +356,7 @@ class PyCodeObjectPtr(PyObjectPtr): """ _typename = 'PyCodeObject' + class PyDictObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance @@ -267,7 +366,7 @@ class PyDictObjectPtr(PyObjectPtr): def proxyval(self): result = {} - for i in safe_range(self.field('ma_mask')+1): + for i in safe_range(self.field('ma_mask') + 1): ep = self.field('ma_table') + i pvalue = PyObjectPtr.from_pyobject_ptr(ep['me_value']) if not pvalue.is_null(): @@ -275,20 +374,11 @@ class PyDictObjectPtr(PyObjectPtr): result[pkey.proxyval()] = pvalue.proxyval() return result + class PyInstanceObjectPtr(PyObjectPtr): _typename = 'PyInstanceObject' def proxyval(self): - class InstanceProxy(object): - def __init__(self, cl_name, attrdict, address): - self.cl_name = cl_name - self.attrdict = attrdict - self.address = address - - def __repr__(self): - kwargs = ', '.join(["%s=%r"%(arg, val) for arg, val in self.attrdict.iteritems()]) - return '<%s(%s) at remote 0x%x>' % (self.cl_name, kwargs, self.address) - # Get name of class: in_class = PyObjectPtr.from_pyobject_ptr(self.field('in_class')) cl_name = PyObjectPtr.from_pyobject_ptr(in_class.field('cl_name')).proxyval() @@ -296,8 +386,10 @@ class PyInstanceObjectPtr(PyObjectPtr): # Get dictionary of instance attributes: in_dict = PyObjectPtr.from_pyobject_ptr(self.field('in_dict')).proxyval() + # Old-style class: return InstanceProxy(cl_name, in_dict, long(self._gdbval)) + class PyIntObjectPtr(PyObjectPtr): _typename = 'PyIntObject' @@ -305,6 +397,7 @@ class PyIntObjectPtr(PyObjectPtr): result = int_from_int(self.field('ob_ival')) return result + class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -318,6 +411,7 @@ class PyListObjectPtr(PyObjectPtr): for i in safe_range(int_from_int(self.field('ob_size')))] return result + class PyLongObjectPtr(PyObjectPtr): _typename = 'PyLongObject' @@ -329,7 +423,7 @@ class PyLongObjectPtr(PyObjectPtr): digit ob_digit[1]; }; - with this description: + with this description: The absolute value of a number is equal to SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) Negative numbers are represented with ob_size < 0; @@ -351,34 +445,37 @@ class PyLongObjectPtr(PyObjectPtr): # FIXME: I haven't yet tested this case SHIFT = 30L - digits = [long(ob_digit[i]) * 2**(SHIFT*i) for i in safe_range(abs(ob_size))] + digits = [long(ob_digit[i]) * 2**(SHIFT*i) + for i in safe_range(abs(ob_size))] result = sum(digits) if ob_size < 0: result = -result return result + class PyNoneStructPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyObject* pointing to the singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type """ _typename = 'PyObject' + def proxyval(self): return None + class PyFrameObjectPtr(PyObjectPtr): _typename = 'PyFrameObject' + def __str__(self): fi = FrameInfo(self) return str(fi) + class PyStringObjectPtr(PyObjectPtr): _typename = 'PyStringObject' def __str__(self): - # Lookup the gdb.Type for "char*" - _type_char_ptr = gdb.lookup_type('char').pointer() - field_ob_sval = self.field('ob_sval') char_ptr = field_ob_sval.address.cast(_type_char_ptr) return char_ptr.string() @@ -386,6 +483,7 @@ class PyStringObjectPtr(PyObjectPtr): def proxyval(self): return str(self) + class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -399,9 +497,11 @@ class PyTupleObjectPtr(PyObjectPtr): for i in safe_range(int_from_int(self.field('ob_size')))]) return result + class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' + class PyUnicodeObjectPtr(PyObjectPtr): _typename = 'PyUnicodeObject' @@ -421,9 +521,11 @@ class PyUnicodeObjectPtr(PyObjectPtr): result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) return result + def int_from_int(gdbval): return int(str(gdbval)) + def stringify(val): # TODO: repr() puts everything on one line; pformat can be nicer, but # can lead to v.long results; this function isolates the choice @@ -433,6 +535,7 @@ def stringify(val): from pprint import pformat return pformat(val) + class FrameInfo: ''' Class representing all of the information we can scrape about a @@ -457,7 +560,7 @@ class FrameInfo: value = value.proxyval() #print 'value=%s' % value self.locals.append((str(name), value)) - + def __str__(self): return ('File %s, line %i, in %s (%s)' % (self.co_filename, @@ -466,6 +569,7 @@ class FrameInfo: ', '.join(['%s=%s' % (k, stringify(v)) for k, v in self.locals])) ) + class PyObjectPtrPrinter: "Prints a (PyObject*)" @@ -476,6 +580,7 @@ class PyObjectPtrPrinter: proxyval = PyObjectPtr.from_pyobject_ptr(self.gdbval).proxyval() return stringify(proxyval) + class PyFrameObjectPtrPrinter(PyObjectPtrPrinter): "Prints a (PyFrameObject*)" @@ -484,6 +589,7 @@ class PyFrameObjectPtrPrinter(PyObjectPtrPrinter): fi = FrameInfo(pyop) return str(fi) + def pretty_printer_lookup(gdbval): type = gdbval.type.unqualified() if type.code == gdb.TYPE_CODE_PTR: @@ -551,8 +657,7 @@ def pybt(): print fi, except RuntimeError: print '(unable to print python frame; corrupt data?)', - - + for i, gdbframe in enumerate(gdb.selected_thread().frames()): #print dir(gdbframe), gdbframe.name() if 'PyEval_EvalFrameEx' == gdbframe.name(): |