#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright © 2010 Red Hat, Inc.
#
# This software is licensed to you under the GNU Lesser General Public
# License, version 2.1 (LGPLv2.1). There is NO WARRANTY for this software,
# express or implied, including the implied warranties of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. You should have received a copy of
# LGPLv2.1 along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
#
# Red Hat trademarks are not licensed under LGPLv2.1. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated in
# this software or its documentation.
#
# Red Hat Author(s): David Hugh Malcolm
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types.

This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython.  In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.

This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables

In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process.  For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
  ["foo", "bar", "baz"]

We try to defer all gdb.lookup_type() invocations until as late as possible:
when the /usr/bin/python process starts in the debugger, the libpython.so
hasn't been dynamically loaded yet, so none of the type names are known to
the debugger

TODO:
  - currently only tested against libpython2.6; make it work with libpython3.1
    as well
'''

import gdb

# The interpreter embedded in gdb may be Python 2 or Python 3.  Python 3 has
# neither "long" nor "xrange", so alias them to their Python 3 equivalents
# when they are missing.
try:
    long
except NameError:
    long = int
    xrange = range


class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested through a NULL pointer."""
    pass


class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """

    def __init__(self, gdbval, cast_to=None):
        """
        Wrap gdbval, first casting it to the gdb.Type cast_to if one is
        given.
        """
        if cast_to is not None:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        """
        Dereference the wrapped pointer and return the named struct field
        as a gdb.Value.

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        """
        if self.is_null():
            raise NullPyObjectPtr(self)
        return self._gdbval.dereference()[name]

    def type(self):
        """Return a PyTypeObjectPtr wrapping this object's ob_type field."""
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        """Return True if the wrapped pointer is NULL."""
        return 0 == long(self._gdbval)

    def proxyval(self):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """
            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        try:
            tp_name = self.type().field('tp_name').string()
        except RuntimeError:
            # NULL pointer somewhere along the way (NullPyObjectPtr is a
            # RuntimeError), or gdb could not read the inferior's memory:
            # fall back to a placeholder name rather than propagating.
            tp_name = 'unknown'
        return FakeRepr(tp_name, long(self._gdbval))

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        """
        Wrap gdbval in the most specific PyObjectPtr subclass available.

        The type name is read from the object's ob_type->tp_name; if it is
        one we have a subclass for, the pointer is cast to the matching
        struct type and wrapped in that subclass.  On any RuntimeError (NULL
        pointer, unreadable memory, unknown gdb type, ...) or for an
        unrecognized type name, gdbval is wrapped uncast in cls.
        """
        # The map is built here rather than at class level because it refers
        # to subclasses that are defined later in the module.
        name_map = {
            'dict': PyDictObjectPtr,
            'int': PyIntObjectPtr,
            'list': PyListObjectPtr,
            'str': PyStringObjectPtr,
            'tuple': PyTupleObjectPtr,
            'frame': PyFrameObjectPtr,
        }
        try:
            tp_name = PyObjectPtr(gdbval).type().field('tp_name').string()
            refined = name_map.get(tp_name)
            if refined is not None:
                return refined(gdbval, cast_to=refined.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the
            # class we were called on.  "cls" is deliberately not rebound
            # above, so a failed cast cannot leave us building a refined
            # wrapper around an uncast pointer.
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        """
        Return the gdb.Type for a pointer to this class's struct
        (cls._typename).  Looked up on every call so that nothing is
        resolved before libpython has been loaded by the inferior.
        """
        return gdb.lookup_type(cls._typename).pointer()


class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. an instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'


class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def proxyval(self):
        """
        Return a dict mapping the proxy value of each key to the proxy value
        of its value, for every table slot whose me_value is non-NULL.
        """
        result = {}
        table = self.field('ma_table')
        # ma_mask is the slot count minus one, so the table holds
        # ma_mask + 1 entries; the final slot must be visited too.
        slot_count = int_from_int(self.field('ma_mask')) + 1
        for i in xrange(slot_count):
            ep = table + i
            pvalue = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pvalue.is_null():
                pkey = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                result[pkey.proxyval()] = pvalue.proxyval()
        return result


class PyIntObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyIntObject* i.e. an int instance
    within the process being debugged.
    """
    _typename = 'PyIntObject'

    def proxyval(self):
        """Return the inferior's ob_ival as an int in this process."""
        return int_from_int(self.field('ob_ival'))


class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        field_ob_item = self.field('ob_item')
        return field_ob_item[i]

    def proxyval(self):
        """Return a list of the proxy values of the first ob_size items."""
        size = int_from_int(self.field('ob_size'))
        return [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval()
                for i in range(size)]


class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame
    instance within the process being debugged.
    """
    _typename = 'PyFrameObject'

    def __str__(self):
        """Return the FrameInfo description of this frame."""
        return str(FrameInfo(self))


class PyStringObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyStringObject* i.e. a str instance
    within the process being debugged.
    """
    _typename = 'PyStringObject'

    def __str__(self):
        """Return the contents of ob_sval, read as a C string."""
        # Lookup the gdb.Type for "char*"
        _type_char_ptr = gdb.lookup_type('char').pointer()
        # ob_sval is an array embedded in the struct; take its address and
        # view it as a char* so gdb can read it as a string.
        field_ob_sval = self.field('ob_sval')
        char_ptr = field_ob_sval.address.cast(_type_char_ptr)
        return char_ptr.string()

    def proxyval(self):
        """Return the string contents as a str in this process."""
        return str(self)


class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTupleObject* i.e. a tuple instance
    within the process being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        field_ob_item = self.field('ob_item')
        return field_ob_item[i]

    def proxyval(self):
        """Return a tuple of the proxy values of the first ob_size items."""
        size = int_from_int(self.field('ob_size'))
        return tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval()
                      for i in range(size)])


class PyTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTypeObject* i.e. a type instance
    within the process being debugged.
    """
    _typename = 'PyTypeObject'


def int_from_int(gdbval):
    """Convert a gdb.Value to an int by parsing its string form."""
    return int(str(gdbval))


class FrameInfo(object):
    '''
    Class representing all of the information we can scrape about a
    PyFrameObject*
    '''

    def __init__(self, fval):
        """
        Scrape the code object, function name, filename, line number and
        local variables from fval, a PyObjectPtr wrapping a PyFrameObject*.
        """
        self.fval = fval
        self.co = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code'))
        self.co_name = PyStringObjectPtr.from_pyobject_ptr(
            self.co.field('co_name'))
        self.co_filename = PyStringObjectPtr.from_pyobject_ptr(
            self.co.field('co_filename'))
        self.f_lineno = int_from_int(fval.field('f_lineno'))
        self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(
            self.co.field('co_varnames'))
        self.locals = []  # list of kv pairs
        f_localsplus = self.fval.field('f_localsplus')
        # arbitrary upper sanity limit in case co_nlocals is corrupt
        for i in xrange(min(self.co_nlocals, 200)):
            value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not value.is_null():
                name = PyStringObjectPtr.from_pyobject_ptr(
                    self.co_varnames[i])
                value = value.proxyval()
                self.locals.append((str(name), value))

    def __str__(self):
        return ('File %s, line %i, in %s (%s)'
                % (self.co_filename,
                   self.f_lineno,
                   self.co_name,
                   ', '.join(['%s=%s' % (k, repr(v))
                              for k, v in self.locals]))
                )


def pyframe():
    """
    Print the FrameInfo for the variable "f" read from the currently
    selected gdb frame.
    """
    fval = PyFrameObjectPtr.from_pyobject_ptr(
        gdb.selected_frame().read_var('f'))
    fi = FrameInfo(fval)
    print(fi)


def mybt():
    """
    Print one line per frame of the selected thread: the frame index, a
    description of the Python frame when the function is
    PyEval_EvalFrameEx, then the pc and function name.
    """
    def describe_eval_frame(gdbframe):
        # Return the text describing the Python frame held in "f", or a
        # parenthesized explanation of why it could not be produced.
        try:
            f = gdbframe.read_var('f')
        except ValueError:
            return '(unable to print python frame, could not access "f")'
        try:
            fval = PyFrameObjectPtr.from_pyobject_ptr(f)
            return str(FrameInfo(fval))
        except RuntimeError:
            return '(unable to print python frame; corrupt data?)'

    # NOTE(review): this relies on the selected thread object providing a
    # frames() method; confirm that the gdb build in use offers it.
    for i, gdbframe in enumerate(gdb.selected_thread().frames()):
        # Pieces are joined with single spaces, matching what consecutive
        # Python 2 "print x," statements would have emitted.
        parts = ['#%i' % i]
        if 'PyEval_EvalFrameEx' == gdbframe.name():
            parts.append(describe_eval_frame(gdbframe))
        parts.append(' 0x%x in %s' % (gdbframe.pc(), gdbframe.name()))
        print(' '.join(parts))

#pyframe()
#if gdb.selected_thread():
#    mybt()


class PyObjectPtrPrinter(object):
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        """Return repr() of the proxy value for the wrapped pointer."""
        proxyval = PyObjectPtr.from_pyobject_ptr(self.gdbval).proxyval()
        return repr(proxyval)


class PyFrameObjectPtrPrinter(PyObjectPtrPrinter):
    "Prints a (PyFrameObject*)"

    def to_string(self):
        """Return the FrameInfo description of the wrapped frame pointer."""
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        fi = FrameInfo(pyop)
        return str(fi)


def pretty_printer_lookup(gdbval):
    """
    Return a printer for gdbval if its type (or, for a pointer, the type it
    points to) is named PyObject or PyFrameObject; otherwise return None.
    """
    valtype = gdbval.type.unqualified()
    if valtype.code == gdb.TYPE_CODE_PTR:
        valtype = valtype.target().unqualified()
    t = str(valtype)
    if t == "PyObject":
        return PyObjectPtrPrinter(gdbval)
    elif t == "PyFrameObject":
        return PyFrameObjectPtrPrinter(gdbval)
    return None


gdb.pretty_printers.append(pretty_printer_lookup)

"""
(gdb) python

import sys
sys.path.append('/home/david/coding/python-gdb')
import libpython
reload(libpython)

end
"""

# TODO:
#   - write actual backtrace
#   - integrate into backtrace hook