#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright © 2010 Red Hat, Inc.
#
# This software is licensed to you under the GNU Lesser General Public
# License, version 2.1 (LGPLv2.1). There is NO WARRANTY for this software,
# express or implied, including the implied warranties of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. You should have received a copy of
# LGPLv2.1 along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
#
# Red Hat trademarks are not licensed under LGPLv2.1. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated in
# this software or its documentation.
#
# Red Hat Author(s): David Hugh Malcolm
#
# Provenance: recovered from the patch for commit
# 5e921ad97097af6a01a93feacc40c661bbb3d96d ("Initial commit", David Malcolm,
# Wed, 3 Feb 2010 16:22:20 -0500), which created libpython.py.
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types.

This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython.  In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.

This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.

In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process.  For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
  ["foo", "bar", "baz"]

We try to defer all gdb.lookup_type() invocations until as late as possible:
when the /usr/bin/python process starts in the debugger, the libpython.so
hasn't been dynamically loaded yet, so none of the type names are known to
the debugger.

TODO:
  - currently only tested against libpython2.6; make it work with libpython3.1
    as well
'''

import gdb

# The module was written for a gdb embedding Python 2.  Provide the two
# Python-2-only builtins it relies on when they are missing, so that the
# module can at least be imported by an interpreter that lacks them.
try:
    long
except NameError:
    long = int
    xrange = range


class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested through a NULL pointer."""
    pass


class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """

    def __init__(self, gdbval, cast_to=None):
        """
        Wrap gdbval, first casting it to the gdb.Type cast_to if one is
        given.
        """
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        """
        Return the gdb.Value of the named struct field of the pointed-to
        object.

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        """
        if self.is_null():
            raise NullPyObjectPtr(self)
        return self._gdbval.dereference()[name]

    def type(self):
        """Return a PyTypeObjectPtr wrapping this object's ob_type."""
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        """Return True if the wrapped pointer is NULL."""
        return 0 == long(self._gdbval)

    def proxyval(self):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.
        '''
        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """
            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        try:
            tp_name = self.type().field('tp_name').string()
        except RuntimeError:
            # NULL object, NULL ob_type, or an unreadable tp_name.
            # NullPyObjectPtr is a RuntimeError, so the NULL case that was
            # handled before is still covered.
            tp_name = 'unknown'
        return FakeRepr(tp_name, long(self._gdbval))

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        """
        Wrap gdbval in the most specific PyObjectPtr subclass available.

        The object's tp_name is read from the inferior and looked up in a
        table of known types; on a match the pointer is cast to the matching
        struct type.  On any RuntimeError (NULL pointers, unknown gdb types,
        ...) the value is wrapped, uncast, in the class the method was
        called on.
        """
        # Try to locate the appropriate derived class dynamically, and cast
        # the pointer accordingly:
        try:
            tp_name = PyObjectPtr(gdbval).type().field('tp_name').string()
            name_map = {
                'dict': PyDictObjectPtr,
                'int': PyIntObjectPtr,
                'list': PyListObjectPtr,
                'str': PyStringObjectPtr,
                'tuple': PyTupleObjectPtr,
                'frame': PyFrameObjectPtr,
            }
            # Keep the match in its own local: rebinding cls here would make
            # the fallback below return an *uncast* specialised wrapper if
            # get_gdb_type() fails.
            subclass = name_map.get(tp_name)
            if subclass is not None:
                return subclass(gdbval, cast_to=subclass.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the
            # class we were called on
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        """
        Return the gdb.Type "pointer to cls._typename".

        Looked up on demand because the libpython types are unknown to gdb
        until the library has been loaded into the inferior.
        """
        return gdb.lookup_type(cls._typename).pointer()


class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. an instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'


class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def proxyval(self):
        """
        Return a dict mapping the proxy value of each key to the proxy value
        of its value, skipping table slots whose me_value is NULL.
        """
        result = {}
        table = self.field('ma_table')
        # The hash table has ma_mask + 1 slots (ma_mask is "size - 1"), so
        # the final slot must be visited too.  Convert to a plain int rather
        # than handing a gdb.Value to xrange().
        num_slots = int_from_int(self.field('ma_mask')) + 1
        for i in xrange(num_slots):
            ep = table + i
            pvalue = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pvalue.is_null():
                pkey = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                result[pkey.proxyval()] = pvalue.proxyval()
        return result


class PyIntObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyIntObject* i.e. an int instance
    within the process being debugged.
    """
    _typename = 'PyIntObject'

    def proxyval(self):
        """Return the remote ob_ival as an int in this process."""
        return int_from_int(self.field('ob_ival'))


class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        """Return the gdb.Value for the (PyObject*) with the given index."""
        return self.field('ob_item')[i]

    def proxyval(self):
        """Return a list holding the proxy value of each element."""
        size = int_from_int(self.field('ob_size'))
        return [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval()
                for i in range(size)]


class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame instance
    within the process being debugged.
    """
    _typename = 'PyFrameObject'

    def __str__(self):
        """Return the FrameInfo summary of this frame."""
        return str(FrameInfo(self))


class PyStringObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyStringObject* i.e. a str instance
    within the process being debugged.
    """
    _typename = 'PyStringObject'

    def __str__(self):
        """Return the string read from the address of ob_sval."""
        # Lookup the gdb.Type for "char*"
        type_char_ptr = gdb.lookup_type('char').pointer()
        char_ptr = self.field('ob_sval').address.cast(type_char_ptr)
        return char_ptr.string()

    def proxyval(self):
        """Return the remote string as a str in this process."""
        return str(self)


class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTupleObject* i.e. a tuple instance
    within the process being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        """Return the gdb.Value for the (PyObject*) with the given index."""
        return self.field('ob_item')[i]

    def proxyval(self):
        """Return a tuple holding the proxy value of each element."""
        size = int_from_int(self.field('ob_size'))
        return tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval()
                      for i in range(size)])


class PyTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTypeObject* i.e. a type instance
    within the process being debugged.
    """
    _typename = 'PyTypeObject'


def int_from_int(gdbval):
    """Convert an integer gdb.Value to an int via its string form."""
    return int(str(gdbval))


class FrameInfo(object):
    '''
    Class representing all of the information we can scrape about a
    PyFrameObject*
    '''

    # Arbitrary upper sanity limit in case co_nlocals is corrupt
    _MAX_LOCALS = 200

    def __init__(self, fval):
        """
        Scrape code name, filename, line number and local variables from
        fval, a PyObjectPtr wrapping a PyFrameObject*.

        Any failure to read the inferior surfaces as a RuntimeError
        (NullPyObjectPtr for NULL pointers).
        """
        self.fval = fval
        self.co = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code'))
        self.co_name = PyStringObjectPtr.from_pyobject_ptr(
            self.co.field('co_name'))
        self.co_filename = PyStringObjectPtr.from_pyobject_ptr(
            self.co.field('co_filename'))
        self.f_lineno = int_from_int(fval.field('f_lineno'))
        self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(
            self.co.field('co_varnames'))
        self.locals = []  # list of (name, proxy value) pairs
        f_localsplus = self.fval.field('f_localsplus')
        for i in xrange(min(self.co_nlocals, self._MAX_LOCALS)):
            value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if value.is_null():
                # NULL slots are left out of the summary
                continue
            name = PyStringObjectPtr.from_pyobject_ptr(self.co_varnames[i])
            self.locals.append((str(name), value.proxyval()))

    def __str__(self):
        """Return 'File <filename>, line <n>, in <name> (<k=v, ...>)'."""
        local_strs = ['%s=%s' % (k, repr(v)) for k, v in self.locals]
        return ('File %s, line %i, in %s (%s)'
                % (self.co_filename,
                   self.f_lineno,
                   self.co_name,
                   ', '.join(local_strs)))


def pyframe():
    """
    Print a FrameInfo summary for the variable "f" of the selected gdb
    frame.
    """
    fval = PyFrameObjectPtr.from_pyobject_ptr(
        gdb.selected_frame().read_var('f'))
    print(str(FrameInfo(fval)))


def mybt():
    """
    Print one line per gdb frame of the selected thread: '#<index>', then
    (for PyEval_EvalFrameEx frames) the Python frame summary, then the pc
    and function name.
    """
    def describe_EvalFrameEx(gdbframe):
        # Return the text to show for a PyEval_EvalFrameEx frame.
        try:
            f = gdbframe.read_var('f')
        except ValueError:
            return '(unable to print python frame, could not access "f")'

        try:
            return str(FrameInfo(PyFrameObjectPtr.from_pyobject_ptr(f)))
        except RuntimeError:
            return '(unable to print python frame; corrupt data?)'

    for i, gdbframe in enumerate(gdb.selected_thread().frames()):
        # Each line is assembled first and emitted with a single print.
        # Joining on ' ' reproduces the separators of the original
        # trailing-comma print statements (hence the two spaces before the
        # pc when a Python frame summary is present).
        parts = ['#%i' % i]
        if 'PyEval_EvalFrameEx' == gdbframe.name():
            parts.append(describe_EvalFrameEx(gdbframe))
        parts.append(' 0x%x in %s' % (gdbframe.pc(), gdbframe.name()))
        print(' '.join(parts))

#pyframe()
#if gdb.selected_thread():
#    mybt()


class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        """Return the repr() of the proxy value for the wrapped pointer."""
        proxyval = PyObjectPtr.from_pyobject_ptr(self.gdbval).proxyval()
        return repr(proxyval)


class PyFrameObjectPtrPrinter(PyObjectPtrPrinter):
    "Prints a (PyFrameObject*)"

    def to_string(self):
        """
        Return the FrameInfo summary for the wrapped pointer, or a short
        message if the frame cannot be read.
        """
        try:
            pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
            return str(FrameInfo(pyop))
        except RuntimeError:
            # NULL or corrupt frame: report it the same way mybt() does
            # instead of letting the exception escape.
            return '(unable to print python frame; corrupt data?)'


def pretty_printer_lookup(gdbval):
    """
    Return a printer for gdbval if it is a (PyObject*) or (PyFrameObject*),
    otherwise None.

    Only pointers are matched: the wrapper classes treat the value as an
    address, so a by-value struct cannot be handled by them.
    """
    gdbtype = gdbval.type.unqualified()
    if gdbtype.code != gdb.TYPE_CODE_PTR:
        return None
    target_name = str(gdbtype.target().unqualified())
    if target_name == "PyObject":
        return PyObjectPtrPrinter(gdbval)
    elif target_name == "PyFrameObject":
        return PyFrameObjectPtrPrinter(gdbval)
    return None


gdb.pretty_printers.append(pretty_printer_lookup)

"""
(gdb) python

import sys
sys.path.append('/home/david/coding/python-gdb')
import libpython
reload(libpython)

end
"""

# TODO:
#   - write actual backtrace
#   - integrate into backtrace hook