From e83edc5519ed716142f4695ba792332eb19df861 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 4 Feb 2010 12:31:07 -0500 Subject: Generalize to support Python 3 as well as Python 2; implement handlers for PyLongObject and PyUnicodeObject --- libpython.py | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 153 insertions(+), 18 deletions(-) diff --git a/libpython.py b/libpython.py index c42e0fd..8e3e0f5 100644 --- a/libpython.py +++ b/libpython.py @@ -42,8 +42,9 @@ when the /usr/bin/python process starts in the debugger, the libpython.so hasn't been dynamically loaded yet, so none of the type names are known to the debugger -TODO: - - currently only tested against libpython2.6; make it work with libpython3.1 as well +Tested with both libpython2.6 and libpython3.1 + +TODO: better handling of "instance" ''' import gdb @@ -51,6 +52,17 @@ import gdb class NullPyObjectPtr(RuntimeError): pass +def is_py3k(): + # This code assumes that a libpython's DWARF data has actually been + # loaded by the point that this function is called + sym = gdb.lookup_symbol('PyBytes_Type')[0] + if sym: + #...then PyBytes_Type exists, assume this is libpython3.* + return True + else: + #...then PyBytes_Type doesn't exist, assume this is libpython2.* + return False + class PyObjectPtr(object): """ Class wrapping a gdb.Value that's a either a (PyObject*) within the @@ -62,6 +74,8 @@ class PyObjectPtr(object): Note that at every stage the underlying pointer could be NULL, point to corrupt data, etc; this is the debugger, after all. """ + _typename = 'PyObject' + def __init__(self, gdbval, cast_to = None): if cast_to: self._gdbval = gdbval.cast(cast_to) @@ -69,8 +83,36 @@ class PyObjectPtr(object): self._gdbval = gdbval def field(self, name): + ''' + Get the gdb.Value for the given field within the PyObject, coping with + some python 2 versus python 3 differences. 
+ + Various libpython types are defined using the "PyObject_HEAD" and + "PyObject_VAR_HEAD" macros. + + In Python 2, these are defined so that "ob_type" and (for a var + object) "ob_size" are fields of the type in question. + + In Python 3, this is defined as an embedded PyVarObject type thus: + PyVarObject ob_base; + so that the "ob_size" field is located inside the "ob_base" field, and + the "ob_type" is most easily accessed by casting back to a (PyObject*). + ''' if self.is_null(): raise NullPyObjectPtr(self) + + if name == 'ob_type': + return self._gdbval.cast(PyObjectPtr.get_gdb_type()).dereference()[name] + + if name == 'ob_size': + try: + # Python 2: + return self._gdbval.dereference()[name] + except RuntimeError: + # Python 3: + return self._gdbval.dereference()['ob_base'][name] + + # General case: look it up inside the object: return self._gdbval.dereference()[name] def type(self): @@ -111,24 +153,56 @@ class PyObjectPtr(object): return FakeRepr(tp_name, long(self._gdbval)) + @classmethod + def subclass_for_tp_name(cls, tp_name): + if tp_name == 'str': + if is_py3k(): + return PyUnicodeObjectPtr + else: + return PyStringObjectPtr + if tp_name == 'int': + if is_py3k(): + return PyLongObjectPtr + else: + return PyIntObjectPtr + + name_map = {'dict': PyDictObjectPtr, + 'list': PyListObjectPtr, + 'long': PyLongObjectPtr, + 'tuple': PyTupleObjectPtr, + 'frame': PyFrameObjectPtr, + 'unicode': PyUnicodeObjectPtr, + } + if tp_name in name_map: + return name_map[tp_name] + # Use the base class: + return cls + @classmethod def from_pyobject_ptr(cls, gdbval): - # Try to locate the appropriate derived class dynamically, and cast - # the pointer accordingly: + ''' + Try to locate the appropriate derived class dynamically, and cast + the pointer accordingly: + For now, we just do string comparison on the tp_name + Other approaches: + (i) look up the symbols for the global types, but that isn't working yet: + (gdb) python print 
gdb.lookup_symbol('PyList_Type')[0].value + Traceback (most recent call last): + File "<string>", line 1, in <module> + NotImplementedError: Symbol type not yet supported in Python scripts. + Error while executing Python code. + (ii) look at tp_flags, looking e.g. for Py_TPFLAGS_LIST_SUBCLASS however + this would rely on the values of those flags. + + So we go with the simple approach of looking at tp_name + ''' + # try: p = PyObjectPtr(gdbval) t = p.type() tp_name = t.field('tp_name').string() - name_map = {'dict': PyDictObjectPtr, - 'int': PyIntObjectPtr, - 'list': PyListObjectPtr, - 'str': PyStringObjectPtr, - 'tuple': PyTupleObjectPtr, - 'frame': PyFrameObjectPtr, - } - if tp_name in name_map: - cls = name_map[tp_name] - return cls(gdbval, cast_to=cls.get_gdb_type()) + cls = cls.subclass_for_tp_name(tp_name) + return cls(gdbval, cast_to=cls.get_gdb_type()) except RuntimeError: # Handle any kind of error e.g. NULL ptrs by simply using the base # class @@ -166,6 +240,10 @@ class PyDictObjectPtr(PyObjectPtr): class PyIntObjectPtr(PyObjectPtr): _typename = 'PyIntObject' + def proxyval(self): + result = int_from_int(self.field('ob_ival')) + return result + class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -179,10 +257,48 @@ class PyListObjectPtr(PyObjectPtr): for i in range(int_from_int(self.field('ob_size')))] return result +class PyLongObjectPtr(PyObjectPtr): + _typename = 'PyLongObject' + + def proxyval(self): + ''' + Python's Include/longintrepr.h has this declaration: + struct _longobject { + PyObject_VAR_HEAD + digit ob_digit[1]; + }; + + with this description: + The absolute value of a number is equal to + SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) + Negative numbers are represented with ob_size < 0; + zero is represented by ob_size == 0. 
+ + where SHIFT can be either: + #define PyLong_SHIFT 30 + #define PyLong_SHIFT 15 + ''' + ob_size = long(self.field('ob_size')) + if ob_size == 0: + return 0L + + ob_digit = self.field('ob_digit') + + if gdb.lookup_type('digit').sizeof == 2: + SHIFT = 15L + else: + # FIXME: I haven't yet tested this case + SHIFT = 30L + + digits = [long(ob_digit[i]) * 2**(SHIFT*i) for i in xrange(abs(ob_size))] + result = sum(digits) + if ob_size < 0: + result = -result + return result + class PyFrameObjectPtr(PyObjectPtr): _typename = 'PyFrameObject' def __str__(self): - print 'bar' fi = FrameInfo(self) return str(fi) @@ -216,6 +332,25 @@ class PyTupleObjectPtr(PyObjectPtr): class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' +class PyUnicodeObjectPtr(PyObjectPtr): + _typename = 'PyUnicodeObject' + + def proxyval(self): + # From unicodeobject.h: + # Py_ssize_t length; /* Length of raw Unicode data in buffer */ + # Py_UNICODE *str; /* Raw Unicode buffer */ + field_length = long(self.field('length')) + field_str = self.field('str') + + # Gather a list of ints from the Py_UNICODE array; these are either + # UCS-2 or UCS-4 code points: + Py_UNICODEs = [int(field_str[i]) for i in xrange(field_length)] + + # Convert the int code points to unicode characters, and generate a + # local unicode instance: + result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) + return result + def int_from_int(gdbval): return int(str(gdbval)) @@ -236,8 +371,8 @@ class FrameInfo: def __init__(self, fval): self.fval = fval self.co = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code')) - self.co_name = PyStringObjectPtr.from_pyobject_ptr(self.co.field('co_name')) - self.co_filename = PyStringObjectPtr.from_pyobject_ptr(self.co.field('co_filename')) + self.co_name = PyObjectPtr.from_pyobject_ptr(self.co.field('co_name')) + self.co_filename = PyObjectPtr.from_pyobject_ptr(self.co.field('co_filename')) self.f_lineno = int_from_int(fval.field('f_lineno')) self.co_nlocals = 
int_from_int(self.co.field('co_nlocals')) self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) @@ -247,7 +382,7 @@ class FrameInfo: #print 'i=%i' % i value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) if not value.is_null(): - name = PyStringObjectPtr.from_pyobject_ptr(self.co_varnames[i]) + name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) #print 'name=%s' % name value = value.proxyval() #print 'value=%s' % value -- cgit