summaryrefslogtreecommitdiffstats
path: root/libpython.py
blob: c42e0fd36a3409286691490ad4fe2121242c17e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright © 2010 Red Hat, Inc.
#
# This software is licensed to you under the GNU Lesser General Public
# License, version 2.1 (LGPLv2.1). There is NO WARRANTY for this software,
# express or implied, including the implied warranties of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. You should have received a copy of
# LGPLv2.1 along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
#
# Red Hat trademarks are not licensed under LGPLv2.1. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated in
# this software or its documentation.
# 
# Red Hat Author(s): David Hugh Malcolm <dmalcolm@redhat.com>
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types.

This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython.  In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.

This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.

In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process.  For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
  ["foo", "bar", "baz"]

We try to defer all gdb.lookup_type() invocations until as late as possible:
when the /usr/bin/python process starts in the debugger, the libpython.so
hasn't been dynamically loaded yet, so none of the type names are known to
the debugger.

TODO:
  - currently only tested against libpython2.6; make it work with libpython3.1 as well
'''

import gdb

class NullPyObjectPtr(RuntimeError):
    """Raised when a field is requested through a NULL (PyObject*)."""

class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    def __init__(self, gdbval, cast_to=None):
        """
        Wrap gdbval, first casting it to the gdb.Type cast_to when one is
        supplied.
        """
        if cast_to is not None:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        """
        Return the gdb.Value of the named field of the pointed-to struct.

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        """
        if self.is_null():
            raise NullPyObjectPtr(self)
        return self._gdbval.dereference()[name]

    def type(self):
        """Return a PyTypeObjectPtr wrapping this object's ob_type field."""
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        """Return True if the wrapped pointer has the value 0."""
        return 0 == long(self._gdbval)

    def proxyval(self):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        This base implementation returns an object whose repr() has the form
        '<TYPENAME at remote 0xADDRESS>'; the type name falls back to
        'unknown' if it cannot be read.
        '''
        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """
            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        try:
            tp_name = self.type().field('tp_name').string()
        except RuntimeError:
            # NullPyObjectPtr is a RuntimeError, so a NULL pointer along the
            # way is still handled here.  Catching the base class applies the
            # same safety net as from_pyobject_ptr, so that an object we
            # cannot read at all still gets a printable placeholder.
            tp_name = 'unknown'
        return FakeRepr(tp_name,
                        long(self._gdbval))

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        """
        Wrap gdbval in the most specific wrapper class available.

        The remote tp_name is looked up in a table of known types; on a match
        the pointer is cast to that type and wrapped in the matching class.
        If the name is unknown, or a RuntimeError occurs at any point, gdbval
        is wrapped, uncast, in the class this method was invoked on.
        """
        # Try to locate the appropriate derived class dynamically, and cast
        # the pointer accordingly:
        try:
            tp_name = PyObjectPtr(gdbval).type().field('tp_name').string()
            name_map = {'dict': PyDictObjectPtr,
                        'int': PyIntObjectPtr,
                        'list': PyListObjectPtr,
                        'str': PyStringObjectPtr,
                        'tuple': PyTupleObjectPtr,
                        'frame': PyFrameObjectPtr,
                        }
            # Deliberately not rebinding cls: if the cast below fails, the
            # fallback must use the class the caller asked for rather than a
            # refined class wrapped around an uncast pointer.
            refined = name_map.get(tp_name)
            if refined is not None:
                return refined(gdbval, cast_to=refined.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the
            # class we were invoked on
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        """
        Return the gdb.Type for a pointer to the struct named by the class's
        _typename attribute (which subclasses are expected to define).
        """
        return gdb.lookup_type(cls._typename).pointer()

class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value holding a (PyCodeObject*), i.e. a pointer to
    a <code> instance living in the process being debugged.
    """
    _typename = 'PyCodeObject'

class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def proxyval(self):
        """
        Return a dict built from the occupied slots of the remote hash table.

        Every slot of ma_table is visited; slots whose me_value is NULL are
        skipped, and the proxy values of the remaining key/value pairs are
        collected.
        """
        result = {}
        # ma_mask is the number of slots minus one, so valid slot indices
        # run from 0 to ma_mask inclusive.
        slot_count = int_from_int(self.field('ma_mask')) + 1
        table = self.field('ma_table')
        for i in xrange(slot_count):
            ep = table + i
            pvalue = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
            if not pvalue.is_null():
                pkey = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                result[pkey.proxyval()] = pvalue.proxyval()
        return result

class PyIntObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyIntObject* i.e. an int instance
    within the process being debugged.
    """
    _typename = 'PyIntObject'

    def proxyval(self):
        """Return the remote ob_ival field as an int in the gdb process."""
        return int_from_int(self.field('ob_ival'))

class PyListObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value holding a (PyListObject*), i.e. a pointer to
    a list instance in the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        """Return the gdb.Value stored at index i of the ob_item field."""
        return self.field('ob_item')[i]

    def proxyval(self):
        """Return a list holding the proxy value of each of ob_size items."""
        length = int_from_int(self.field('ob_size'))
        items = []
        for index in range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval())
        return items

class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame instance
    within the process being debugged.
    """
    _typename = 'PyFrameObject'

    def __str__(self):
        """Return the FrameInfo summary of this frame as a string."""
        # No output is written here: str() must not have the side effect
        # of printing to stdout.
        return str(FrameInfo(self))

class PyStringObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value holding a (PyStringObject*), i.e. a pointer
    to a str instance in the process being debugged.
    """
    _typename = 'PyStringObject'

    def __str__(self):
        """
        Return the remote string's contents, read by treating the address of
        the ob_sval field as a (char*).
        """
        # The gdb.Type for "char*" is looked up at call time rather than at
        # import time.
        char_ptr_type = gdb.lookup_type('char').pointer()

        sval_address = self.field('ob_sval').address
        return sval_address.cast(char_ptr_type).string()

    def proxyval(self):
        """Return the remote string as a str in the gdb process."""
        return str(self)

class PyTupleObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value holding a (PyTupleObject*), i.e. a pointer to
    a tuple instance in the process being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        """Return the gdb.Value stored at index i of the ob_item field."""
        return self.field('ob_item')[i]

    def proxyval(self):
        """Return a tuple holding the proxy value of each of ob_size items."""
        length = int_from_int(self.field('ob_size'))
        items = []
        for index in range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(element.proxyval())
        return tuple(items)

class PyTypeObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value holding a (PyTypeObject*), i.e. a pointer to
    a type object in the process being debugged.
    """
    _typename = 'PyTypeObject'

def int_from_int(gdbval):
    """Convert gdbval to a Python int by parsing its str() form."""
    text = str(gdbval)
    return int(text)

def stringify(val, pretty=False):
    """
    Return a string rendering of val.

    By default repr() is used, which puts everything on one line.  Pass
    pretty=True to use pprint.pformat instead, which can be nicer but can
    lead to very long results; this function isolates the choice.
    """
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)

class FrameInfo:
    '''
    Snapshot of everything we can scrape from a PyFrameObject*: its code
    object, function name, filename, line number and the names and proxy
    values of its non-NULL locals.
    '''
    def __init__(self, fval):
        '''
        Scrape the frame wrapped by fval (an object providing field(), such
        as a PyFrameObjectPtr).
        '''
        self.fval = fval

        code = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code'))
        self.co = code
        self.co_name = PyStringObjectPtr.from_pyobject_ptr(code.field('co_name'))
        self.co_filename = PyStringObjectPtr.from_pyobject_ptr(code.field('co_filename'))
        self.f_lineno = int_from_int(fval.field('f_lineno'))
        self.co_nlocals = int_from_int(code.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(code.field('co_varnames'))

        # (name, proxy value) pairs, in slot order
        self.locals = []
        slots = fval.field('f_localsplus')
        # Cap the scan at an arbitrary sanity limit in case co_nlocals is
        # corrupt
        limit = min(self.co_nlocals, 200)
        for index in xrange(limit):
            slot = PyObjectPtr.from_pyobject_ptr(slots[index])
            if slot.is_null():
                continue
            varname = PyStringObjectPtr.from_pyobject_ptr(self.co_varnames[index])
            proxy = slot.proxyval()
            self.locals.append((str(varname), proxy))

    def __str__(self):
        '''
        Return 'File F, line N, in NAME (k=v, ...)' for the scraped frame.
        '''
        rendered = []
        for name, value in self.locals:
            rendered.append('%s=%s' % (name, stringify(value)))
        return 'File %s, line %i, in %s (%s)' % (
            self.co_filename,
            self.f_lineno,
            self.co_name,
            ', '.join(rendered))


def pyframe():
    """
    Print the FrameInfo summary for the variable 'f' read from the frame
    currently selected in gdb.
    """
    raw_frame = gdb.selected_frame().read_var('f')
    frame_ptr = PyFrameObjectPtr.from_pyobject_ptr(raw_frame)
    print(FrameInfo(frame_ptr))

def mybt():
    def print_EvalFrameEx(gdbframe):
        try:
            f = gdbframe.read_var('f')
        except ValueError:
            print '(unable to print python frame, could not access "f")',
            return

        try:
            fval = PyFrameObjectPtr.from_pyobject_ptr(f) #.dereference()
            fi = FrameInfo(fval)
            print fi, 
        except RuntimeError:
            print '(unable to print python frame; corrupt data?)',
                    
        
    for i, gdbframe in enumerate(gdb.selected_thread().frames()):
        #print dir(gdbframe), gdbframe.name()
        print '#%i' % i, 
        if 'PyEval_EvalFrameEx' == gdbframe.name():
            print_EvalFrameEx(gdbframe)
        print '  0x%x in %s' % (gdbframe.pc(), gdbframe.name())

#pyframe()
#if gdb.selected_thread():
#    mybt()
        
class PyObjectPtrPrinter:
    """Pretty-printer for a (PyObject*) gdb.Value."""

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        """Return the stringified proxy value of the wrapped pointer."""
        wrapped = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return stringify(wrapped.proxyval())

class PyFrameObjectPtrPrinter(PyObjectPtrPrinter):
    """Pretty-printer for a (PyFrameObject*) gdb.Value."""

    def to_string(self):
        """Return the FrameInfo summary of the wrapped frame pointer."""
        frame_ptr = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return str(FrameInfo(frame_ptr))

def pretty_printer_lookup(gdbval):
    """
    Return a printer object for gdbval if it is a pointer to PyObject or
    PyFrameObject (ignoring qualifiers), otherwise None.
    """
    val_type = gdbval.type.unqualified()
    if val_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(val_type.target().unqualified())
    if target_name == "PyObject":
        return PyObjectPtrPrinter(gdbval)
    if target_name == "PyFrameObject":
        return PyFrameObjectPtrPrinter(gdbval)
    return None


# Import-time side effect: add our lookup function to gdb's list of
# pretty-printer lookup functions.
gdb.pretty_printers.append(pretty_printer_lookup)

"""
(gdb) python

import sys
sys.path.append('/home/david/coding/python-gdb')
import libpython
reload(libpython)

end
"""

# TODO:
#  - write actual backtrace
#  - integrate into backtrace hook