roles/fedmsg/base/templates/logging.py.j2


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

# Setup fedmsg logging.

# All of these modules are just used by the ContextInjector below.
import inspect
import hashlib
import logging
import os
import socket
import traceback

psutil = None
try:
    import psutil
except (OSError, ImportError):
    # We run into issues when trying to import psutil from inside mod_wsgi on
    # rhel7.  If we hit that here, then just fail quietly.
    # https://github.com/jmflinuxtx/kerneltest-harness/pull/17#issuecomment-48007837
    pass


radio_silence = """
*** %i instances of this error seen.  No more mail will be sent. ***
""".strip()

seen_errors = {}  # This could be a default dict.
error_limit = 100


class ContextInjector(logging.Filter):
    """ Logging filter that adds context to log records.

    Filters are typically used to "filter" log records.  They declare a filter
    method that can return True or False.  Only records with 'True' will
    actually be logged.

    Here, we somewhat abuse the concept of a filter.  We for the most part
    return true, but we use the opportunity to hang important contextual
    information on the log record to later be used by the logging Formatter.  We
    don't normally want to see all this stuff in normal log records, but we *do*
    want to see it when we are emailed error messages.  Seeing an error, but not
    knowing which host it comes from, is not that useful.

    After we've seen an error ~100 times, we stop sending email to avoid choking
    the world.

    http://docs.python.org/2/howto/logging-cookbook.html#filters-contextual
    """

    def filter(self, record):
        current_process = ContextInjector.get_current_process()
        current_hostname = socket.gethostname()

        record.host = current_hostname
        record.proc = current_process
        record.pid = current_process.pid
        record.proc_name = current_process.name
        record.command_line = current_process.cmdline
        # These are callabls on more modern versions of psutil.
        if callable(record.proc_name):
            record.proc_name = record.proc_name()
        if callable(record.command_line):
            record.command_line = record.command_line()
        record.command_line = " ".join(record.command_line)
        record.callstack = self.format_callstack()
        record.farewell = ""

        key = hashlib.sha256(record.callstack).hexdigest()
        if not key in seen_errors:
            seen_errors[key] = 0

        if seen_errors[key] > error_limit:
            return False

        seen_errors[key] += 1

        if seen_errors[key] > error_limit:
            record.farewell = radio_silence % error_limit

        msg_id = ""
        try:
            msg = self.get_msg_object()
            if isinstance(msg, dict):
                if 'msg_id' in msg:
                    msg_id = msg['msg_id']
                elif 'msg' in msg:
                    if 'msg_id' in msg['msg']:
                        msg_id = msg['msg']['msg_id']
        except:
            pass

        record.msg_id = msg_id

        return True

    @staticmethod
    def get_msg_object():
        """ Return the current request object

        This is insane.

        There is no way to know the actual fedmsg message at this point in
        the code, so we're crawling our way down the call stack until we
        find the first place with a 'msg' local instance variable and
        attempt to extract the msg_id from it (later).

        Please forgive me (and Ralph, the original author of this code).
        """

        for frame in (f[0] for f in reversed(inspect.stack())):
            if 'msg' in frame.f_locals:
                return frame.f_locals['msg']

        # This code is reached if there's no Request. Most common case is trac-admin
        return None

    @staticmethod
    def format_callstack():
        for i, frame in enumerate(f[0] for f in inspect.stack()):
            if not '__name__' in frame.f_globals:
                continue
            modname = frame.f_globals['__name__'].split('.')[0]
            if modname != "logging":
                break

        def _format_frame(frame):
            return '  File "%s", line %i in %s\n    %s' % (frame)

        stack = traceback.extract_stack()
        stack = stack[:-i]
        return "\n".join([_format_frame(frame) for frame in stack])

    @staticmethod
    def get_current_process():
        mypid = os.getpid()

        if not psutil:
            raise OSError("Could not import psutil for %r" % mypid)

        for proc in psutil.process_iter():
            if proc.pid == mypid:
                return proc

        # This should be impossible.
        raise ValueError("Could not find process %r" % mypid)

    @classmethod
    def __json__(cls):
        """ We need to be jsonifiable for "fedmsg-config" """
        return {'name': 'ContextInjector'}


hefty_format = """Message
-------
[%(asctime)s][%(name)10s %(levelname)7s]
```python
%(message)s
```
%(farewell)s

Process Details
---------------
- host:     %(host)s
- PID:      %(pid)s
- name:     %(proc_name)s
- command:  %(command_line)s
- msg_id:   %(msg_id)s

Callstack that lead to the logging statement
--------------------------------------------
```python
%(callstack)s
```
"""


# See the following for constraints on this format http://bit.ly/Xn1WDn
config = dict(
    logging=dict(
        version=1,
        formatters=dict(
            bare={
                "datefmt": "%Y-%m-%d %H:%M:%S",
                {% if inventory_hostname.startswith(('bodhi-backend01', 'bodhi-backend03')) %}
                # Here we use a different format just so we can include
                # threadName.  We set the threadName in the bodhi masher to some
                # useful strings that will tell us which branch is doing what.
                # See https://github.com/fedora-infra/bodhi/commit/66d94094
                "format": "[%(asctime)s][%(name)10s %(levelname)7s]%(threadName)s %(message)s"
                {% else %}
                "format": "[%(asctime)s][%(name)10s %(levelname)7s] %(message)s"
                {% endif %}
            },
            hefty={
                "datefmt": "%Y-%m-%d %H:%M:%S",
                "format": hefty_format,
            },
        ),
        filters=dict(
            context={
                # This "()" syntax in the stdlib doesn't seem to be documented
                # anywhere.  I had to read
                # /usr/lib64/python2.7/logging/config.py to figure it out.
                "()": ContextInjector,
            },
        ),
        handlers=dict(
            console={
                "class": "logging.StreamHandler",
                "formatter": "bare",
                "level": "{{fedmsg_loglevel}}",
                "stream": "ext://sys.stdout",
            },
            mailer={
                "class": "logging.handlers.SMTPHandler",
                "formatter": "hefty",
                "filters": ["context"],
                "level": "ERROR",
                "mailhost": "bastion.phx2.fedoraproject.org",
                "fromaddr": "fedmsg@fedoraproject.org",
                "toaddrs": [
                {% for recipient in fedmsg_error_recipients %}
                    "{{recipient}}",
                {% endfor %}
                ],
                "subject": "fedmsg error log %s" % socket.gethostname(),
            },
        ),
        loggers=dict(
            fedmsg={
                "level": "{{fedmsg_loglevel}}",
                "propagate": False,
                "handlers": ["console", "mailer"],
            },
            moksha={
                "level": "{{fedmsg_loglevel}}",
                "propagate": False,
                "handlers": ["console", "mailer"],
            },
        ),
        # Root handler that deals with log messages that are not handled by the
        # loggers explicitly configured in the `loggers` dict.
        root={
            "level": "{{fedmsg_loglevel}}",
            "handlers": ["console", "mailer"],
        },
    ),
)