diff options
Diffstat (limited to 'ipsilon')
-rw-r--r-- | ipsilon/util/log.py | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/ipsilon/util/log.py b/ipsilon/util/log.py index fb2c620..a010b16 100644 --- a/ipsilon/util/log.py +++ b/ipsilon/util/log.py @@ -4,6 +4,244 @@ import cherrypy import inspect +import cStringIO + +def log_request_response(): + '''Log the contents of the request and subsequent response. + + This is run as a tool hook and should be run as the last hook + (on_end_resource) just before the server writes the response to + the client. The tool is registered like this: + + cherrypy.tools.log_request_response = \ + cherrypy.Tool('on_end_resource', log_request_response) + + Then the logging can be enabled or disabled via the config option, + for example to turn the logging on via a config dict add this + key/value pair: + + 'tools.log_request_response.on': True, + + or for a config file add this line: + + tools.log_request_response.on = True + + At first blush it would seem easy to log the request received and + the response emitted, just hook those locations and output the raw + data. Unfortunately there are no such locations in cherrypy where + you have access to the raw input and output data. The complicating + factors are: + + * Input data is consumed by the Request object off a file object + which cannot seek back (i.e. rewind), once the data is read it + cannot be read again. Therefore any attempt to log the raw input + will starve read operations done by the Request object to read + the headers and body. + + * Automatic consumption and processing of the body contents is + enabled by default (controlled by the + request.process_request_body config option). You generally do + not want to turn these automatic request processors off because + they provide valuable input processing useful to the request + processing pipeline (e.g. when Content-Type is + application/x-www-form-urlencoded, or multipart/form-data or + multipart). Most cherrypy page handlers expect this + pre-processing to have been performed and the 'cooked' data to + be availabe on the Request object. Thus rather than logging the + raw input HTTP which will have been consumed by the request + processing logic we are forced into logging only the 'cooked' + values available to us after the request has been read and + processed. + + * The response body may not be available if response streaming is + enabled. This is because control is passed directly to the + object writing the data to the client bypassing the normal + cherrypy hooks. Fortunately streaming is not recommended and we + can expect it will be disabled. When streaming is disabled the + response body can be composed as: + + - string + - list of strings + - a generator yielding chunks of strings + + When the response body is a string or list of strings logging + the body and then passing down the pipeline to be written to the + client is trivial. However when part of the body is produced by + a generator we must run the generator to produce that part of + the body and store it as a string. This is an issue equivalent + to not being able to re-read a file object as seen in the input + situation. Once the generator has run it cannot be run + again. Therefore we consume all the body output, store it in a + string, log it and then replace the request body contents with + the body string we formed. It's this body string which is + subsequently sent down the processing pipeline to be written to + the client. + + ''' + + #--- Begin local functions --- + + def indent_text(text, level=0, indent=' '): + ''' + Input is a block of text potentially containing newlines which + seperate the text into a sequence of lines. The text block is + split into individual lines and indented according to the + indentation level. The width of the indent is controlled by + the optional indent parameter. + + The result is a single block of text where each of the + original lines of text are indented. + ''' + + f = cStringIO.StringIO() + + lines = text.split('\n') + + # Do not output trailing newline + if lines and lines[-1] == '': + lines.pop(-1) + + for line in lines: + f.write(indent*level) + f.write(line) + f.write('\n') + + string = f.getvalue() + f.close() + return string + + def print_part(part): + ''' + Format a cherrypy._cpreqbody.Part object into a string. + + When the request Content-Type is a multipart cherrypy splits + each part of the multipart into a Part object containing + information about the part and it's content. + ''' + f = cStringIO.StringIO() + + f.write(indent_text('Name = %s\n' % part.name)) + if part.headers: + f.write(indent_text('Headers:\n')) + for name, value in part.headers.items(): + f.write(indent_text('%s: %s\n' % (name, value), 1)) + + f.write(indent_text("Body:\n")) + f.write(indent_text(part.fullvalue(), 1)) + + string = f.getvalue() + f.close() + return string + + def collapse_body(body): + '''The cherrypy response body can be: + + * string + * list of strings + * generator yielding a string + + Generators are typically used for file contents but any + cherrypy response is permitted to use a generator to provide + the body of the response. + + Strings and lists of strings are immediately available and + stored in the request object. During normal cherrypy + processing when writing the response to the client response + data which is provided by a generator will be iterated over + and written to the client. In order for us to be able to log + all the response data prior to it being sent to the client we + must also iterate over the generator provided content, however + this exhausts the generator making it unavailable to be + written to the client. + + To solve this problem we collect all the response data. Now we + have the full body contents, we can log it and then set this + as the new body contents for remainder of the processing + pipeline to act upon (i.e. sent to the client) + ''' + f = cStringIO.StringIO() + + for chunk in body: + f.write(chunk) + + string = f.getvalue() + f.close() + return string + + #--- End local functions --- + + f = cStringIO.StringIO() + request = cherrypy.serving.request + remote = request.remote + + # + # Log the Request + # + f.write(indent_text("<Request> [%s] %s\n" % \ + (remote.name or remote.ip, request.request_line), 0)) + + # Request Headers + if request.headers: + f.write(indent_text("Headers:\n", 1)) + for name, value in request.headers.items(): + f.write(indent_text("%s: %s\n" % (name, value), 2)) + + # Request parameters from URL query string and + # x-www-form-urlencoded POST data + if request.body.params: + f.write(indent_text("Params:\n", 1)) + for name, value in request.body.params.items(): + # Multi-valued paramater is in a list + if isinstance(value, list): + for i, item in enumerate(value): + # Might be a multipart Part object, if so format it + if isinstance(item, cherrypy._cpreqbody.Part): + f.write(indent_text("%s[%s]:\n" % (name, i), 2)) + f.write(indent_text(print_part(item), 3)) + else: + # Not a mulitpart, just write it as a string + f.write(indent_text("%s[%s]: %s\n" % (name, i, item), 2)) + else: + # Just a string value + f.write(indent_text("%s: %s\n" % (name, value), 2)) + + # If the body is multipart format each of the parts + if request.body.parts: + f.write(indent_text("Body Parts:\n")) + for i, part in enumerate(request.body.parts): + f.write(indent_text("Part %s name=%s:\n" % (i, part.name), 3)) + f.write(indent_text(print_part(part), 4)) + + # + # Log the Response + # + response = cherrypy.response + f.write(indent_text("<Response> %s\n" % response.status, 0)) + + # Log the response headers + if response.header_list: + f.write(indent_text("Headers:\n", 1)) + for name, value in response.header_list: + f.write(indent_text("%s: %s\n" % (name, value), 2)) + + # Log the response body + # + # We can only do this if the response is not streaming because we have + # no way to hook the streaming content. + f.write(indent_text("Body:\n", 1)) + + if response.stream: + f.write(indent_text("body omitted because response is streaming\n", 2)) + else: + response.body = collapse_body(response.body) + for chunk in response.body: + f.write(indent_text(chunk, 2)) + + string = f.getvalue() + f.close() + print string + +cherrypy.tools.log_request_response = cherrypy.Tool('on_end_resource', log_request_response) class Log(object): |