diff options
Diffstat (limited to 'ipapython/cookie.py')
-rw-r--r-- | ipapython/cookie.py | 699 |
1 files changed, 699 insertions, 0 deletions
diff --git a/ipapython/cookie.py b/ipapython/cookie.py new file mode 100644 index 000000000..b45cb2b11 --- /dev/null +++ b/ipapython/cookie.py @@ -0,0 +1,699 @@ +# Authors: +# John Dennis <jdennis@redhat.com> +# +# Copyright (C) 2012 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import re +import time +import datetime +from urllib2 import urlparse +from calendar import timegm +from ipapython.ipa_log_manager import log_mgr + +''' +Core Python has two cookie libraries, Cookie.py targeted to server +side and cookielib.py targeted to client side. So why this module and +not use the standard libraries? + +Cookie.py has some serious bugs, it cannot correctly parse the +HttpOnly, Secure, and Expires cookie attributes (more of a client side +need and not what it was designed for). Since we utilize those +attributes that makes Cookie.py a non-starter. Plus it's API awkard +and limited (we would have to build more on top of it). + +The Cookie.py bug reports are: + +http://bugs.python.org/issue3073 +http://bugs.python.org/issue16611 + +cookielib.py has a lot of good featuress, a nice API and covers all +the relevant RFC's as well as actual practice in the field. However +cookielib.py is tighly integrated with urllib2 and it's not possible +to use most of the features of cookielib without simultaneously using +urllib2. Unfortunataely we only use httplib because of our dependency +on xmlrpclib. Without urllib2 cookielib is a non-starter. + +This module is a minimal implementation of Netscape cookies which +works equally well on either the client or server side. It's API is +easy to use with cookie attributes as class properties which can be +read or set easily. The Cookie object automatically converts Expires +and Max-Age attributes into datetime objects for easy time +comparision. Cookies in strings can easily be parsed, including +multiple cookies in the HTTP_COOKIE envionment variable. + +The cookie RFC is silent on any escaping requirements for cookie +contents as such this module does not provide any automated support +escaping and unescapin. + +''' + +#------------------------------------------------------------------------------- + +# FIXME: The use of properties for the attributes timestamp, expires +# and max_age produce a pylint error which is a false positive, this +# is a known bug in pylint (http://www.logilab.org/ticket/89092, +# http://www.logilab.org/ticket/89786) after the pylint bug is fixed +# the disables for E0202 should be removed. + +class Cookie(object): + ''' + A Cookie object has the following attributes: + + key + The name of the cookie + value + The value of the cookie + + A Cookie also supports these predefined optional attributes. If an + optional attribute is not set on the cookie it's value is None. + + domain + Restrict cookie usage to this domain + path + Restrict cookie usage to this path or below + expires + Cookie is invalid after this UTC timestamp + max_age + Cookie is invalid this many seconds in the future. + Has precedence over the expires attribute. + secure + Cookie should only be returned on secure (i.e. SSL/TLS) + connections. + httponly + Cookie is intended only for HTTP communication, it can + never be utilized in any other context (e.g. browser + Javascript). + + See the documentation of get_expiration() for an explanation of + how the expires and max-age attributes interact as well as the + role of the timestamp attribute. Expiration values are stored as + datetime objects for easy manipulation and comparision. + + There are two ways to instantiate a Cookie object. Either directly + via the constructor or by calling the class function parse() which + returns a list of Cookie objects found in a string. + + To create a cookie to sent to a client: + + Example: + + cookie = Cookie('session', session_id, + domain=my_domain, path=mypath, + httpOnly=True, secure=True, expires=expiration) + headers.append(('Set-Cookie', str(cookie))) + + + To receive cookies from a request: + + Example: + + cookies = Cookie.parse(response.getheader('Set-Cookie'), request_url) + + ''' + + class Expired(ValueError): + pass + + class URLMismatch(ValueError): + pass + + # regexp to split fields at a semi-colon + field_re = re.compile(r';\s*') + + # regexp to locate a key/value pair + kv_pair_re = re.compile(r'^\s*([a-zA-Z0-9\!\#\$\%\&\'\*\+\-\.\^\_\`\|\~]+)\s*=\s*(.*?)\s*$', re.IGNORECASE) + + # Reserved attribute names, maps from lower case protocol name to + # object attribute name + attrs = {'domain' : 'domain', + 'path' : 'path', + 'max-age' : 'max_age', + 'expires' : 'expires', + 'secure' : 'secure', + 'httponly' : 'httponly'} + + @classmethod + def datetime_to_time(cls, dt): + ''' + Timestamps (timestamp & expires) are stored as datetime + objects in UTC. It's non-obvious how to convert a naive UTC + datetime into a unix time value (seconds since the epoch + UTC). That functionality is oddly missing from the datetime + and time modules. This utility provides that missing + functionality. + ''' + # Use timegm from the calendar module + return timegm(dt.utctimetuple()) + + @classmethod + def datetime_to_string(cls, dt=None): + ''' + Given a datetime object in UTC generate RFC 1123 date string. + ''' + + # Try to verify dt is specified as UTC. If utcoffset is not + # available we'll just have to assume the caller is using the + # correct timezone. + utcoffset = dt.utcoffset() + if utcoffset is not None and utcoffset.total_seconds() != 0.0: + raise ValueError("timezone is not UTC") + + # At this point we've validated as much as possible the + # timezone is UTC or GMT but we can't use the %Z timezone + # format specifier because the timezone in the string must be + # 'GMT', not something equivalent to GMT, so hardcode the GMT + # timezone string into the format. + + return datetime.datetime.strftime(dt, '%a, %d %b %Y %H:%M:%S GMT') + + @classmethod + def parse_datetime(cls, s): + ''' + Parse a RFC 822, RFC 1123 date string, return a datetime aware object in UTC. + Accommodates some non-standard formats found in the wild. + ''' + + formats = ['%a, %d %b %Y %H:%M:%S', + '%a, %d-%b-%Y %H:%M:%S', + '%a, %d-%b-%y %H:%M:%S', + '%a, %d %b %y %H:%M:%S', + ] + s = s.strip() + + # strptime does not read the time zone and generate a tzinfo + # object to insert in the datetime object so there is little point + # in specifying a %Z format, instead verify GMT is specified and + # generate the datetime object as if it were UTC. + + if not s.endswith(' GMT'): + raise ValueError("http date string '%s' does not end with GMT time zone" % s) + s = s[:-4] + + dt = None + for format in formats: + try: + dt = datetime.datetime(*(time.strptime(s, format)[0:6])) + break + except Exception: + continue + + if dt is None: + raise ValueError("unable to parse expires datetime '%s'" % s) + + return dt + + @classmethod + def normalize_url_path(cls, url_path): + ''' + Given a URL path, possibly empty, return a path consisting + only of directory components. The URL path must end with a + trailing slash for the last path element to be considered a + directory. Also the URL path must begin with a slash. Empty + input returns '/'. + + Examples: + + '' -> '/' + '/' -> '/' + 'foo' -> '/' + 'foo/' -> '/' + '/foo -> '/' + '/foo/' -> '/foo' + '/foo/bar' -> '/foo' + '/foo/bar/' -> '/foo/bar' + ''' + url_path = url_path.lower() + + if not url_path: + return '/' + + if not url_path.startswith('/'): + return '/' + + if url_path.count('/') <= 1: + return'/' + + return url_path[:url_path.rindex('/')] + + + @classmethod + def parse(cls, cookie_string, request_url=None): + ''' + Given a string containing one or more cookies (the + HTTP_COOKIES environment variable typically contains multiple + cookies) parse the string and return a list of Cookie objects + found in the string. + ''' + + # Our list of returned cookies + cookies = [] + + # Split the input string at semi-colon boundaries, we call this a + # field. A field may either be a single keyword or a key=value + # pair. + fields = Cookie.field_re.split(cookie_string) + + # The input string may have multiple cookies inside it. This is + # common when the string comes from a HTTP_COOKIE environment + # variable. All the cookies will be contenated, separated by a + # semi-colon. Semi-colons are also the separator between + # attributes in a cookie. + # + # To distinguish between two adjacent cookies in a string we + # have to locate the key=value pair at the start of a + # cookie. Unfortunately cookies have attributes that also look + # like key/value pairs, the only way to distinguish a cookie + # attribute from a cookie is the fact the attribute names are + # reserved. A cookie attribute may either be a key/value pair + # or a single key (e.g. HttpOnly). As we scan the cookie we + # first identify the key=value (cookie name, cookie + # value). Then we continue scanning, if a bare key or + # key/value pair follows and is a known reserved keyword than + # that's an attribute belonging to the current cookie. As soon + # as we see a key/value pair whose key is not reserved we know + # we've found a new cookie. Bare keys (no value) can never + # start a new cookie. + + # Iterate over all the fields and emit a new cookie whenever the + # next field is not a known attribute. + cookie = None + for field in fields: + match = Cookie.kv_pair_re.search(field) + if match: + key = match.group(1) + value = match.group(2) + # Double quoted value? + if value[0] == '"': + if value[-1] == '"': + value = value[1:-1] + else: + raise ValueError("unterminated quote in '%s'" % value) + kv_pair = True + else: + key = field + value = True # True because bare keys are boolean flags + kv_pair = False + + is_attribute = key.lower() in Cookie.attrs + + # First cookie found, create new cookie object + if cookie is None and kv_pair and not is_attribute: + cookie = Cookie(key, value) + + # If start of new cookie then flush previous cookie and create + # a new one (it's a new cookie because it's a key/value pair + # whose key is not a reserved keyword). + elif cookie and kv_pair and not is_attribute: + if request_url is not None: + cookie.normalize(request_url) + cookies.append(cookie) + cookie = Cookie(key, value) + + # If it's a reserved keyword add that as an attribute to the + # current cookie being scanned. + elif cookie and is_attribute: + cookie.__set_attr(key, value) + # If we've found a non-empty single token that's not a + # reserved keyword it's an error. An empty token can occur + # when there are two adjacent semi-colons (i.e. "; ;"). + # We don't consider empty tokens an error. + elif key: + raise ValueError("unknown cookie token '%s'" % key) + + # Flush out final cookie + if cookie: + if request_url is not None: + cookie.normalize(request_url) + cookies.append(cookie) + + return cookies + + @classmethod + def get_named_cookie_from_string(cls, cookie_string, cookie_name, request_url=None): + ''' + A cookie string may contain multiple cookies, parse the cookie + string and return the last cookie in the string matching the + cookie name or None if not found. + + This is basically a utility wrapper around the parse() class + method which iterates over what parse() returns looking for + the specific cookie. + + When cookie_name appears more than once the last instance is + returned rather than the first because the ordering sequence + makes the last instance the current value. + ''' + + target_cookie = None + + cookies = cls.parse(cookie_string) + for cookie in cookies: + if cookie.key == cookie_name: + target_cookie = cookie + + if request_url is not None: + target_cookie.normalize(request_url) + return target_cookie + + + def __init__(self, key, value, domain=None, path=None, max_age=None, expires=None, + secure=None, httponly=None, timestamp=None): + + log_mgr.get_logger(self, True) + + self.key = key + self.value = value + self.domain = domain + self.path = path + self.max_age = max_age + self.expires = expires + self.secure = secure + self.httponly = httponly + self.timestamp = timestamp + + @property + def timestamp(self): #pylint: disable=E0202 + ''' + The UTC moment at which cookie was received for purposes of + computing the expiration given a Max-Age offset. The + expiration will be timestamp + max_age. The timestamp value + will aways be a datetime object. + + By default the timestamp will be the moment the Cookie object + is created as this often corresponds to the moment the cookie + is received (the intent of the Max-Age attribute). But becuase + it's sometimes desirable to force a specific moment for + purposes of computing the expiration from the Max-Age the + Cookie timestamp can be updated. + + Setting a value of None causes the timestamp to be set to the + current UTC time (now). You may also assign with a numeric + UNIX timestamp (seconds since the epoch UTC) or a formatted time + sting, in all cases the value will be converted to a datetime + object. + ''' + return self._timestamp + + @timestamp.setter + def timestamp(self, value): #pylint: disable=E0202 + if value is None: + self._timestamp = None + elif isinstance(value, datetime.datetime): + self._timestamp = value + elif isinstance(value, (int, long, float)): + self._timestamp = datetime.datetime.utcfromtimestamp(value) + elif isinstance(value, basestring): + self._timestamp = Cookie.parse_datetime(value) + else: + raise TypeError('value must be datetime, int, long, float, basestring or None, not %s' % \ + value.__class__.__name__) + + @property + def expires(self): #pylint: disable=E0202 + ''' + The expiration timestamp (in UTC) as a datetime object for the + cookie, or None if not set. + + You may assign a value of None, a datetime object, a numeric + UNIX timestamp (seconds since the epoch UTC) or formatted time + string (the latter two will be converted to a datetime object. + ''' + return self._expires + + @expires.setter + def expires(self, value): #pylint: disable=E0202 + if value is None: + self._expires = None + elif isinstance(value, datetime.datetime): + self._expires = value + elif isinstance(value, (int, long, float)): + self._expires = datetime.datetime.utcfromtimestamp(value) + elif isinstance(value, basestring): + self._expires = Cookie.parse_datetime(value) + else: + raise TypeError('value must be datetime, int, long, float, basestring or None, not %s' % \ + value.__class__.__name__) + + @property + def max_age(self): #pylint: disable=E0202 + ''' + The lifetime duration of the cookie. Computed as an offset + from the cookie's timestamp. + ''' + return self._max_age + + @max_age.setter + def max_age(self, value): #pylint: disable=E0202 + if value is None: + self._max_age = None + else: + try: + self._max_age = int(value) + except Exception: + raise ValueError("Max-Age value '%s' not convertable to integer" % value) + + def __set_attr(self, name, value): + ''' + Sets one of the predefined cookie attributes. + ''' + attr_name = Cookie.attrs.get(name.lower(), None) + if attr_name is None: + raise ValueError("unknown cookie attribute '%s'" % name) + setattr(self, attr_name, value) + + def __str__(self): + components = [] + + components.append("%s=%s" % (self.key, self.value)) + + if self.domain is not None: + components.append("Domain=%s" % self.domain) + + if self.path is not None: + components.append("Path=%s" % self.path) + + if self.max_age is not None: + components.append("Max-Age=%s" % self.max_age) + + if self.expires is not None: + components.append("Expires=%s" % Cookie.datetime_to_string(self.expires)) + + if self.secure: + components.append("Secure") + + if self.httponly: + components.append("HttpOnly") + + return '; '.join(components) + + def get_expiration(self): + ''' + Return the effective expiration of the cookie as a datetime + object or None if no expiration is defined. Expiration may be + defined either by the "Expires" timestamp attribute or the + "Max-Age" duration attribute. If both are set "Max-Age" takes + precedence. If neither is set the cookie has no expiration and + None will be returned. + + "Max-Age" specifies the number of seconds in the future from when the + cookie is received until it expires. Effectively it means + adding "Max-Age" seconds to a timestamp to arrive at an + expiration. By default the timestamp used to mark the arrival + of the cookie is set to the moment the cookie object is + created. However sometimes it is desirable to adjust the + received timestamp to something other than the moment of + object creation, therefore you can explicitly set the arrival + timestamp used in the "Max-Age" calculation. + + "Expires" specifies an explicit timestamp. + + If "Max-Age" is set a datetime object is returned which is the + sum of the arrival timestamp and "Max-Age". + + If "Expires" is set a datetime object is returned matching the + timestamp specified as the "Expires" value. + + If neither is set None is returned. + ''' + + if self.max_age is not None: + return self.timestamp + datetime.timedelta(seconds=self.max_age) + + if self.expires is not None: + return self.expires + + return None + + def normalize_expiration(self): + ''' + An expiration may be specified either with an explicit + timestamp in the "Expires" attribute or via an offset + specified witht the "Max-Age" attribute. The "Max-Age" + attribute has precedence over "Expires" if both are + specified. + + This method normalizes the expiration of the cookie such that + only a "Expires" attribute remains after consideration of the + "Max-Age" attribute. This is useful when storing the cookie + for future reference. + ''' + + self.expires = self.get_expiration() + self.max_age = None + return self.expires + + def set_defaults_from_url(self, url): + ''' + If cookie domain and path attributes are not specified then + they assume defaults from the request url the cookie was + received from. + ''' + + scheme, domain, path, params, query, fragment = urlparse.urlparse(url) + + if self.domain is None: + self.domain = domain.lower() + + if self.path is None: + self.path = self.normalize_url_path(path) + + + def normalize(self, url): + ''' + Missing cookie attributes will receive default values derived + from the request URL. The expiration value is normalized. + ''' + + self.set_defaults_from_url(url) + self.normalize_expiration() + + def http_cookie(self): + ''' + Return a string with just the key and value (no attributes). + This is appropriate for including in a HTTP Cookie header. + ''' + return '%s=%s;' % (self.key, self.value) + + def http_return_ok(self, url): + ''' + Tests to see if a cookie should be returned when a request is + sent to a specific URL. + + * The request url's host must match the cookie's doman + otherwise raises Cookie.URLMismatch. + + * The path in the request url must contain the cookie's path + otherwise raises Cookie.URLMismatch. + + * If the cookie defines an expiration date then the current + time must be less or equal to the cookie's expiration + timestamp. Will raise Cookie.Expired if a defined expiration + is not valid. + + If the test fails Cookie.Expired or Cookie.URLMismatch will be raised, + otherwise True is returned. + + ''' + + def domain_valid(url_domain, cookie_domain): + ''' + Compute domain component and perform test per + RFC 6265, Section 5.1.3. "Domain Matching" + ''' + # FIXME: At the moment we can't import from ipalib at the + # module level because of a dependency loop (cycle) in the + # import. Our module layout needs to be refactored. + from ipalib.util import validate_domain_name + try: + validate_domain_name(url_domain) + except Exception, e: + return False + + if cookie_domain is None: + return True + + url_domain = url_domain.lower() + cookie_domain = cookie_domain.lower() + + if url_domain == cookie_domain: + return True + + if url_domain.endswith(cookie_domain): + if cookie_domain.startswith('.'): + return True + + return False + + def path_valid(url_path, cookie_path): + ''' + Compute path component and perform test per + RFC 6265, Section 5.1.4. "Paths and Path-Match" + ''' + + if cookie_path is None: + return True + + cookie_path = cookie_path.lower() + request_path = self.normalize_url_path(url_path) + + if cookie_path == request_path: + return True + + if cookie_path and request_path.startswith(cookie_path): + if cookie_path.endswith('/'): + return True + + tail = request_path[len(cookie_path):] + if tail.startswith('/'): + return True + + return False + + cookie_name = self.key + + url_scheme, url_domain, url_path, url_params, url_query, url_fragment = urlparse.urlparse(url) + + cookie_expiration = self.get_expiration() + if cookie_expiration is not None: + now = datetime.datetime.utcnow() + if cookie_expiration < now: + raise Cookie.Expired("cookie named '%s'; expired at %s'" % \ + (cookie_name, + self.datetime_to_string(cookie_expiration))) + + if not domain_valid(url_domain, self.domain): + raise Cookie.URLMismatch("cookie named '%s'; it's domain '%s' does not match URL domain '%s'" % \ + (cookie_name, self.domain, url_domain)) + + if not path_valid(url_path, self.path): + raise Cookie.URLMismatch("cookie named '%s'; it's path '%s' does not contain the URL path '%s'" % \ + (cookie_name, self.path, url_path)) + + url_scheme = url_scheme.lower() + + if self.httponly: + if url_scheme not in ('http', 'https'): + raise Cookie.URLMismatch("cookie named '%s'; is restricted to HTTP but it's URL scheme is '%s'" % \ + (cookie_name, url_scheme)) + + if self.secure: + if url_scheme not in ('https',): + raise Cookie.URLMismatch("cookie named '%s'; is restricted to secure transport but it's URL scheme is '%s'" % \ + (cookie_name, url_scheme)) + + + return True |