summaryrefslogtreecommitdiffstats
path: root/ipapython/cookie.py
diff options
context:
space:
mode:
Diffstat (limited to 'ipapython/cookie.py')
-rw-r--r--ipapython/cookie.py699
1 files changed, 699 insertions, 0 deletions
diff --git a/ipapython/cookie.py b/ipapython/cookie.py
new file mode 100644
index 000000000..b45cb2b11
--- /dev/null
+++ b/ipapython/cookie.py
@@ -0,0 +1,699 @@
+# Authors:
+# John Dennis <jdennis@redhat.com>
+#
+# Copyright (C) 2012 Red Hat
+# see file 'COPYING' for use and warranty information
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+import time
+import datetime
+from urllib2 import urlparse
+from calendar import timegm
+from ipapython.ipa_log_manager import log_mgr
+
+'''
+Core Python has two cookie libraries, Cookie.py targeted to server
+side and cookielib.py targeted to client side. So why this module and
+not use the standard libraries?
+
+Cookie.py has some serious bugs, it cannot correctly parse the
+HttpOnly, Secure, and Expires cookie attributes (more of a client side
+need and not what it was designed for). Since we utilize those
+attributes that makes Cookie.py a non-starter. Plus its API is awkward
+and limited (we would have to build more on top of it).
+
+The Cookie.py bug reports are:
+
+http://bugs.python.org/issue3073
+http://bugs.python.org/issue16611
+
+cookielib.py has a lot of good features, a nice API and covers all
+the relevant RFCs as well as actual practice in the field. However
+cookielib.py is tightly integrated with urllib2 and it's not possible
+to use most of the features of cookielib without simultaneously using
+urllib2. Unfortunately we only use httplib because of our dependency
+on xmlrpclib. Without urllib2 cookielib is a non-starter.
+
+This module is a minimal implementation of Netscape cookies which
+works equally well on either the client or server side. Its API is
+easy to use with cookie attributes as class properties which can be
+read or set easily. The Cookie object automatically converts Expires
+and Max-Age attributes into datetime objects for easy time
+comparison. Cookies in strings can easily be parsed, including
+multiple cookies in the HTTP_COOKIE environment variable.
+
+The cookie RFC is silent on any escaping requirements for cookie
+contents; as such this module does not provide any automated support
+for escaping and unescaping.
+
+'''
+
+#-------------------------------------------------------------------------------
+
+# FIXME: The use of properties for the attributes timestamp, expires
+# and max_age produces a pylint error which is a false positive. This
+# is a known bug in pylint (http://www.logilab.org/ticket/89092,
+# http://www.logilab.org/ticket/89786). After the pylint bug is fixed
+# the disables for E0202 should be removed.
+
+class Cookie(object):
+    '''
+    A Cookie object has the following attributes:
+
+        key
+            The name of the cookie
+        value
+            The value of the cookie
+
+    A Cookie also supports these predefined optional attributes. If an
+    optional attribute is not set on the cookie its value is None.
+
+        domain
+            Restrict cookie usage to this domain
+        path
+            Restrict cookie usage to this path or below
+        expires
+            Cookie is invalid after this UTC timestamp
+        max_age
+            Cookie is invalid this many seconds in the future.
+            Has precedence over the expires attribute.
+        secure
+            Cookie should only be returned on secure (i.e. SSL/TLS)
+            connections.
+        httponly
+            Cookie is intended only for HTTP communication, it can
+            never be utilized in any other context (e.g. browser
+            Javascript).
+
+    See the documentation of get_expiration() for an explanation of
+    how the expires and max-age attributes interact as well as the
+    role of the timestamp attribute. Expiration values are stored as
+    datetime objects for easy manipulation and comparison.
+
+    There are two ways to instantiate a Cookie object. Either directly
+    via the constructor or by calling the class function parse() which
+    returns a list of Cookie objects found in a string.
+
+    To create a cookie to send to a client:
+
+    Example:
+
+        cookie = Cookie('session', session_id,
+                        domain=my_domain, path=mypath,
+                        httponly=True, secure=True, expires=expiration)
+        headers.append(('Set-Cookie', str(cookie)))
+
+
+    To receive cookies from a request:
+
+    Example:
+
+        cookies = Cookie.parse(response.getheader('Set-Cookie'), request_url)
+
+    '''
+
+    class Expired(ValueError):
+        '''Raised by http_return_ok() when the cookie has expired.'''
+        pass
+
+    class URLMismatch(ValueError):
+        '''Raised by http_return_ok() when the cookie must not be sent to a URL.'''
+        pass
+
+    # regexp to split fields at a semi-colon
+    field_re = re.compile(r';\s*')
+
+    # regexp to locate a key/value pair
+    kv_pair_re = re.compile(r'^\s*([a-zA-Z0-9\!\#\$\%\&\'\*\+\-\.\^\_\`\|\~]+)\s*=\s*(.*?)\s*$', re.IGNORECASE)
+
+    # Reserved attribute names, maps from lower case protocol name to
+    # object attribute name
+    attrs = {'domain'   : 'domain',
+             'path'     : 'path',
+             'max-age'  : 'max_age',
+             'expires'  : 'expires',
+             'secure'   : 'secure',
+             'httponly' : 'httponly'}
+
+    @classmethod
+    def datetime_to_time(cls, dt):
+        '''
+        Timestamps (timestamp & expires) are stored as datetime
+        objects in UTC. It's non-obvious how to convert a naive UTC
+        datetime into a unix time value (seconds since the epoch
+        UTC). That functionality is oddly missing from the datetime
+        and time modules. This utility provides that missing
+        functionality.
+
+        Returns the integer number of seconds since the epoch.
+        '''
+        # Use timegm from the calendar module
+        return timegm(dt.utctimetuple())
+
+    @classmethod
+    def datetime_to_string(cls, dt=None):
+        '''
+        Given a datetime object in UTC generate RFC 1123 date string.
+
+        If dt is None the current UTC time is used. Raises ValueError
+        if dt carries a timezone whose UTC offset is not zero.
+        '''
+
+        # The signature advertises None as the default, previously
+        # that default crashed with an AttributeError.
+        if dt is None:
+            dt = datetime.datetime.utcnow()
+
+        # Try to verify dt is specified as UTC. If utcoffset is not
+        # available we'll just have to assume the caller is using the
+        # correct timezone.
+        utcoffset = dt.utcoffset()
+        if utcoffset is not None and utcoffset.total_seconds() != 0.0:
+            raise ValueError("timezone is not UTC")
+
+        # At this point we've validated as much as possible the
+        # timezone is UTC or GMT but we can't use the %Z timezone
+        # format specifier because the timezone in the string must be
+        # 'GMT', not something equivalent to GMT, so hardcode the GMT
+        # timezone string into the format.
+
+        return dt.strftime('%a, %d %b %Y %H:%M:%S GMT')
+
+    @classmethod
+    def parse_datetime(cls, s):
+        '''
+        Parse a RFC 822, RFC 1123 date string, return a naive datetime
+        object whose value is in UTC (no tzinfo is attached).
+        Accommodates some non-standard formats found in the wild.
+
+        Raises ValueError if the string does not end with ' GMT' or
+        matches none of the known formats.
+        '''
+
+        formats = ['%a, %d %b %Y %H:%M:%S',
+                   '%a, %d-%b-%Y %H:%M:%S',
+                   '%a, %d-%b-%y %H:%M:%S',
+                   '%a, %d %b %y %H:%M:%S',
+                  ]
+        s = s.strip()
+
+        # strptime does not read the time zone and generate a tzinfo
+        # object to insert in the datetime object so there is little point
+        # in specifying a %Z format, instead verify GMT is specified and
+        # generate the datetime object as if it were UTC.
+
+        if not s.endswith(' GMT'):
+            raise ValueError("http date string '%s' does not end with GMT time zone" % s)
+        s = s[:-4]
+
+        dt = None
+        for fmt in formats:
+            try:
+                dt = datetime.datetime(*(time.strptime(s, fmt)[0:6]))
+                break
+            except ValueError:
+                # Format did not match (or produced an out of range
+                # field), try the next candidate format.
+                continue
+
+        if dt is None:
+            raise ValueError("unable to parse expires datetime '%s'" % s)
+
+        return dt
+
+    @classmethod
+    def normalize_url_path(cls, url_path):
+        '''
+        Given a URL path, possibly empty, return a path consisting
+        only of directory components. The URL path must end with a
+        trailing slash for the last path element to be considered a
+        directory. Also the URL path must begin with a slash. Empty
+        input returns '/'. The result is lower cased.
+
+        Examples:
+
+        ''          -> '/'
+        '/'         -> '/'
+        'foo'       -> '/'
+        'foo/'      -> '/'
+        '/foo'      -> '/'
+        '/foo/'     -> '/foo'
+        '/foo/bar'  -> '/foo'
+        '/foo/bar/' -> '/foo/bar'
+        '''
+        url_path = url_path.lower()
+
+        # Empty and relative paths both fail the leading slash test.
+        if not url_path.startswith('/'):
+            return '/'
+
+        if url_path.count('/') <= 1:
+            return '/'
+
+        return url_path[:url_path.rindex('/')]
+
+    @classmethod
+    def parse(cls, cookie_string, request_url=None):
+        '''
+        Given a string containing one or more cookies (the
+        HTTP_COOKIE environment variable typically contains multiple
+        cookies) parse the string and return a list of Cookie objects
+        found in the string.
+
+        If request_url is not None each cookie is normalized against
+        it (see normalize()). An empty or None cookie_string yields an
+        empty list. Raises ValueError for an unterminated quoted value
+        or for a token which is neither a key/value pair nor a
+        reserved attribute of a preceding cookie.
+        '''
+
+        # Our list of returned cookies
+        cookies = []
+
+        # A missing header is commonly reported as None, treat it the
+        # same as an empty header instead of failing inside re.split.
+        if not cookie_string:
+            return cookies
+
+        # Split the input string at semi-colon boundaries, we call this a
+        # field. A field may either be a single keyword or a key=value
+        # pair.
+        fields = cls.field_re.split(cookie_string)
+
+        # The input string may have multiple cookies inside it. This is
+        # common when the string comes from a HTTP_COOKIE environment
+        # variable. All the cookies will be concatenated, separated by a
+        # semi-colon. Semi-colons are also the separator between
+        # attributes in a cookie.
+        #
+        # To distinguish between two adjacent cookies in a string we
+        # have to locate the key=value pair at the start of a
+        # cookie. Unfortunately cookies have attributes that also look
+        # like key/value pairs, the only way to distinguish a cookie
+        # attribute from a cookie is the fact the attribute names are
+        # reserved. A cookie attribute may either be a key/value pair
+        # or a single key (e.g. HttpOnly). As we scan the cookie we
+        # first identify the key=value (cookie name, cookie
+        # value). Then we continue scanning, if a bare key or
+        # key/value pair follows and is a known reserved keyword then
+        # that's an attribute belonging to the current cookie. As soon
+        # as we see a key/value pair whose key is not reserved we know
+        # we've found a new cookie. Bare keys (no value) can never
+        # start a new cookie.
+
+        # Iterate over all the fields and emit a new cookie whenever the
+        # next field is not a known attribute.
+        cookie = None
+        for field in fields:
+            match = cls.kv_pair_re.search(field)
+            if match:
+                key = match.group(1)
+                value = match.group(2)
+                # Double quoted value? The value may be empty
+                # (e.g. "name=") so never index into it blindly, and a
+                # lone quote character is an unterminated quote.
+                if value.startswith('"'):
+                    if len(value) < 2 or not value.endswith('"'):
+                        raise ValueError("unterminated quote in '%s'" % value)
+                    value = value[1:-1]
+                kv_pair = True
+            else:
+                # The field separator only consumes whitespace after
+                # the semi-colon, strip what may precede it.
+                key = field.strip()
+                value = True    # True because bare keys are boolean flags
+                kv_pair = False
+
+            is_attribute = key.lower() in cls.attrs
+
+            # A key/value pair whose key is not a reserved keyword
+            # starts a new cookie, flush the previous cookie (if any)
+            # before creating the new one.
+            if kv_pair and not is_attribute:
+                if cookie is not None:
+                    if request_url is not None:
+                        cookie.normalize(request_url)
+                    cookies.append(cookie)
+                cookie = cls(key, value)
+
+            # If it's a reserved keyword add that as an attribute to the
+            # current cookie being scanned.
+            elif cookie is not None and is_attribute:
+                cookie.__set_attr(key, value)
+
+            # If we've found a non-empty token that cannot be attached
+            # to a cookie it's an error. An empty token can occur
+            # when there are two adjacent semi-colons (i.e. "; ;").
+            # We don't consider empty tokens an error.
+            elif key:
+                raise ValueError("unknown cookie token '%s'" % key)
+
+        # Flush out final cookie
+        if cookie is not None:
+            if request_url is not None:
+                cookie.normalize(request_url)
+            cookies.append(cookie)
+
+        return cookies
+
+    @classmethod
+    def get_named_cookie_from_string(cls, cookie_string, cookie_name, request_url=None):
+        '''
+        A cookie string may contain multiple cookies, parse the cookie
+        string and return the last cookie in the string matching the
+        cookie name or None if not found.
+
+        This is basically a utility wrapper around the parse() class
+        method which iterates over what parse() returns looking for
+        the specific cookie.
+
+        When cookie_name appears more than once the last instance is
+        returned rather than the first because the ordering sequence
+        makes the last instance the current value.
+
+        If request_url is not None the returned cookie (when one is
+        found) is normalized against it.
+        '''
+
+        target_cookie = None
+
+        for cookie in cls.parse(cookie_string):
+            if cookie.key == cookie_name:
+                target_cookie = cookie
+
+        # Only normalize when a cookie was actually found, otherwise
+        # the documented "None if not found" result would be replaced
+        # by an AttributeError.
+        if target_cookie is not None and request_url is not None:
+            target_cookie.normalize(request_url)
+        return target_cookie
+
+    @classmethod
+    def _to_datetime(cls, value):
+        '''
+        Shared conversion used by the timestamp and expires setters.
+
+        None and datetime objects are returned unchanged, a number is
+        interpreted as a UNIX timestamp (seconds since the epoch UTC)
+        and a string is parsed with parse_datetime(). Any other type
+        raises TypeError.
+        '''
+        if value is None or isinstance(value, datetime.datetime):
+            return value
+        if isinstance(value, (int, long, float)):
+            return datetime.datetime.utcfromtimestamp(value)
+        if isinstance(value, basestring):
+            return cls.parse_datetime(value)
+        raise TypeError('value must be datetime, int, long, float, basestring or None, not %s' %
+                        value.__class__.__name__)
+
+    def __init__(self, key, value, domain=None, path=None, max_age=None, expires=None,
+                 secure=None, httponly=None, timestamp=None):
+
+        log_mgr.get_logger(self, True)
+
+        self.key = key
+        self.value = value
+        self.domain = domain
+        self.path = path
+        self.max_age = max_age
+        self.expires = expires
+        self.secure = secure
+        self.httponly = httponly
+        self.timestamp = timestamp
+
+    @property
+    def timestamp(self): #pylint: disable=E0202
+        '''
+        The UTC moment at which cookie was received for purposes of
+        computing the expiration given a Max-Age offset. The
+        expiration will be timestamp + max_age. The timestamp value
+        will always be a datetime object.
+
+        By default the timestamp will be the moment the Cookie object
+        is created as this often corresponds to the moment the cookie
+        is received (the intent of the Max-Age attribute). But because
+        it's sometimes desirable to force a specific moment for
+        purposes of computing the expiration from the Max-Age the
+        Cookie timestamp can be updated.
+
+        Setting a value of None causes the timestamp to be set to the
+        current UTC time (now). You may also assign with a numeric
+        UNIX timestamp (seconds since the epoch UTC) or a formatted time
+        string, in all cases the value will be converted to a datetime
+        object.
+        '''
+        return self._timestamp
+
+    @timestamp.setter
+    def timestamp(self, value): #pylint: disable=E0202
+        if value is None:
+            # None means "now"; storing None here made
+            # get_expiration() fail with a TypeError whenever Max-Age
+            # was set.
+            self._timestamp = datetime.datetime.utcnow()
+        else:
+            self._timestamp = self._to_datetime(value)
+
+    @property
+    def expires(self): #pylint: disable=E0202
+        '''
+        The expiration timestamp (in UTC) as a datetime object for the
+        cookie, or None if not set.
+
+        You may assign a value of None, a datetime object, a numeric
+        UNIX timestamp (seconds since the epoch UTC) or formatted time
+        string (the latter two will be converted to a datetime object).
+        '''
+        return self._expires
+
+    @expires.setter
+    def expires(self, value): #pylint: disable=E0202
+        self._expires = self._to_datetime(value)
+
+    @property
+    def max_age(self): #pylint: disable=E0202
+        '''
+        The lifetime duration of the cookie in seconds (an integer),
+        or None if not set. Computed as an offset from the cookie's
+        timestamp.
+
+        Assigning a value which cannot be converted to an integer
+        raises ValueError.
+        '''
+        return self._max_age
+
+    @max_age.setter
+    def max_age(self, value): #pylint: disable=E0202
+        if value is None:
+            self._max_age = None
+        else:
+            try:
+                self._max_age = int(value)
+            except (TypeError, ValueError, OverflowError):
+                raise ValueError("Max-Age value '%s' not convertable to integer" % value)
+
+    def __set_attr(self, name, value):
+        '''
+        Sets one of the predefined cookie attributes. The name is the
+        (case insensitive) protocol name, e.g. 'Max-Age'. Raises
+        ValueError if name is not a reserved attribute name.
+        '''
+        attr_name = Cookie.attrs.get(name.lower(), None)
+        if attr_name is None:
+            raise ValueError("unknown cookie attribute '%s'" % name)
+        setattr(self, attr_name, value)
+
+    def __str__(self):
+        '''
+        Format the cookie, including every attribute which is set, as
+        it would appear in a Set-Cookie header value.
+        '''
+        components = ["%s=%s" % (self.key, self.value)]
+
+        if self.domain is not None:
+            components.append("Domain=%s" % self.domain)
+
+        if self.path is not None:
+            components.append("Path=%s" % self.path)
+
+        if self.max_age is not None:
+            components.append("Max-Age=%s" % self.max_age)
+
+        if self.expires is not None:
+            components.append("Expires=%s" % Cookie.datetime_to_string(self.expires))
+
+        if self.secure:
+            components.append("Secure")
+
+        if self.httponly:
+            components.append("HttpOnly")
+
+        return '; '.join(components)
+
+    def get_expiration(self):
+        '''
+        Return the effective expiration of the cookie as a datetime
+        object or None if no expiration is defined. Expiration may be
+        defined either by the "Expires" timestamp attribute or the
+        "Max-Age" duration attribute. If both are set "Max-Age" takes
+        precedence. If neither is set the cookie has no expiration and
+        None will be returned.
+
+        "Max-Age" specifies the number of seconds in the future from when the
+        cookie is received until it expires. Effectively it means
+        adding "Max-Age" seconds to a timestamp to arrive at an
+        expiration. By default the timestamp used to mark the arrival
+        of the cookie is set to the moment the cookie object is
+        created. However sometimes it is desirable to adjust the
+        received timestamp to something other than the moment of
+        object creation, therefore you can explicitly set the arrival
+        timestamp used in the "Max-Age" calculation.
+
+        "Expires" specifies an explicit timestamp.
+
+        If "Max-Age" is set a datetime object is returned which is the
+        sum of the arrival timestamp and "Max-Age".
+
+        If "Expires" is set a datetime object is returned matching the
+        timestamp specified as the "Expires" value.
+
+        If neither is set None is returned.
+        '''
+
+        if self.max_age is not None:
+            return self.timestamp + datetime.timedelta(seconds=self.max_age)
+
+        # Either the explicit "Expires" datetime or None when unset.
+        return self.expires
+
+    def normalize_expiration(self):
+        '''
+        An expiration may be specified either with an explicit
+        timestamp in the "Expires" attribute or via an offset
+        specified with the "Max-Age" attribute. The "Max-Age"
+        attribute has precedence over "Expires" if both are
+        specified.
+
+        This method normalizes the expiration of the cookie such that
+        only a "Expires" attribute remains after consideration of the
+        "Max-Age" attribute. This is useful when storing the cookie
+        for future reference.
+
+        Returns the resulting "Expires" value (a datetime or None).
+        '''
+
+        self.expires = self.get_expiration()
+        self.max_age = None
+        return self.expires
+
+    def set_defaults_from_url(self, url):
+        '''
+        If cookie domain and path attributes are not specified then
+        they assume defaults from the request url the cookie was
+        received from.
+        '''
+
+        parsed = urlparse.urlparse(url)
+
+        if self.domain is None:
+            # Use the host name only; the network location may also
+            # carry a port and user info which are not part of a
+            # cookie domain.
+            self.domain = (parsed.hostname or '').lower()
+
+        if self.path is None:
+            self.path = self.normalize_url_path(parsed.path)
+
+    def normalize(self, url):
+        '''
+        Missing cookie attributes will receive default values derived
+        from the request URL. The expiration value is normalized.
+        '''
+
+        self.set_defaults_from_url(url)
+        self.normalize_expiration()
+
+    def http_cookie(self):
+        '''
+        Return a string with just the key and value (no attributes).
+        This is appropriate for including in a HTTP Cookie header.
+        '''
+        return '%s=%s;' % (self.key, self.value)
+
+    def http_return_ok(self, url):
+        '''
+        Tests to see if a cookie should be returned when a request is
+        sent to a specific URL.
+
+        * The request url's host must match the cookie's domain
+          otherwise raises Cookie.URLMismatch.
+
+        * The path in the request url must contain the cookie's path
+          otherwise raises Cookie.URLMismatch.
+
+        * If the cookie defines an expiration date then the current
+          time must be less or equal to the cookie's expiration
+          timestamp. Will raise Cookie.Expired if a defined expiration
+          is not valid.
+
+        * An HttpOnly cookie requires an http or https URL and a
+          Secure cookie requires an https URL, otherwise raises
+          Cookie.URLMismatch.
+
+        If the test fails Cookie.Expired or Cookie.URLMismatch will be raised,
+        otherwise True is returned.
+
+        '''
+
+        def domain_valid(url_domain, cookie_domain):
+            '''
+            Compute domain component and perform test per
+            RFC 6265, Section 5.1.3. "Domain Matching"
+            '''
+            # FIXME: At the moment we can't import from ipalib at the
+            # module level because of a dependency loop (cycle) in the
+            # import. Our module layout needs to be refactored.
+            from ipalib.util import validate_domain_name
+            try:
+                validate_domain_name(url_domain)
+            except Exception:
+                # Any validation failure means the URL host is not
+                # usable for domain matching.
+                return False
+
+            if cookie_domain is None:
+                return True
+
+            url_domain = url_domain.lower()
+            cookie_domain = cookie_domain.lower()
+
+            if url_domain == cookie_domain:
+                return True
+
+            # A suffix match is only accepted for cookie domains
+            # written with a leading dot.
+            return cookie_domain.startswith('.') and url_domain.endswith(cookie_domain)
+
+        def path_valid(url_path, cookie_path):
+            '''
+            Compute path component and perform test per
+            RFC 6265, Section 5.1.4. "Paths and Path-Match"
+            '''
+
+            if cookie_path is None:
+                return True
+
+            cookie_path = cookie_path.lower()
+
+            # Path-match is performed against the request path itself,
+            # not its directory, otherwise a cookie path with a
+            # trailing slash (e.g. '/ipa/') never matches a request
+            # below it (e.g. '/ipa/xml'). Every path accepted by the
+            # former directory based comparison is still accepted.
+            request_path = url_path.lower()
+            if not request_path.startswith('/'):
+                request_path = '/'
+
+            if cookie_path == request_path:
+                return True
+
+            if cookie_path and request_path.startswith(cookie_path):
+                if cookie_path.endswith('/'):
+                    return True
+
+                # The cookie path must end at a path element boundary.
+                if request_path[len(cookie_path):].startswith('/'):
+                    return True
+
+            return False
+
+        cookie_name = self.key
+
+        parsed = urlparse.urlparse(url)
+        url_scheme = parsed.scheme.lower()
+        # Host name only, a port or user info in the network location
+        # is not part of the domain being matched.
+        url_domain = parsed.hostname or ''
+        url_path = parsed.path
+
+        cookie_expiration = self.get_expiration()
+        if cookie_expiration is not None:
+            now = datetime.datetime.utcnow()
+            if cookie_expiration < now:
+                raise Cookie.Expired("cookie named '%s'; expired at %s" %
+                                     (cookie_name,
+                                      self.datetime_to_string(cookie_expiration)))
+
+        if not domain_valid(url_domain, self.domain):
+            raise Cookie.URLMismatch("cookie named '%s'; it's domain '%s' does not match URL domain '%s'" %
+                                     (cookie_name, self.domain, url_domain))
+
+        if not path_valid(url_path, self.path):
+            raise Cookie.URLMismatch("cookie named '%s'; it's path '%s' does not contain the URL path '%s'" %
+                                     (cookie_name, self.path, url_path))
+
+        if self.httponly:
+            if url_scheme not in ('http', 'https'):
+                raise Cookie.URLMismatch("cookie named '%s'; is restricted to HTTP but it's URL scheme is '%s'" %
+                                         (cookie_name, url_scheme))
+
+        if self.secure:
+            if url_scheme not in ('https',):
+                raise Cookie.URLMismatch("cookie named '%s'; is restricted to secure transport but it's URL scheme is '%s'" %
+                                         (cookie_name, url_scheme))
+
+        return True