summaryrefslogtreecommitdiffstats
path: root/x509/base64utils.py
blob: 56e3f2b2992240862f09660ce0ece1a21fd85e93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# vim: tabstop=4 shiftwidth=4 softtabstop=4

# Copyright 2013 Red Hat, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import cStringIO
import re
import string
import urllib

"""

Python provides the base64 module as a core module but this is mostly
limited to encoding and decoding base64 and its variants. It is often
useful to be able to perform other operations on base64 text. This
module is meant to be used in conjunction with the core base64 module.

Standardized base64 is defined in
RFC-4648 "The Base16, Base32, and Base64 Data Encodings".

This module provides the following base64 utility functionality:

    * tests if text is valid base64
    * filter whitespace from base64
    * convert base64 between different alphabets
    * Handle padding issues
        - test if base64 is padded
        - removes padding
        - restores padding
    * wraps base64 text into formatted blocks
        - via iterator
        - return formatted string

"""


class InvalidBase64Error(ValueError):
    """Signal text that is not well-formed base64.

    Raised by the functions in this module when the length of the text
    or the placement of its pad characters is inconsistent. Being a
    ValueError subclass, it is also caught by ``except ValueError``.
    """

# Match text consisting solely of characters from the base64 / base64url
# alphabet (pad character included). The hyphen is placed last inside the
# character class so it is read as a literal rather than as a range.
base64_alphabet_re = re.compile(r'^[A-Za-z0-9+/=]+$')
base64url_alphabet_re = re.compile(r'^[A-Za-z0-9_=-]+$')

# Match a run of one or more characters outside the respective alphabet.
base64_non_alphabet_re = re.compile(r'[^A-Za-z0-9+/=]+')
base64url_non_alphabet_re = re.compile(r'[^A-Za-z0-9_=-]+')

# Whitespace runs, i.e. the formatting that filter_formatting() removes.
_strip_formatting_re = re.compile(r'\s+')

# Translation tables that swap the two "altchars" between the alphabets.
# string.maketrans() is absent on current Python 3 releases, where
# str.maketrans() builds the equivalent table for str.translate().
try:
    _maketrans = string.maketrans
except AttributeError:
    _maketrans = str.maketrans

_base64_to_base64url_trans = _maketrans('+/', '-_')
_base64url_to_base64_trans = _maketrans('-_', '+/')


def is_valid_base64(text):
    """Report whether the input text can be base64 decoded.

    Formatting whitespace is removed first. The text is rejected if any
    character outside the base64 alphabet remains, or if
    base64_is_padded() either returns False or raises
    InvalidBase64Error.

    :param text: input base64 text
    :type text: string
    :returns: bool -- True if text can be decoded as base64, False otherwise
    """
    stripped = filter_formatting(text)

    # A single character outside the alphabet rules the text out.
    if base64_non_alphabet_re.search(stripped) is not None:
        return False

    try:
        padded = base64_is_padded(stripped)
    except InvalidBase64Error:
        # Misplaced padding, or a pad in text whose length is not a
        # multiple of 4.
        padded = False
    return padded


def is_valid_base64url(text):
    """Report whether the input text can be base64url decoded.

    Formatting whitespace is removed first. The text is rejected if any
    character outside the base64url alphabet remains, or if
    base64_is_padded() either returns False or raises
    InvalidBase64Error.

    :param text: input base64url text
    :type text: string
    :returns: bool -- True if text can be decoded as base64url,
              False otherwise
    """
    stripped = filter_formatting(text)

    # A single character outside the alphabet rules the text out.
    if base64url_non_alphabet_re.search(stripped) is not None:
        return False

    try:
        padded = base64_is_padded(stripped)
    except InvalidBase64Error:
        # Misplaced padding, or a pad in text whose length is not a
        # multiple of 4.
        padded = False
    return padded


def filter_formatting(text):
    """Strip formatting whitespace from base64 text.

    Base64 text is frequently laid out with spaces, line endings and
    similar whitespace. Every run of whitespace is deleted here, leaving
    the remaining characters in their original order.

    Only whitespace is removed; other characters that do not belong to
    a base64 alphabet are left in place.

    :param text: input text to filter
    :type text: string
    :returns: string -- filtered text without formatting
    """
    unformatted = re.sub(_strip_formatting_re, '', text)
    return unformatted


def base64_to_base64url(text):
    """Convert base64 text to base64url text.

    base64url text is designed to be safe for use in filenames and
    URLs. It is defined in RFC-4648 Section 5.

    base64url differs from base64 in the last two alphabet characters
    at index 62 and 63, these are sometimes referred to as the
    altchars. The '+' character at index 62 is replaced by '-'
    (hyphen) and the '/' character at index 63 is replaced by '_'
    (underscore).

    This function only translates the altchars, non-alphabet
    characters are not filtered out.

    WARNING
    -------

    base64url continues to use the '=' pad character which is NOT URL
    safe. RFC-4648 suggests two alternate methods to deal with this.

    percent-encode
        percent-encode the pad character (e.g. '=' becomes
        '%3D'). This makes the base64url text fully safe. But
        percent-encoding has the downside of requiring
        percent-decoding prior to feeding the base64url text into a
        base64url decoder since most base64url decoders do not
        recognize %3D as a pad character and most decoders require
        correct padding.

    no-padding
        padding is not strictly necessary to decode base64 or
        base64url text, the pad can be computed from the input text
        length. However many decoders demand padding and will consider
        non-padded text to be malformed. If one wants to omit the
        trailing pad character(s) for use in URLs it can be added back
        using the base64_assure_padding() function.

    This function makes no decisions about which padding methodology to
    use. One can either call base64_strip_padding() to remove any pad
    characters (restoring later with base64_assure_padding()) or call
    base64url_percent_encode() to percent-encode the pad characters.

    :param text: input base64 text
    :type text: string
    :returns: string -- base64url text
    """
    # replace() accepts byte strings and unicode strings alike, whereas a
    # string.maketrans() table can only be used to translate byte strings.
    return text.replace('+', '-').replace('/', '_')


def base64url_to_base64(text):
    """Convert base64url text to base64 text.

    See base64_to_base64url() for a description of base64url text and
    its issues.

    Only the two altchars are translated: '-' becomes '+' and '_'
    becomes '/'. Every other character is passed through untouched.

    This function does NOT handle percent-encoded pad characters, they
    will be left intact. If the input base64url text is
    percent-encoded you should call base64url_percent_decode() on it
    first.

    :param text: text in base64url alphabet
    :type text: string
    :returns: string -- text in base64 alphabet

    """
    # replace() accepts byte strings and unicode strings alike, whereas a
    # string.maketrans() table can only be used to translate byte strings.
    return text.replace('-', '+').replace('_', '/')


def base64_is_padded(text, pad='='):
    """Report whether base64 text is padded to a multiple of 4 characters.

    The text must already be in a base64 alphabet and the pad must be a
    single character. Percent-encoded text (pad written as '%3D') has to
    be converted back first, e.g. with base64url_percent_decode().

    Non-empty text whose length is a multiple of 4 counts as padded,
    provided any pad character appears only in the last two positions
    and is not followed by a non-pad character. Empty text and text of
    any other length count as not padded, and such text must not
    contain the pad character at all.

    :param text: text containing ONLY characters in a base64 alphabet
    :type text: string
    :param pad: pad character (must be single character) (default: '=')
    :type pad: string
    :returns: bool -- True if padded, False otherwise
    :raises: ValueError, InvalidBase64Error
    """
    # NOTE(review): `_` is not imported in the visible part of this
    # module; presumably it is installed as a builtin by
    # gettext.install() -- confirm.
    if len(pad) != 1:
        raise ValueError(_('pad must be single character'))

    length = len(text)
    first_pad = text.find(pad)

    # Empty text, or a length that is not a multiple of 4: not padded,
    # and no pad character may be present anywhere.
    if length == 0 or length % 4 != 0:
        if first_pad >= 0:
            raise InvalidBase64Error(_('text is not a multiple of 4, '
                                       'but contains pad "%s"') % pad)
        return False

    # Length is a multiple of 4: a pad may only sit in the final two slots.
    if 0 <= first_pad < length - 2:
        raise InvalidBase64Error(_('text is multiple of 4, '
                                   'but pad "%s" occurs before '
                                   '2nd to last char') % pad)

    # A pad in the 2nd to last slot must be followed by another pad.
    if first_pad == length - 2 and text[-1] != pad:
        raise InvalidBase64Error(_('text is multiple of 4, '
                                   'but pad "%s" occurs before '
                                   'non-pad last char') % pad)

    return True


def base64url_percent_encode(text):
    """Percent-encode base64url padding.

    The input text should only contain base64url alphabet
    characters. Any non-base64url alphabet characters will also be
    subject to percent-encoding.

    The length of the text is checked before encoding: padded text is
    expected to be a multiple of 4 characters long.

    :param text: text containing ONLY characters in the base64url alphabet
    :type text: string
    :returns: string -- percent-encoded base64url text
    :raises: InvalidBase64Error
    """

    if len(text) % 4 != 0:
        raise InvalidBase64Error(_('padded base64url text must be '
                                   'multiple of 4 characters'))

    try:
        quote = urllib.quote
    except AttributeError:
        # Python 3 relocated quote() to urllib.parse.
        from urllib.parse import quote

    return quote(text)


def base64url_percent_decode(text):
    """Percent-decode base64url padding.

    The input text should only contain base64url alphabet
    characters and the percent-encoded pad character. Any other
    percent-encoded characters will be subject to percent-decoding.

    The length is checked after decoding: the decoded text is expected
    to be a multiple of 4 characters long.

    :param text: base64url alphabet text
    :type text: string
    :returns: string -- percent-decoded base64url text
    :raises: InvalidBase64Error
    """

    try:
        unquote = urllib.unquote
    except AttributeError:
        # Python 3 relocated unquote() to urllib.parse.
        from urllib.parse import unquote

    decoded_text = unquote(text)

    if len(decoded_text) % 4 != 0:
        raise InvalidBase64Error(_('padded base64url text must be '
                                   'multiple of 4 characters'))

    return decoded_text


def base64_strip_padding(text, pad='='):
    """Return the input base64 text with trailing padding removed.

    At most two trailing pad characters are removed. Text shorter than
    4 characters is returned unchanged, as is text that does not end
    with the pad character.

    :param text: text containing ONLY characters in a base64 alphabet
    :type text: string
    :param pad: pad character (must be single character) (default: '=')
    :type pad: string
    :returns: string -- base64 text without padding
    :raises: ValueError
    """
    if len(pad) != 1:
        raise ValueError(_('pad must be single character'))

    # Text under 4 characters cannot be padded; text not ending in the
    # pad has nothing to strip.
    if len(text) < 4 or text[-1] != pad:
        return text

    # The last character is a pad; drop the one before it too if it
    # is also a pad.
    trailing = 2 if text[-2] == pad else 1
    return text[0:-trailing]


def base64_assure_padding(text, pad='='):
    """Return the input text with base64 padding appended if needed.

    Base64 text is normally expected to be a multiple of 4
    characters. Each 4 character base64 sequence produces 3 octets of
    binary data. If the binary data is not a multiple of 3 the base64
    text is padded at the end with a pad character such that it is
    always a multiple of 4. Padding is ignored and does not alter the
    binary data nor its length.

    In some circumstances it is desirable to omit the padding
    character due to transport encoding conflicts. Base64 text can
    still be correctly decoded if the length of the base64 text
    (consisting only of characters in the desired base64 alphabet) is
    known, padding is not absolutely necessary.

    Some base64 decoders demand correct padding or one may wish to
    format RFC compliant base64, this function performs this action.

    Input is assumed to consist only of members of a base64 alphabet
    (i.e. no whitespace). Use the filter_formatting() function to
    remove whitespace beforehand.

    If the text ends with the pad it is assumed to already be padded
    and is returned unchanged. Otherwise enough pad characters are
    appended to bring the length up to the next multiple of 4.

    :param text: text containing ONLY characters in a base64 alphabet
    :type text: string
    :param pad: pad character (must be single character) (default: '=')
    :type pad: string
    :returns: string -- input base64 text with padding
    :raises: ValueError
    """
    if len(pad) != 1:
        raise ValueError(_('pad must be single character'))

    # Trailing pad present: treat the text as already padded.
    if text.endswith(pad):
        return text

    remainder = len(text) % 4
    if remainder:
        return text + pad * (4 - remainder)
    return text


def base64_wrap_iter(text, width=64):
    """Fold text into lines of text with max line length.

    Input is assumed to consist only of members of a base64
    alphabet (i.e. no whitespace). Iteration yields a sequence of lines,
    each at most width characters long; only the final line may be
    shorter. The lines do NOT terminate with a line ending.

    Use the filter_formatting() function to remove whitespace from the
    input text beforehand.

    A width of zero makes range() raise ValueError when iteration
    starts; a negative width yields no lines at all.

    :param text: text containing ONLY characters in a base64 alphabet
    :type text: string
    :param width: number of characters in each wrapped line (default: 64)
    :type width: int
    :returns: generator -- sequence of lines of base64 text.
    """
    # range() is used instead of xrange() because xrange() only exists
    # on Python 2; both produce the same sequence of start offsets.
    for start in range(0, len(text), width):
        yield text[start:start + width]


def base64_wrap(text, width=64):
    """Fold text into newline-terminated lines with max line length.

    Input is assumed to consist only of members of a base64
    alphabet (i.e. no whitespace). The text is folded into lines of
    width characters (the final line may be shorter), every line is
    terminated with a newline character, and the result is returned as
    a single string. Empty input produces an empty string.

    Use the filter_formatting() function to remove whitespace from the
    input text beforehand.

    :param text: text containing ONLY characters in a base64 alphabet
    :type text: string
    :param width: number of characters in each wrapped line (default: 64)
    :type width: int
    :returns: string -- wrapped text.
    """
    # Build the result with a single join rather than writing each line
    # into an in-memory buffer object.
    return ''.join(line + '\n' for line in base64_wrap_iter(text, width))