summaryrefslogtreecommitdiffstats
path: root/khashmir/khash.py
blob: 2750a7dbfea3851376421437906e5e7aa4c2e064 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License).  You may not copy or use this file, in either
# source code or executable form, except in compliance with the License.  You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the License
# for the specific language governing rights and limitations under the
# License.

from sha import sha
from random import randint

#this is ugly, hopefully os.entropy will be in 2.4
try:
    from entropy import entropy
except ImportError:
    def entropy(n):
        s = ''
        for i in range(n):
            s += chr(randint(0,255))
        return s

def intify(hstr):
    """20 bit hash, big-endian -> long python integer"""
    assert len(hstr) == 20
    return long(hstr.encode('hex'), 16)

def stringify(num):
    """long int -> 20-character string"""
    str = hex(num)[2:]
    if str[-1] == 'L':
        str = str[:-1]
    if len(str) % 2 != 0:
        str = '0' + str
    str = str.decode('hex')
    return (20 - len(str)) *'\x00' + str
    
def distance(a, b):
    """distance between two 160-bit hashes expressed as 20-character strings"""
    return intify(a) ^ intify(b)


def newID():
    """returns a new pseudorandom globally unique ID string"""
    h = sha()
    h.update(entropy(20))
    return h.digest()

def newIDInRange(min, max):
    return stringify(randRange(min,max))
    
def randRange(min, max):
    return min + intify(newID()) % (max - min)
    
def newTID():
    return randRange(-2**30, 2**30)

### Test Cases ###
import unittest

class NewID(unittest.TestCase):
    def testLength(self):
        self.assertEqual(len(newID()), 20)
    def testHundreds(self):
        for x in xrange(100):
            self.testLength

class Intify(unittest.TestCase):
    known = [('\0' * 20, 0),
            ('\xff' * 20, 2L**160 - 1),
            ]
    def testKnown(self):
        for str, value in self.known: 
            self.assertEqual(intify(str),  value)
    def testEndianessOnce(self):
        h = newID()
        while h[-1] == '\xff':
            h = newID()
        k = h[:-1] + chr(ord(h[-1]) + 1)
        self.assertEqual(intify(k) - intify(h), 1)
    def testEndianessLots(self):
        for x in xrange(100):
            self.testEndianessOnce()

class Disantance(unittest.TestCase):
    known = [
            (("\0" * 20, "\xff" * 20), 2**160L -1),
            ((sha("foo").digest(), sha("foo").digest()), 0),
            ((sha("bar").digest(), sha("bar").digest()), 0)
            ]
    def testKnown(self):
        for pair, dist in self.known:
            self.assertEqual(distance(pair[0], pair[1]), dist)
    def testCommutitive(self):
        for i in xrange(100):
            x, y, z = newID(), newID(), newID()
            self.assertEqual(distance(x,y) ^ distance(y, z), distance(x, z))
        
class RandRange(unittest.TestCase):
    def testOnce(self):
        a = intify(newID())
        b = intify(newID())
        if a < b:
            c = randRange(a, b)
            self.assertEqual(a <= c < b, 1, "output out of range %d  %d  %d" % (b, c, a))
        else:
            c = randRange(b, a)
            assert b <= c < a, "output out of range %d  %d  %d" % (b, c, a)

    def testOneHundredTimes(self):
        for i in xrange(100):
            self.testOnce()



if __name__ == '__main__':
    unittest.main()