1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
# forkbomb is a module that partitions arbitrary workloads
# among N seperate forks, for a configurable N, and
# collates results upon return, as if it never forked.
#
# Copyright 2007, Red Hat, Inc
# Michael DeHaan <mdehaan@redhat.com>
#
# This software may be freely redistributed under the terms of the GNU
# general public license.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import os
import random # for testing only
import time # for testing only
import shelve
import bsddb
import sys
import tempfile
import fcntl
DEFAULT_FORKS = 4
DEFAULT_CACHE_DIR = "/var/lib/func"
def __get_storage(dir):
"""
Return a tempfile we can use for storing data.
"""
dir = os.path.expanduser(dir)
if not os.path.exists(dir):
os.makedirs(dir)
return tempfile.mktemp(suffix='', prefix='asynctmp', dir=dir)
def __access_buckets(filename,clear,new_key=None,new_value=None):
"""
Access data in forkbomb cache, potentially clearing or
modifying it as required.
"""
internal_db = bsddb.btopen(filename, 'c', 0644 )
handle = open(filename,"r")
fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
storage = shelve.BsdDbShelf(internal_db)
if clear:
storage.clear()
storage.close()
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
return {}
if not storage.has_key("data"):
storage["data"] = {}
else:
pass
if new_key is not None:
# bsdb is a bit weird about this
newish = storage["data"].copy()
newish[new_key] = new_value
storage["data"] = newish
rc = storage["data"].copy()
storage.close()
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
return rc
def __bucketize(pool, slots):
"""
Given a pre-existing list of X number of tasks, partition
them into a hash of Y number of slots.
"""
buckets = {}
count = 0
for key in pool:
count = count + 1
slot = count % slots
if not buckets.has_key(slot):
buckets[slot] = []
buckets[slot].append(key)
return buckets
def __with_my_bucket(bucket_number,buckets,what_to_do,filename):
"""
Process all tasks assigned to a given fork, and save
them in the shelf.
"""
things_in_my_bucket = buckets[bucket_number]
results = {}
for thing in things_in_my_bucket:
(nkey,nvalue) = what_to_do(bucket_number,buckets,thing)
__access_buckets(filename,False,nkey,nvalue)
def __forkbomb(mybucket,buckets,what_to_do,filename):
"""
Recursive function to spawn of a lot of worker forks.
"""
nbuckets = len(buckets)
pid = os.fork()
if pid != 0:
if mybucket < (nbuckets-1):
__forkbomb(mybucket+1,buckets,what_to_do,filename)
try:
os.waitpid(pid,0)
except OSError, ose:
if ose.errno == 10:
pass
else:
raise ose
else:
__with_my_bucket(mybucket,buckets,what_to_do,filename)
sys.exit(0)
def __demo(bucket_number, buckets, my_item):
"""
This is a demo handler for test purposes.
It just multiplies all numbers by 1000, but slowly.
"""
# print ">> I am fork (%s) and I am processing item (%s)" % (bucket_number, my_item)
# just to verify forks are not sequential
sleep = random.randrange(0,4)
time.sleep(sleep)
return (my_item, my_item * 1000)
def batch_run(pool,callback,nforks=DEFAULT_FORKS,cachedir=DEFAULT_CACHE_DIR):
"""
Given an array of items (pool), call callback in each one, but divide
the workload over nfork forks. Temporary files used during the
operation will be created in cachedir and subsequently deleted.
"""
if nforks <= 1:
# modulus voodoo gets crazy otherwise and bad things happen
nforks = 2
shelf_file = __get_storage(cachedir)
__access_buckets(shelf_file,True,None)
buckets = __bucketize(pool, nforks)
__forkbomb(1,buckets,callback,shelf_file)
rc = __access_buckets(shelf_file,False,None)
os.remove(shelf_file)
return rc
def __test(nforks=4,sample_size=20):
pool = xrange(0,sample_size)
print batch_run(pool,__demo,nforks=nforks)
if __name__ == "__main__":
__test()
|