func/jobthing.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

# jobthing is a module that allows for background execution of a task, and
# getting status of that task.  The ultimate goal is to allow ajaxyness
# of GUI apps using Func, and also for extremely long running tasks that
# we don't want to block on as called by scripts using the FunC API.  The
# CLI should not use this.
#
# Copyright 2007, Red Hat, Inc
# Michael DeHaan <mdehaan@redhat.com>
#
# This software may be freely redistributed under the terms of the GNU
# general public license.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

import os
import random # for testing only
import time   # for testing only
import shelve
import bsddb
import sys
import tempfile
import fcntl
import forkbomb
import utils

JOB_ID_RUNNING = 0
JOB_ID_FINISHED = 1
JOB_ID_LOST_IN_SPACE = 2
JOB_ID_PARTIAL = 3

# how long to retain old job records in the job id database
RETAIN_INTERVAL = 60 * 60    

# where to store the internal job id database
CACHE_DIR = "/var/lib/func"

def __update_status(jobid, status, results, clear=False):
    return __access_status(jobid=jobid, status=status, results=results, write=True)

def __get_status(jobid):
    return __access_status(jobid=jobid, write=False)

def purge_old_jobs():
    return __access_status(purge=True)

def __purge_old_jobs(storage):
    """
    Deletes jobs older than RETAIN_INTERVAL seconds.  
    MINOR FIXME: this probably should be a more intelligent algorithm that only
    deletes jobs if the database is too big and then only the oldest jobs
    but this will work just as well.
    """
    nowtime = time.time()
    for x in storage.keys():
        # minion jobs have "-minion" in the job id so disambiguation so we need to remove that
        jobkey = x.replace("-","").replace("minion","")
        create_time = float(jobkey)
        if nowtime - create_time > RETAIN_INTERVAL:
            del storage[x]

def __access_status(jobid=0, status=0, results=0, clear=False, write=False, purge=False):

    dir = os.path.expanduser(CACHE_DIR)
    if not os.path.exists(dir):
        os.makedirs(dir)
    filename = os.path.join(dir,"status-%s" % os.getuid()) 

    internal_db = bsddb.btopen(filename, 'c', 0644 )
    handle = open(filename,"r")
    fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
    storage = shelve.BsdDbShelf(internal_db)


    if clear:
        storage.clear()
        storage.close()
        fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
        return {}
    
    if purge or write:
        __purge_old_jobs(storage)

    if write:
        results = utils.remove_exceptions(results)
        # print "DEBUG: status=%s" % status
        # print "DEBUG: results=%s" % results
        storage[str(jobid)] = (status, results)
        rc = jobid
    elif not purge:
        if storage.has_key(str(jobid)):
            # tuple of (status, results)

            rc = storage[str(jobid)]
        else:
            rc = (JOB_ID_LOST_IN_SPACE, 0)
    else:
        rc = 0

    storage.close()
    fcntl.flock(handle.fileno(), fcntl.LOCK_UN)

    return rc

def batch_run(server, process_server, nforks):
    """
    This is the method used by the overlord side usage of jobthing.
    Minion side usage will use minion_async_run instead.

    Given an array of items (pool), call callback in each one, but divide
    the workload over nfork forks.  Temporary files used during the
    operation will be created in cachedir and subsequently deleted.    
    """
   
    job_id = time.time()
    pid = os.fork()
    if pid != 0:
        #print "DEBUG: UPDATE STATUS: r1: %s" % job_id
        __update_status(job_id, JOB_ID_RUNNING, -1)
        return job_id
    else:
        # kick off the job
        __update_status(job_id, JOB_ID_RUNNING,  -1)
        results = forkbomb.batch_run(server, process_server, nforks)
        
        # we now have a list of job id's for each minion, kill the task
        __update_status(job_id, JOB_ID_PARTIAL, results)
        sys.exit(0)

def minion_async_run(function_ref, args):
    """
    This is a simpler invocation for minion side async usage.
    """
    # to avoid confusion of job id's (we use the same job database)
    # minion jobs contain the string "minion".  
    job_id = "%s-minion" % time.time()
    pid = os.fork()
    if pid != 0:
        __update_status(job_id, JOB_ID_RUNNING, -1)
        return job_id
    else:
        __update_status(job_id, JOB_ID_RUNNING,  -1)
        results = function_ref(*args)
        __update_status(job_id, JOB_ID_FINISHED, results)
        sys.exit(0)

def job_status(jobid, client_class=None):
 
    # NOTE: client_class is here to get around some evil circular reference
    # type stuff.  This is intended to be called by minions (who can leave it None)
    # or by the Client module code (which does not need to be worried about it).  API
    # users should not be calling jobthing.py methods directly.
   
    got_status = __get_status(jobid)

    # if the status comes back as JOB_ID_PARTIAL what we have is actually a hash
    # of hostname/minion-jobid pairs.  Instantiate a client handle for each and poll them
    # for their actual status, filling in only the ones that are actually done.

    (interim_rc, interim_results) = got_status

    if interim_rc == JOB_ID_PARTIAL:

        partial_results = {}

        for host in interim_results.keys():

            minion_job = interim_results[host]
            client = client_class(host, noglobs=True, async=False)
            # print "DEBUG: client: %s" % client_class
            minion_result = client.jobs.job_status(minion_job)
            # print "DEBUG: minion: %s" % minion_result
            (minion_interim_rc, minion_interim_result) = minion_result

            some_missing = False
            if minion_interim_rc == JOB_ID_FINISHED:
                partial_results[host] = minion_interim_result 
            else: 

                some_missing = True

        if some_missing:
            return (JOB_ID_PARTIAL, partial_results)
        else:
            return (JOB_ID_FINISHED, partial_results)

    else:
        return got_status
   
    # of job id's on the minion in results.

if __name__ == "__main__":
    __test()