diff options
author | Michal Minar <miminar@redhat.com> | 2013-11-19 09:37:16 +0100 |
---|---|---|
committer | Michal Minar <miminar@redhat.com> | 2013-11-19 10:25:00 +0100 |
commit | 21ea2fe00a690bbc01ab241eea60d1d0a3f4349b (patch) | |
tree | 2814e0b88147815c81b65a976349f3825c59a81d /src/software | |
parent | f99c9cbfdd68e16516bc527db2b9c8cfc2b83343 (diff) | |
download | openlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.tar.gz openlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.tar.xz openlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.zip |
software: do not allow infinite resurrections
Some jobs may not be completable on some machines. With the current
algorithm, in case of a worker process failure, the terminated process is
resurrected and the job is restarted. This patch takes into account the
number of resurrections done during the processing of a single job. If this
number exceeds a limit (MAX_RESURRECTIONS), the job is thrown away and an
exception is raised.
Resolves: rhbz#1031132
Diffstat (limited to 'src/software')
-rw-r--r-- | src/software/lmi/software/yumdb/__init__.py | 37 |
1 files changed, 28 insertions, 9 deletions
diff --git a/src/software/lmi/software/yumdb/__init__.py b/src/software/lmi/software/yumdb/__init__.py index 54013f1..e27278a 100644 --- a/src/software/lmi/software/yumdb/__init__.py +++ b/src/software/lmi/software/yumdb/__init__.py @@ -59,6 +59,11 @@ from lmi.software.yumdb import errors LOG = cmpi_logging.get_logger(__name__) +#: Number of times the worker process will be resurrected for a completion of +#: single job. If the process dies afterwards (while still doing the same job) +#: an exception will be raised. +MAX_RESURRECTIONS = 1 + # ***************************************************************************** # Utilities # ***************************************************************************** @@ -201,6 +206,8 @@ class YumDB(singletonmixin.Singleton): self._expected = [] # {job_id : reply, ... } self._replies = {} + # {job_id : number_of_resurrections} + self._resurrections = {} LOG().trace_info('YumDB initialized') # ************************************************************************* @@ -215,14 +222,16 @@ class YumDB(singletonmixin.Singleton): """ if not self._worker.is_alive(): if self._worker.exitcode < 0: - LOG().error("[jobid=%d] worker" - " process(pid=%d) killed by signal %s", job.jobid, + msg = "worker process(pid=%d) killed by signal %s" % ( self._worker.pid, get_signal_name(-self._process.exitcode)) else: - LOG().error("[jobid=%d] worker" - " process(pid=%d) is dead - exit code: %d", - job.jobid, self._process.pid, self._worker.exitcode) + msg = "worker process(pid=%d) is dead - exit code: %d" % ( + self._process.pid, self._worker.exitcode) + LOG().error("[jobid=%d] %s", job.jobid, msg) with self._reply_lock: + if not job.jobid in self._resurrections: + self._resurrections[job.jobid] = 0 + self._resurrections[job.jobid] += 1 self._process = None LOG().error("[jobid=%d] starting new worker process", job.jobid) self._expected = [] @@ -235,10 +244,18 @@ class YumDB(singletonmixin.Singleton): self._worker.uplink.put(new_session_job) 
reply = self._worker.downlink.get() log_reply_error(new_session_job, reply) - self._worker.uplink.put(job) - self._expected.append(job.jobid) - # other waiting processes need to resend their requests - self._reply_cond.notifyAll() + if self._resurrections[job.jobid] > MAX_RESURRECTIONS: + LOG().warn("[jobid=%d] process has been resurrected maximum" + " number of times (%d times), cancelling job", job.jobid, + MAX_RESURRECTIONS) + self._reply_cond.notifyAll() + raise errors.TransactionError( + "failed to complete job: %s" % (msg)) + else: + self._worker.uplink.put(job) + self._expected.append(job.jobid) + # other waiting processes need to resend their requests + self._reply_cond.notifyAll() else: LOG().info("[jobid=%d] process is running, waiting some more", job.jobid) @@ -267,6 +284,7 @@ class YumDB(singletonmixin.Singleton): LOG().debug("[jobid=%d] received desired reply", job.jobid) with self._reply_lock: self._expected.remove(job.jobid) + self._resurrections.pop(job.jobid, None) self._reply_cond.notifyAll() return jobout else: @@ -308,6 +326,7 @@ class YumDB(singletonmixin.Singleton): job.jobid) try: self._expected.remove(job.jobid) + self._resurrections.pop(job.jobid, False) except ValueError: LOG().warn("[jobid=%d] reply not in expected list", job.jobid) |