summaryrefslogtreecommitdiffstats
path: root/src/software
diff options
context:
space:
mode:
authorMichal Minar <miminar@redhat.com>2013-11-19 09:37:16 +0100
committerMichal Minar <miminar@redhat.com>2013-11-19 10:25:00 +0100
commit21ea2fe00a690bbc01ab241eea60d1d0a3f4349b (patch)
tree2814e0b88147815c81b65a976349f3825c59a81d /src/software
parentf99c9cbfdd68e16516bc527db2b9c8cfc2b83343 (diff)
downloadopenlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.tar.gz
openlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.tar.xz
openlmi-providers-21ea2fe00a690bbc01ab241eea60d1d0a3f4349b.zip
software: do not allow infinite resurrections
Some jobs may not be completable on some machines. With the current algorithm, when the worker process fails, the terminated process is resurrected and the job is restarted. This patch tracks the number of resurrections performed while processing a single job. If this number exceeds a limit (MAX_RESURRECTIONS), the job is thrown away and an exception is raised. Resolves: rhbz#1031132
Diffstat (limited to 'src/software')
-rw-r--r--src/software/lmi/software/yumdb/__init__.py37
1 files changed, 28 insertions, 9 deletions
diff --git a/src/software/lmi/software/yumdb/__init__.py b/src/software/lmi/software/yumdb/__init__.py
index 54013f1..e27278a 100644
--- a/src/software/lmi/software/yumdb/__init__.py
+++ b/src/software/lmi/software/yumdb/__init__.py
@@ -59,6 +59,11 @@ from lmi.software.yumdb import errors
LOG = cmpi_logging.get_logger(__name__)
+#: Maximum number of times the worker process will be resurrected while
+#: completing a single job. If the process dies again after that (while still
+#: working on the same job), an exception is raised.
+MAX_RESURRECTIONS = 1
+
# *****************************************************************************
# Utilities
# *****************************************************************************
@@ -201,6 +206,8 @@ class YumDB(singletonmixin.Singleton):
self._expected = []
# {job_id : reply, ... }
self._replies = {}
+ # {job_id : number_of_resurrections}
+ self._resurrections = {}
LOG().trace_info('YumDB initialized')
# *************************************************************************
@@ -215,14 +222,16 @@ class YumDB(singletonmixin.Singleton):
"""
if not self._worker.is_alive():
if self._worker.exitcode < 0:
- LOG().error("[jobid=%d] worker"
- " process(pid=%d) killed by signal %s", job.jobid,
+ msg = "worker process(pid=%d) killed by signal %s" % (
self._worker.pid, get_signal_name(-self._process.exitcode))
else:
- LOG().error("[jobid=%d] worker"
- " process(pid=%d) is dead - exit code: %d",
- job.jobid, self._process.pid, self._worker.exitcode)
+ msg = "worker process(pid=%d) is dead - exit code: %d" % (
+ self._process.pid, self._worker.exitcode)
+ LOG().error("[jobid=%d] %s", job.jobid, msg)
with self._reply_lock:
+ if not job.jobid in self._resurrections:
+ self._resurrections[job.jobid] = 0
+ self._resurrections[job.jobid] += 1
self._process = None
LOG().error("[jobid=%d] starting new worker process", job.jobid)
self._expected = []
@@ -235,10 +244,18 @@ class YumDB(singletonmixin.Singleton):
self._worker.uplink.put(new_session_job)
reply = self._worker.downlink.get()
log_reply_error(new_session_job, reply)
- self._worker.uplink.put(job)
- self._expected.append(job.jobid)
- # other waiting processes need to resend their requests
- self._reply_cond.notifyAll()
+ if self._resurrections[job.jobid] > MAX_RESURRECTIONS:
+ LOG().warn("[jobid=%d] process has been resurrected maximum"
+ " number of times (%d times), cancelling job", job.jobid,
+ MAX_RESURRECTIONS)
+ self._reply_cond.notifyAll()
+ raise errors.TransactionError(
+ "failed to complete job: %s" % (msg))
+ else:
+ self._worker.uplink.put(job)
+ self._expected.append(job.jobid)
+ # other waiting processes need to resend their requests
+ self._reply_cond.notifyAll()
else:
LOG().info("[jobid=%d] process is running, waiting some more",
job.jobid)
@@ -267,6 +284,7 @@ class YumDB(singletonmixin.Singleton):
LOG().debug("[jobid=%d] received desired reply", job.jobid)
with self._reply_lock:
self._expected.remove(job.jobid)
+ self._resurrections.pop(job.jobid, None)
self._reply_cond.notifyAll()
return jobout
else:
@@ -308,6 +326,7 @@ class YumDB(singletonmixin.Singleton):
job.jobid)
try:
self._expected.remove(job.jobid)
+ self._resurrections.pop(job.jobid, False)
except ValueError:
LOG().warn("[jobid=%d] reply not in expected list",
job.jobid)