extras-buildsys/server Builder.py,1.25,1.26

Daniel Williams (dcbw) fedora-extras-commits at redhat.com
Sun Jan 22 06:00:42 UTC 2006


Author: dcbw

Update of /cvs/fedora/extras-buildsys/server
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv29361/server

Modified Files:
	Builder.py 
Log Message:
2006-01-22  Dan Williams  <dcbw at redhat.com>

    * server/Builder.py
        - Move most of the building_jobs() logic into a separate thread
            (BuildingJobsCheck) to work around SSL hangs that stall the
            whole server




Index: Builder.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/server/Builder.py,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -r1.25 -r1.26
--- Builder.py	29 Nov 2005 06:41:51 -0000	1.25
+++ Builder.py	22 Jan 2006 06:00:35 -0000	1.26
@@ -34,6 +34,34 @@
 SUSPEND_TIMEOUT = 'timeout'
 SUSPEND_HARD_ERROR = 'hard-error'
 
+# HACK: This class is a hack to work around SSL hanging issues,
+# which cause the whole server to grind to a halt
+class BuildingJobsCheck(threading.Thread):
+    def __init__(self, server, address):
+        self._server = server
+        self._address = address
+
+        self.done = False
+        self.failed = False
+
+        threading.Thread.__init__(self)
+        self.setName("BuildingJobsCheck: %s" % address)
+
+    def run(self):
+        jobs = {}
+        free_slots = 0
+        try:
+            (jobs, free_slots) = self._server.building_jobs()
+        except (socket.error, socket.timeout, OpenSSL.SSL.SysCallError, OpenSSL.SSL.Error, xmlrpclib.ProtocolError):
+            self.failed = True
+        except xmlrpclib.Fault, e:
+            print "Builder Error (%s) in _building_jobs(): builder replied '%s'" % (self._address, e)
+            self.failed = True
+        self.jobs = jobs
+        self.free_slots = free_slots
+        self.done = True
+
+
 class Builder(threading.Thread):
     """ Tracks all jobs on a builder instance """
 
@@ -65,16 +93,16 @@
             certs['ca_cert'] = self._server_cfg.get_str("SSL", "ca_cert")
             certs['peer_ca_cert'] = self._server_cfg.get_str("SSL", "ca_cert")
 
-        self._server = XMLRPCServerProxy.PlgXMLRPCServerProxy(self._address, certs, timeout=20)
+        self._server = XMLRPCServerProxy.PlgXMLRPCServerProxy(self._address, certs)
         self._server_lock = threading.Lock()
 
+        threading.Thread.__init__(self)
+        self.setName("Builder: %s" % address)
+
         (self._alive, target_list) = self._ping_builder()
         if self._alive:
             self._init_builder(target_list)
 
-        threading.Thread.__init__(self)
-        self.setName("Builder: %s" % address)
-
     def _init_builder(self, target_list):
         self._target_list = target_list
 
@@ -99,18 +127,28 @@
         return num_slots
 
     def _building_jobs(self):
-        jobs = {}
-        try:
-            (jobs, free_slots) = self._server.building_jobs()
-            self._unavail_count = 0
-            self._alive = True
-            self._free_slots = free_slots
-        except (socket.error, socket.timeout, OpenSSL.SSL.SysCallError, OpenSSL.SSL.Error, xmlrpclib.ProtocolError):
-            self._unavail_count = self._unavail_count + 1
-        except xmlrpclib.Fault, e:
-            print "Builder Error (%s) in _building_jobs(): builder replied '%s'" % (self.address, e)
-            self._unavail_count = self._unavail_count + 1
-        return jobs
+        bjc = BuildingJobsCheck(self._server, self.address)
+
+        curtime = time.time()
+        bjc.start()
+
+        # Give the check 10s, otherwise screw it
+        while time.time() - curtime < 10:
+            if bjc.done:
+                break
+            time.sleep(0.5)
+
+        if bjc.done:
+            if not bjc.failed:
+                self._unavail_count = 0
+                self._alive = True
+                self._free_slots = bjc.free_slots
+                return bjc.jobs
+            else:
+                # Error of some kind
+                self._unavail_count = self._unavail_count + 1
+
+        return {}
 
     def _ping_builder(self):
         target_list = []
@@ -309,7 +347,7 @@
                     else:
                         # Wait and ping again
                         self._ping_timeout = time.time()
-
+
                     # Reset current ping interval to default
                     self._cur_ping_interval = self._BUILDER_PING_INTERVAL
                     self._ping_now = False




More information about the fedora-extras-commits mailing list