extras-buildsys/server ArchJob.py, 1.27, 1.28 Builder.py, 1.34, 1.35 PackageJob.py, 1.45, 1.46

Daniel Williams (dcbw) fedora-extras-commits at redhat.com
Wed May 3 04:04:31 UTC 2006


Author: dcbw

Update of /cvs/fedora/extras-buildsys/server
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv15275/server

Modified Files:
	ArchJob.py Builder.py PackageJob.py 
Log Message:
2006-05-03  Dan Williams  <dcbw at redhat.com>

    More builder/server communication rework:
    - Add job status command
    - Implement repo unlock command
    - Fix bugs




Index: ArchJob.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/server/ArchJob.py,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- ArchJob.py	20 Mar 2006 12:47:01 -0000	1.27
+++ ArchJob.py	3 May 2006 04:04:28 -0000	1.28
@@ -38,12 +38,10 @@
 class ArchJob:
     """ Tracks a single build instance for a single arch on a builder """
 
-    def __init__(self, builder, cfg, server, par_job, jobid, target_dict):
+    def __init__(self, builder, par_job, jobid, target_dict):
         self.par_job = par_job
-        self.builder = builder
+        self._builder = builder
         self._repo = par_job.repo()
-        self._server = server
-        self._use_ssl = cfg.get_bool("Builders", "use_ssl")
         self.jobid = jobid
         self._status = 'starting'
         self._builder_status = ''
@@ -61,15 +59,6 @@
         self._die_lock = threading.Lock()
         self._prepping = False
 
-        # SSL certificate and key filenames
-        if self._use_ssl:
-            self._certs = {}
-            self._certs['key_and_cert'] = cfg.get_str("SSL", "server_key_and_cert")
-            self._certs['ca_cert'] = cfg.get_str("SSL", "ca_cert")
-            self._certs['peer_ca_cert'] = cfg.get_str("SSL", "ca_cert")
-        else:
-            self._certs = None
-
     def failure_noticed(self):
         return self._failure_noticed
 
@@ -98,12 +87,15 @@
     def arch(self):
         return self._target_dict['arch']
 
+    def builder(self):
+        return self._builder
+
     def _to_dict(self):
         attrdict = {}
         attrdict['jobid'] = self.jobid
         attrdict['parent_uid'] = self.par_job.uid
         attrdict['arch'] = self._target_dict['arch']
-        addr = self.builder.address()
+        (ip, addr) = self._builder.address()
         # for some reason, splithost doesn't like the protocol
         # method, you have to give it a string starting with "//"
         if addr.startswith("http"):
@@ -137,22 +129,6 @@
             self.par_job.bm.queue_archjob_status_update(self.jobid, attrdict)
             del attrdict
 
-    def _send_repo_unlocked(self):
-        success = False
-        try:
-            self._server.repo_unlocked(self.jobid)
-            success = True
-        except socket.error, e:
-            if not CommonErrors.canIgnoreSocketError(e):
-                print "%s (%s/%s): [ %s ] Unknown error sending repo unlocked: '%s'" % (self.par_job.uid,
-                            self.par_job.package, self._target_dict['arch'], self.builder.address(), e)
-        except socket.timeout, e:
-            print "%s (%s/%s): [ %s ] Timeout sending repo unlocked: '%s'.  Trying again." % (self.par_job.uid,
-                        self.par_job.package, self._target_dict['arch'], self.builder.address(), e)
-        except xmlrpclib.ProtocolError, e:
-            pass
-        return success
-
     def _dl_files(self):
         files = []
         success = False
@@ -162,10 +138,10 @@
         except socket.error, e:
             if not CommonErrors.canIgnoreSocketError(e):
                 print "%s (%s/%s): [ %s ] Unknown error getting file list: '%s'" % (self.par_job.uid,
-                            self.par_job.package, self._target_dict['arch'], self.builder.address(), e)
+                            self.par_job.package, self._target_dict['arch'], self._builder.address(), e)
         except socket.timeout, e:
             print "%s (%s/%s): [ %s ] Timeout getting file list: '%s'" % (self.par_job.uid,
-                        self.par_job.package, self._target_dict['arch'], self.builder.address(), e)
+                        self.par_job.package, self._target_dict['arch'], self._builder.address(), e)
         except xmlrpclib.ProtocolError, e:
             pass
         return (success, files)
@@ -193,10 +169,10 @@
     def _status_repo_unlock(self):
         # Builder will be in 'downloaded' state until
         # it notices that the repo has been unlocked 
-        if self._send_repo_unlocked():
-            self._prepping = True
-            if self._builder_status != 'downloaded':
-                self._set_status('running')
+        self._builder.unlock_repo_for_job(self._uniqid)
+        self._prepping = True
+        if self._builder_status != 'downloaded':
+            self._set_status('running')
 
     def _status_running(self):
         if self._builder_status != 'prepping':
@@ -279,7 +255,7 @@
                     dl_dict[DL_STATUS] = STATUS_INPROGRESS
                 except FileDownloader.FileNameException, e:
                     print "%s (%s/%s): [ %s ] Bad file name error getting %s: '%s'" % (self.par_job.uid,
-                                self.par_job.package, self._target_dict['arch'], self.builder.address(), url, e)
+                                self.par_job.package, self._target_dict['arch'], self._builder.address(), url, e)
                     # Hard error, we don't retry this one
                     dl_dict[DL_STATUS] = STATUS_ERROR
                 break


Index: Builder.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/server/Builder.py,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -r1.34 -r1.35
--- Builder.py	28 Apr 2006 03:17:41 -0000	1.34
+++ Builder.py	3 May 2006 04:04:28 -0000	1.35
@@ -181,6 +181,41 @@
         builder_dict['free_slots'] = self._free_slots
         return builder_dict
 
+    def _handle_building_jobs(self, cmd):
+        building_jobs = cmd.jobs()
+        reported_uniqids = []
+        new_cmds = []
+        print "Building Jobs: %s" % building_jobs
+        for item in building_jobs:
+            (uniqid, status) = cmd.get_job(item)
+            try:
+                job = self._jobs[uniqid]
+                job.set_builder_job_status(status)
+#                reported_uniqids.append(uniqid)
+            except KeyError:
+                pass
+
+            # We have to check jobs that weren't reported
+            # as 'building' by the builder, since the job
+            # may have finished on the builder and was
+            # removed from the building job list before we
+            # were able to know that it was done.  HACK
+            self._prepping_jobs = False
+            for jobid in self._jobs.keys():
+                # If the builder didn't report this job as building,
+                # and its not done, explicitly get its status
+                job = self._jobs[jobid]
+                if jobid not in reported_uniqids and job.get_status() != 'done':
+                    new_cmds.append(Commands.PlgCommandJobStatus(jobid, self._seq_gen.next()))                    
+
+                # Check for prepping jobs
+                if job.prepping():
+                    self._prepping_jobs = True
+
+        del reported_uniqids
+        print "New Commands: %s" % new_cmds
+        return new_cmds
+
 # HACK: This class is a hack to work around SSL hanging issues,
 # which cause the whole server to grind to a halt
 class BuildingJobsCheck(threading.Thread):
@@ -450,6 +485,18 @@
     def _init_builder(self, target_list):
         self._target_list = target_list
 
+    def unlock_repo_for_job(self, uniqid):
+        """Called by an archjob to request the sending of a RepoUnlocked
+        command to the builder for a particular archjob."""
+
+        cmd = Commands.PlgCommandUnlockRepo(uniqid, self._seq_gen.next())
+        self._lock.acquire()
+        self._cmd_queue.append(cmd)
+        self._lock.release()
+
+    def request_job_files(self, archjob):
+        pass
+
     def _handle_new_job_ack(self, ack):
         """Handle a NewJobAck command by finding the original command
         sent to the builder, removing it from the command queue, and notifying
@@ -457,16 +504,18 @@
 
         old_cmd = None
         self._lock.acquire()
-        for old_cmd in self._cmd_queue:
-            if old_cmd.seq() == ack.acked_seq() and isinstance(old_cmd, Commands.PlgCommandNewJobReq):
-                self._cmd_queue.remove(old_cmd)
+        for queued_cmd in self._cmd_queue:
+            if queued_cmd.seq() == ack.acked_seq() and isinstance(queued_cmd, Commands.PlgCommandNewJobReq):
+                old_cmd = queued_cmd
+                self._cmd_queue.remove(queued_cmd)
                 break
         self._lock.release()
 
         if old_cmd:
             parent = old_cmd.parent_job()
-            archjob = ArchJob.ArchJob(self, parent, ack.archjob_id(), old_cmd.target_dict())
-            self._jobs[jobid] = archjob
+            archjob_id = ack.archjob_id()
+            archjob = ArchJob.ArchJob(self, parent, archjob_id, old_cmd.target_dict())
+            self._jobs[archjob_id] = archjob
             parent.add_arch_job(archjob)
 
     def _dispatch_command(self, cmd):
@@ -482,6 +531,13 @@
             self._lock.release()
         elif isinstance(cmd, Commands.PlgCommandNewJobAck):
             self._handle_new_job_ack(cmd)
+        elif isinstance(cmd, Commands.PlgCommandBuildingJobs):
+            status_reqs = self._handle_building_jobs(cmd)
+            # Add any additional status requests onto our pending command queue
+            if len(status_reqs) > 0:
+                self._lock.acquire()
+                self._cmd_queue = self._cmd_queue + status_reqs
+                self._lock.release()
         else:
             print "Builder Error (%s): unhandled command '%s'" % (self._address, cmd.name())
 


Index: PackageJob.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/server/PackageJob.py,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -r1.45 -r1.46
--- PackageJob.py	24 Mar 2006 01:33:31 -0000	1.45
+++ PackageJob.py	3 May 2006 04:04:28 -0000	1.46
@@ -672,7 +672,7 @@
             t.start()
 
         self._archjobs_lock.release()
-        log("%s (%s/%s): %s - UID is %s" % (self.uid, self.package, jobarch, job.builder.address(), job.jobid))
+        log("%s (%s/%s): %s - UID is %s" % (self.uid, self.package, jobarch, job.builder().address(), job.jobid))
 
     def remove_arch_job(self, job):
         """ Removes an arch job when its builder is no longer responding """
@@ -801,7 +801,7 @@
                             msg = "Job failed on arch %s\n" % jobarch
                         elif job.download_failed():
                             msg = "Job failed on arch %s: couldn't download result files from builder '%s'.\n " \
-                            "Please contact the build system administrator." % (jobarch, job.builder.address())
+                            "Please contact the build system administrator." % (jobarch, job.builder().address())
                         elif job.internal_failure():
                             msg = "Job failed on arch %s: there was an internal build system failure.\n " \
                             "Please contact the build system administrator." % jobarch




More information about the fedora-extras-commits mailing list