extras-buildsys-temp/automation2/server CONFIG.py, NONE, 1.1 aw_manager.py, 1.2, 1.3 bm_server.py, 1.1, 1.2 buildjob.py, 1.1, 1.2 buildmaster.py, 1.1, 1.2 bm_server_config.py, 1.1, NONE

Daniel Williams (dcbw) fedora-extras-commits at redhat.com
Thu May 12 16:48:17 UTC 2005


Author: dcbw

Update of /cvs/fedora/extras-buildsys-temp/automation2/server
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv27643/server

Modified Files:
	aw_manager.py bm_server.py buildjob.py buildmaster.py 
Added Files:
	CONFIG.py 
Removed Files:
	bm_server_config.py 
Log Message:
2005-05-12  Dan Williams  <dcbw at redhat.com>

    * Add the ability to drop ArchWelders together with the jobs they are
        currently building, and to find them again when kicked to do so.  To do
        this, the server's config file was turned into the CONFIG module
        (CONFIG.py) so we can reload(CONFIG) and pick up new builders on the fly.




--- NEW FILE CONFIG.py ---
# Configuration file for buildmaster.py

# Every server setting lives in this single dictionary; read values via get().
config_opts = {
    # Host name the BuildMaster XML-RPC server listens on.
    'hostname': "localhost",

    # Result mail goes to <username>@<email_to_domain>, sent from email_from.
    'email_to_domain': "redhat.com",
    'email_from': "buildsys at fedoraproject.org",

    # Root directory for build stages.
    'stages_root': "/rpmbuild/extras/stages",

    # Exported as CVSROOT / CVS_RSH before packages are checked out.
    'pkg_cvs_root': ":gserver:cvs.devel.redhat.com:/cvs/dist",
    'pkg_cvs_rsh': "/usr/kerberos/bin/krsh",

    # External tools and scratch space used while preparing a build.
    'cvs_cmd': "/usr/bin/cvs",
    'make_cmd': "/usr/bin/make",
    'tmpdir': "/tmp",

    # 1 means the Red Hat internal CVS layout is in use.
    'redhat_internal_cvs': 1,

    # Base URL quoted in failure mails so users can find the build logs.
    'log_url': "http://foo.foo.org/logs/",

    # Build target => architectures that may be built for that target.
    'targets': {
        'FC-3': ['i386', 'x86_64'],
        'devel': ['i386'],
    },

    # Addresses of the builders that may be contacted.
    'builders': ['http://127.0.0.1:8888'],
}

def get(key):
    """Return the configuration value stored under ``key``.

    The value is looked up in the module-level ``config_opts`` dictionary.
    An unknown key is treated as a fatal configuration error: a message is
    written to standard output and the process is terminated with exit
    status 1 (``SystemExit`` is raised), so the function never returns in
    that case.
    """
    # Imported locally because this module has no import section of its own.
    import sys

    try:
        return config_opts[key]
    except KeyError:
        # Wrap the key in a one-element tuple so that a tuple-valued key is
        # formatted as one value instead of being unpacked by the % operator.
        sys.stdout.write("Bad request for key '%s'\n" % (key,))
        # Use sys.exit() instead of the bare exit name: that name is injected
        # by the site module, is absent under "python -S", and was a plain
        # (non-callable) string on interpreters older than Python 2.5.
        sys.exit(1)


Index: aw_manager.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/aw_manager.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- aw_manager.py	12 May 2005 00:42:14 -0000	1.2
+++ aw_manager.py	12 May 2005 16:48:15 -0000	1.3
@@ -18,13 +18,22 @@
 import time
 import string
 import xmlrpclib
-from bm_server_config import CONFIG
+import CONFIG
 import base64
+import sys
+import socket
+
+def status_is_finished(status):
+    if status == 'failed' or status == 'done' or status == 'killed':
+        return True
+    return False
+
 
 class ArchWelderJob:
     """ Tracks a single build instance for a single arch on an ArchWelder """
 
-    def __init__(self, awi, server, srpm, target, mydir, arch):
+    def __init__(self, parent_job, awi, server, srpm, target, mydir, arch):
+        self.parent_job = parent_job
         self.awi = awi
         self.jobid = None
         self.status = None
@@ -47,7 +56,7 @@
             return True
 
     def update_status(self):
-        if self.status == 'failed' or self.status == 'done':
+        if status_is_finished(self.status):
             return
 
         status = ''
@@ -58,6 +67,11 @@
         self.status = status
         return self.status
 
+    def server_gone(self):
+        if status_is_finished(self.status):
+            return
+        self.parent_job.job_server_gone(self)
+
     def die(self):
         self._server.die(self.jobid)
         self.status = 'killed'
@@ -73,11 +87,13 @@
 class ArchWelderInstance:
     """ Tracks an single arch on an ArchWelder """
 
-    def __init__(self, address, arch):
+    def __init__(self, awm, address, arch):
+        self._awm = awm
         self._jobs = []
         self._arch = arch
         self._address = address
         self._server = xmlrpclib.Server(self._address)
+        self._unavail_count = 0
         self._cur_job = self._get_cur_job()
 
     def arch(self):
@@ -86,8 +102,8 @@
     def address(self):
         return self._address
     
-    def new_job(self, srpm, target, mydir):
-        return ArchWelderJob(self, self._server, srpm, target, mydir, self._arch)
+    def new_job(self, parent_job, srpm, target, mydir):
+        return ArchWelderJob(parent_job, self, self._server, srpm, target, mydir, self._arch)
 
     def track_job(self, job):
         self._jobs.append(job)
@@ -96,9 +112,14 @@
         cur_job = None
         try:
             cur_job = self._server.get_cur_job()
-        except Exception, e:
-            print "XMLAW: got error '%s' from AW during get_cur_job()" % e
+        except socket.error, e:
+            # Check for "Connection refused" or "Connection reset by peer"
+            if e[0] == 111 or e[0] == 104:
+                self._unavail_count = self._unavail_count + 1
+            else:
+                print "XMLAW: got error '%s' from AW during get_cur_job()" % e
         else:
+            self._unavail_count = 0
             if cur_job == 0:
                 cur_job = None
         return cur_job
@@ -109,6 +130,17 @@
         # Update status of all jobs
         for j in self._jobs:
             j.update_status()
+
+        # If we haven't been able to contact the ArchWelder for a bit, kill build
+        # jobs on this ArchWelder
+        if self._unavail_count > 2:
+            for job in self._jobs:
+                job.server_gone()
+                del job
+            # Return 1 to indicate we should be killed
+            return 1
+
+        return 0
     
     def available(self):
         if self._cur_job:
@@ -119,7 +151,7 @@
 class ArchWelderManager:
     def __init__(self):
         # List of addresses of possible builders
-        self.possible_aw = CONFIG('builders')
+        self.possible_aw = CONFIG.get('builders')
 
         # Dict:  arches => available builders
         # Like so:  [ 'i386':['10.0.0.1', '10.0.0.2'],
@@ -127,12 +159,25 @@
         #           ]
         self.running_aw = {}
 
+        print "-----------------------------------------------------"
+        print " Looking for ArchWelders..."
+        self.update_archwelder_instances()
+        print "-----------------------------------------------------\n"
+
+    def update_archwelder_instances(self):
         # Figure out which archwelders are alive, and what they support
         # We create a separate archwelder instance for each arch on each builder,
         # even though both instances talk to the same XMLRPC server on the builder
-        print "-----------------------------------------------------"
-        print " Looking for ArchWelders..."
         for address in self.possible_aw:
+            # If the address is already in our running_aw list, skip it
+            skip = False
+            for awi_list in self.running_aw.values():
+                for awi in awi_list:
+                    if address == awi.address():
+                        skip = True
+            if skip == True:
+                continue
+
             arches = None
             server = xmlrpclib.Server(address)
             try:
@@ -141,32 +186,35 @@
                 pass
             if arches:
                 arches.append('noarch')
-                print "   Found ArchWelder '%s' supporting arches %s." % (address, string.join(arches))
+                print "   New AW: '%s' [%s]" % (address, string.join(arches))
                 for a in arches:
                     if not self.running_aw.has_key(a):
                         self.running_aw[a] = []
-                    awi = ArchWelderInstance(address, a)
+                    awi = ArchWelderInstance(self, address, a)
                     self.running_aw[a].append(awi)
             else:
                 self.possible_aw.remove(address)
             del server
-        print "-----------------------------------------------------\n"
 
     def process(self):
+        """ Allow each ArchWelderInstance to update its status and do some processing """
         for awi_list in self.running_aw.values():
             for awi in awi_list:
-                awi.process()
+                if awi.process() == 1:
+                    # Remove the ArchWelderInstance from our lists
+                    print "Removing ArchWelder '%s'/%s because it timed out." % (awi.address(), awi.arch())
+                    awi_list.remove(awi)
 
     def track_job(self, job):
         if job:
-            awi = job.awi.track_job(job)
+            job.awi.track_job(job)
 
-    def new_job_on_arch(self, arch, srpm, target, mydir):
+    def new_job_on_arch(self, parent_job, arch, srpm, target, mydir):
         """ Create a job on a free builder for this arch """
 
         if self.running_aw.has_key(arch):
             for aw in self.running_aw[arch]:
                 if aw.available():
-                    return aw.new_job(srpm, target, mydir)
+                    return aw.new_job(parent_job, srpm, target, mydir)
         return None
 


Index: bm_server.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/bm_server.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- bm_server.py	11 May 2005 19:48:57 -0000	1.1
+++ bm_server.py	12 May 2005 16:48:15 -0000	1.2
@@ -17,7 +17,7 @@
 
 
 import time
-from bm_server_config import CONFIG
+import CONFIG
 import socket
 import SimpleXMLRPCServer
 import xmlrpclib
@@ -38,12 +38,12 @@
     if not subject:
         subject = 'Build Result: %s' % cvs_tag
     msg['Subject'] = subject
-    msg['From'] = CONFIG('email_from')
-    email_to = '%s@%s' % (username, CONFIG('email_to_domain'))
+    msg['From'] = CONFIG.get('email_from')
+    email_to = '%s@%s' % (username, CONFIG.get('email_to_domain'))
     msg['To'] = email_to
     s = smtplib.SMTP()
     s.connect()
-    s.sendmail(CONFIG('email_from'), [email_to], msg.as_string())
+    s.sendmail(CONFIG.get('email_from'), [email_to], msg.as_string())
     s.close()
 
 
@@ -63,7 +63,7 @@
 
         print "Request to enqueue '%s' tag '%s' for target '%s' (user '%s')" \
                 % (package, cvs_tag, target, username)
-        targets = CONFIG('targets')
+        targets = CONFIG.get('targets')
         if not targets.has_key(target):
             print "Error setting up build for %s on %s: target does not exist."\
                     % (cvs_tag, target)
@@ -102,6 +102,14 @@
             job_list.append(tempX)
         return job_list
 
+    def look_for_archwelders(self):
+        reload(CONFIG)
+        print "-----------------------------------------------------"
+        print " Looking for ArchWelders..."
+        self.awm.update_archwelder_instances()
+        print "-----------------------------------------------------\n"
+        return 0
+
 
 class MyXMLRPCServer(SimpleXMLRPCServer.SimpleXMLRPCServer):
     """ XMLRPC server subclass that turns on SO_REUSEADDR """
@@ -124,9 +132,9 @@
 
     # Create the BuildMaster XMLRPC server
     xmlrpc_bm = XMLRPCBuildMaster(awm)
-    bm_server = MyXMLRPCServer((CONFIG('hostname'), 8887))
+    bm_server = MyXMLRPCServer((CONFIG.get('hostname'), 8887))
     bm_server.register_instance(xmlrpc_bm)
-    print "BuildMaster accepting requests on %s:8887.\n" % CONFIG('hostname')
+    print "BuildMaster accepting requests on %s:8887.\n" % CONFIG.get('hostname')
     try:
         bm_server.serve_forever()
     except Exception:


Index: buildjob.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/buildjob.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- buildjob.py	11 May 2005 19:48:57 -0000	1.1
+++ buildjob.py	12 May 2005 16:48:15 -0000	1.2
@@ -31,13 +31,13 @@
 import string
 import SimpleXMLRPCServer
 import xmlrpclib
-from bm_server_config import CONFIG
+import CONFIG
 import socket
 from aw_manager import ArchWelderManager
 from aw_manager import ArchWelderJob
 
-os.environ['CVSROOT'] = CONFIG('pkg_cvs_root')
-os.environ['CVS_RSH'] = CONFIG('pkg_cvs_rsh')
+os.environ['CVSROOT'] = CONFIG.get('pkg_cvs_root')
+os.environ['CVS_RSH'] = CONFIG.get('pkg_cvs_rsh')
 
 DEBUG = True
 def debugprint(stuff=''):
@@ -75,7 +75,7 @@
         self.username = username
         self.starttime = time.time()
         self.endtime = None
-        self.stages_root = CONFIG('stages_root')
+        self.stages_root = CONFIG.get('stages_root')
         self.package = package
         self.cvs_tag = cvs_tag
         self.target = target
@@ -91,7 +91,7 @@
         
     def arch_handling(self, hdr):
         archs = []
-        targets = CONFIG('targets')
+        targets = CONFIG.get('targets')
         buildable_arches = targets[self.target]
         
         ba = hdr['buildarchs']
@@ -137,9 +137,9 @@
         
     def _checkout(self):
         self.curstage = 'checkout'
-        self.tmpdir = tempfile.mkdtemp(prefix=self.cvs_tag, dir=CONFIG('tmpdir'))
+        self.tmpdir = tempfile.mkdtemp(prefix=self.cvs_tag, dir=CONFIG.get('tmpdir'))
         os.chdir(self.tmpdir)
-        cmd = '%s co -r %s %s' % (CONFIG('cvs_cmd'), self.cvs_tag, self.package)
+        cmd = '%s co -r %s %s' % (CONFIG.get('cvs_cmd'), self.cvs_tag, self.package)
         debugprint("%d: Running %s" % (self.uid, cmd))
         s, o = commands.getstatusoutput(cmd)
         if s != 0:
@@ -150,9 +150,9 @@
             self.failed = True
             return
 
-        if CONFIG('redhat_internal_cvs') == 1:
+        if CONFIG.get('redhat_internal_cvs') == 1:
             os.chdir(os.path.join(self.tmpdir, self.package))
-            cmd = '%s co common' % CONFIG('cvs_cmd')
+            cmd = '%s co common' % CONFIG.get('cvs_cmd')
             debugprint("%d: Running %s" % (self.uid, cmd))
             s, o = commands.getstatusoutput(cmd)
             if s != 0:
@@ -175,13 +175,13 @@
             self.failed = True
             return
 
-        if CONFIG('redhat_internal_cvs') == 1:
+        if CONFIG.get('redhat_internal_cvs') == 1:
             make_srpm_dir = os.path.join(packagedir, self.target)
         else:
             make_srpm_dir = packagedir
         os.chdir(make_srpm_dir)
 
-        cmd = '%s srpm' % CONFIG('make_cmd')
+        cmd = '%s srpm' % CONFIG.get('make_cmd')
         debugprint("%d: Running %s in %s" % (self.uid, cmd, make_srpm_dir))
         s, o = commands.getstatusoutput(cmd)
         if s != 0:
@@ -258,17 +258,22 @@
                 self._succeeded()
 
 
+    def job_server_gone(self, job):
+        """ Remove a job from our building queue if its server went away """
+
+        print "%d: ArchWelder for %s went away...  Will start new job for %s" % (self.uid, job.arch, job.arch)
+        del self.sub_jobs[job.arch]
+
     def _start_unspawned_builds(self):
         for arch in self.buildarches:
             if not self.sub_jobs.has_key(arch):
-                job = self.awm.new_job_on_arch(arch, self.srpmpath, self.target, self.stage_dir)
+                job = self.awm.new_job_on_arch(self, arch, self.srpmpath, self.target, self.stage_dir)
                 if job:
                     if job.start() == True:
                         self.awm.track_job(job)
                         self.sub_jobs[arch] = job
                         print "%s: Started job %s with builder id %s" % (self.uid, self.package, job.jobid)
                 else:
-                    print "%d: Waiting for free buildhost on %s" % (self.uid, arch)
                     del job
 
     def _monitor(self):
@@ -309,7 +314,7 @@
         for job in self.sub_jobs.values():
             buildroot = 'fedora-%s-%s-core' % (self.target, job.arch)
             stage_arch = os.path.join(self.stage_dir, job.arch)
-            build_log = '%s/mach/%s/%s-%s-%s/rpm.log' % (CONFIG('tmpdir'), buildroot,
+            build_log = '%s/mach/%s/%s-%s-%s/rpm.log' % (CONFIG.get('tmpdir'), buildroot,
                                             self.name, self.ver, self.release)
             if os.path.exists(build_log):
                 bl = open(build_log, 'r')
@@ -337,7 +342,7 @@
         # markup status file
         resultstring = """
 %s: Build of %s on %s failed to complete on one or more archs. Please see logs at:
-%s/%s/%s""" % (self.uid, self.name, self.target, CONFIG('log_url'), self.target, self.name)
+%s/%s/%s""" % (self.uid, self.name, self.target, CONFIG.get('log_url'), self.target, self.name)
         self.email_result(resultstring)
         return False
         
@@ -365,12 +370,12 @@
         if not subject:
             subject = 'Build Result: %s on %s' % (self.name, self.target)
         msg['Subject'] = subject
-        msg['From'] = CONFIG('email_from')
-        email_to = '%s@%s' % (self.username, CONFIG('email_to_domain'))
+        msg['From'] = CONFIG.get('email_from')
+        email_to = '%s@%s' % (self.username, CONFIG.get('email_to_domain'))
         msg['To'] = email_to
         s = smtplib.SMTP()
         s.connect()
-        s.sendmail(CONFIG('email_from'), [email_to], msg.as_string())
+        s.sendmail(CONFIG.get('email_from'), [email_to], msg.as_string())
         s.close()
 
     def _createrepo(self, stage=None):


Index: buildmaster.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/buildmaster.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- buildmaster.py	11 May 2005 19:48:57 -0000	1.1
+++ buildmaster.py	12 May 2005 16:48:15 -0000	1.2
@@ -17,7 +17,7 @@
 
 
 import time
-from bm_server_config import CONFIG
+import CONFIG
 from buildjob import BuildJob
 from buildjob import PrepError
 import sqlite
@@ -89,7 +89,7 @@
                         item['cvs_tag'], item['target'], self.awm)
                 self.building_jobs.append(job)
 
-            time.sleep(1)
+            time.sleep(5)
             if self.should_stop == True:
                 break
 


--- bm_server_config.py DELETED ---




More information about the fedora-extras-commits mailing list