extras-buildsys-temp/automation2/server CONFIG.py, NONE, 1.1 aw_manager.py, 1.2, 1.3 bm_server.py, 1.1, 1.2 buildjob.py, 1.1, 1.2 buildmaster.py, 1.1, 1.2 bm_server_config.py, 1.1, NONE
Daniel Williams (dcbw)
fedora-extras-commits at redhat.com
Thu May 12 16:48:17 UTC 2005
Author: dcbw
Update of /cvs/fedora/extras-buildsys-temp/automation2/server
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv27643/server
Modified Files:
aw_manager.py bm_server.py buildjob.py buildmaster.py
Added Files:
CONFIG.py
Removed Files:
bm_server_config.py
Log Message:
2005-05-12 Dan Williams <dcbw at redhat.com>
* Add ability to drop ArchWelders and the jobs they are currently building,
and to find them again when kicked to do so. To do this, the server's config
file was renamed to CONFIG.py so that we can reload(CONFIG) and pick up new
builders on the fly.
--- NEW FILE CONFIG.py ---
# Configuration file for buildmaster.py
#
# All settings live in the config_opts dictionary below and are read by
# the server modules through CONFIG.get(<key>).
config_opts = {
    # Host name the BuildMaster XMLRPC server binds to
    'hostname': "localhost",

    # Result mails are addressed to <username>@<email_to_domain> and are
    # sent from <email_from>
    'email_to_domain': "redhat.com",
    'email_from': "buildsys at fedoraproject.org",

    # Root directory for build job stages
    'stages_root': "/rpmbuild/extras/stages",

    # Exported as CVSROOT and CVS_RSH before packages are checked out
    'pkg_cvs_root': ":gserver:cvs.devel.redhat.com:/cvs/dist",
    'pkg_cvs_rsh': "/usr/kerberos/bin/krsh",

    # External commands used for checkout and 'make srpm'
    'cvs_cmd': "/usr/bin/cvs",
    'make_cmd': "/usr/bin/make",

    # Directory in which per-job temporary checkouts are created
    'tmpdir': "/tmp",

    # 1 => additionally check out 'common' and run make in the
    # per-target subdirectory of the package
    'redhat_internal_cvs': 1,

    # Base URL quoted in failure mails
    'log_url': "http://foo.foo.org/logs/",

    # Build target => arches that may be built for that target
    'targets': {
        'FC-3': ['i386', 'x86_64'],
        'devel': ['i386'],
    },

    # Addresses of possible builders (ArchWelder XMLRPC servers)
    'builders': ['http://127.0.0.1:8888'],
}
def get(key):
    """Return the value stored in config_opts under key.

    An unknown key is treated as a fatal configuration error: a message
    is printed and the process exits with status 1 (SystemExit is raised),
    so callers never receive a placeholder value.
    """
    # Imported locally because this module has no import section of its own.
    import sys

    try:
        return config_opts[key]
    except KeyError:
        # Wrap key in a one-element tuple so that a tuple-valued key is
        # formatted as a single value instead of being unpacked by '%'.
        print("Bad request for key '%s'" % (key,))
        # Use sys.exit() rather than the bare exit() helper: that name is
        # only injected by the site module for interactive use and is not
        # guaranteed to exist (or to be callable) in a running program.
        sys.exit(1)
Index: aw_manager.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/aw_manager.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- aw_manager.py 12 May 2005 00:42:14 -0000 1.2
+++ aw_manager.py 12 May 2005 16:48:15 -0000 1.3
@@ -18,13 +18,22 @@
import time
import string
import xmlrpclib
-from bm_server_config import CONFIG
+import CONFIG
import base64
+import sys
+import socket
+
+def status_is_finished(status):
+ if status == 'failed' or status == 'done' or status == 'killed':
+ return True
+ return False
+
class ArchWelderJob:
""" Tracks a single build instance for a single arch on an ArchWelder """
- def __init__(self, awi, server, srpm, target, mydir, arch):
+ def __init__(self, parent_job, awi, server, srpm, target, mydir, arch):
+ self.parent_job = parent_job
self.awi = awi
self.jobid = None
self.status = None
@@ -47,7 +56,7 @@
return True
def update_status(self):
- if self.status == 'failed' or self.status == 'done':
+ if status_is_finished(self.status):
return
status = ''
@@ -58,6 +67,11 @@
self.status = status
return self.status
+ def server_gone(self):
+ if status_is_finished(self.status):
+ return
+ self.parent_job.job_server_gone(self)
+
def die(self):
self._server.die(self.jobid)
self.status = 'killed'
@@ -73,11 +87,13 @@
class ArchWelderInstance:
""" Tracks an single arch on an ArchWelder """
- def __init__(self, address, arch):
+ def __init__(self, awm, address, arch):
+ self._awm = awm
self._jobs = []
self._arch = arch
self._address = address
self._server = xmlrpclib.Server(self._address)
+ self._unavail_count = 0
self._cur_job = self._get_cur_job()
def arch(self):
@@ -86,8 +102,8 @@
def address(self):
return self._address
- def new_job(self, srpm, target, mydir):
- return ArchWelderJob(self, self._server, srpm, target, mydir, self._arch)
+ def new_job(self, parent_job, srpm, target, mydir):
+ return ArchWelderJob(parent_job, self, self._server, srpm, target, mydir, self._arch)
def track_job(self, job):
self._jobs.append(job)
@@ -96,9 +112,14 @@
cur_job = None
try:
cur_job = self._server.get_cur_job()
- except Exception, e:
- print "XMLAW: got error '%s' from AW during get_cur_job()" % e
+ except socket.error, e:
+ # Check for "Connection refused" or "Connection reset by peer"
+ if e[0] == 111 or e[0] == 104:
+ self._unavail_count = self._unavail_count + 1
+ else:
+ print "XMLAW: got error '%s' from AW during get_cur_job()" % e
else:
+ self._unavail_count = 0
if cur_job == 0:
cur_job = None
return cur_job
@@ -109,6 +130,17 @@
# Update status of all jobs
for j in self._jobs:
j.update_status()
+
+ # If we haven't been able to contact the ArchWelder for a bit, kill build
+ # jobs on this ArchWelder
+ if self._unavail_count > 2:
+ for job in self._jobs:
+ job.server_gone()
+ del job
+ # Return 1 to indicate we should be killed
+ return 1
+
+ return 0
def available(self):
if self._cur_job:
@@ -119,7 +151,7 @@
class ArchWelderManager:
def __init__(self):
# List of addresses of possible builders
- self.possible_aw = CONFIG('builders')
+ self.possible_aw = CONFIG.get('builders')
# Dict: arches => available builders
# Like so: [ 'i386':['10.0.0.1', '10.0.0.2'],
@@ -127,12 +159,25 @@
# ]
self.running_aw = {}
+ print "-----------------------------------------------------"
+ print " Looking for ArchWelders..."
+ self.update_archwelder_instances()
+ print "-----------------------------------------------------\n"
+
+ def update_archwelder_instances(self):
# Figure out which archwelders are alive, and what they support
# We create a separate archwelder instance for each arch on each builder,
# even though both instances talk to the same XMLRPC server on the builder
- print "-----------------------------------------------------"
- print " Looking for ArchWelders..."
for address in self.possible_aw:
+ # If the address is already in our running_aw list, skip it
+ skip = False
+ for awi_list in self.running_aw.values():
+ for awi in awi_list:
+ if address == awi.address():
+ skip = True
+ if skip == True:
+ continue
+
arches = None
server = xmlrpclib.Server(address)
try:
@@ -141,32 +186,35 @@
pass
if arches:
arches.append('noarch')
- print " Found ArchWelder '%s' supporting arches %s." % (address, string.join(arches))
+ print " New AW: '%s' [%s]" % (address, string.join(arches))
for a in arches:
if not self.running_aw.has_key(a):
self.running_aw[a] = []
- awi = ArchWelderInstance(address, a)
+ awi = ArchWelderInstance(self, address, a)
self.running_aw[a].append(awi)
else:
self.possible_aw.remove(address)
del server
- print "-----------------------------------------------------\n"
def process(self):
+ """ Allow each ArchWelderInstance to update its status and do some processing """
for awi_list in self.running_aw.values():
for awi in awi_list:
- awi.process()
+ if awi.process() == 1:
+ # Remove the ArchWelderInstance from our lists
+ print "Removing ArchWelder '%s'/%s because it timed out." % (awi.address(), awi.arch())
+ awi_list.remove(awi)
def track_job(self, job):
if job:
- awi = job.awi.track_job(job)
+ job.awi.track_job(job)
- def new_job_on_arch(self, arch, srpm, target, mydir):
+ def new_job_on_arch(self, parent_job, arch, srpm, target, mydir):
""" Create a job on a free builder for this arch """
if self.running_aw.has_key(arch):
for aw in self.running_aw[arch]:
if aw.available():
- return aw.new_job(srpm, target, mydir)
+ return aw.new_job(parent_job, srpm, target, mydir)
return None
Index: bm_server.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/bm_server.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- bm_server.py 11 May 2005 19:48:57 -0000 1.1
+++ bm_server.py 12 May 2005 16:48:15 -0000 1.2
@@ -17,7 +17,7 @@
import time
-from bm_server_config import CONFIG
+import CONFIG
import socket
import SimpleXMLRPCServer
import xmlrpclib
@@ -38,12 +38,12 @@
if not subject:
subject = 'Build Result: %s' % cvs_tag
msg['Subject'] = subject
- msg['From'] = CONFIG('email_from')
- email_to = '%s@%s' % (username, CONFIG('email_to_domain'))
+ msg['From'] = CONFIG.get('email_from')
+ email_to = '%s@%s' % (username, CONFIG.get('email_to_domain'))
msg['To'] = email_to
s = smtplib.SMTP()
s.connect()
- s.sendmail(CONFIG('email_from'), [email_to], msg.as_string())
+ s.sendmail(CONFIG.get('email_from'), [email_to], msg.as_string())
s.close()
@@ -63,7 +63,7 @@
print "Request to enqueue '%s' tag '%s' for target '%s' (user '%s')" \
% (package, cvs_tag, target, username)
- targets = CONFIG('targets')
+ targets = CONFIG.get('targets')
if not targets.has_key(target):
print "Error setting up build for %s on %s: target does not exist."\
% (cvs_tag, target)
@@ -102,6 +102,14 @@
job_list.append(tempX)
return job_list
+ def look_for_archwelders(self):
+ reload(CONFIG)
+ print "-----------------------------------------------------"
+ print " Looking for ArchWelders..."
+ self.awm.update_archwelder_instances()
+ print "-----------------------------------------------------\n"
+ return 0
+
class MyXMLRPCServer(SimpleXMLRPCServer.SimpleXMLRPCServer):
""" XMLRPC server subclass that turns on SO_REUSEADDR """
@@ -124,9 +132,9 @@
# Create the BuildMaster XMLRPC server
xmlrpc_bm = XMLRPCBuildMaster(awm)
- bm_server = MyXMLRPCServer((CONFIG('hostname'), 8887))
+ bm_server = MyXMLRPCServer((CONFIG.get('hostname'), 8887))
bm_server.register_instance(xmlrpc_bm)
- print "BuildMaster accepting requests on %s:8887.\n" % CONFIG('hostname')
+ print "BuildMaster accepting requests on %s:8887.\n" % CONFIG.get('hostname')
try:
bm_server.serve_forever()
except Exception:
Index: buildjob.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/buildjob.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- buildjob.py 11 May 2005 19:48:57 -0000 1.1
+++ buildjob.py 12 May 2005 16:48:15 -0000 1.2
@@ -31,13 +31,13 @@
import string
import SimpleXMLRPCServer
import xmlrpclib
-from bm_server_config import CONFIG
+import CONFIG
import socket
from aw_manager import ArchWelderManager
from aw_manager import ArchWelderJob
-os.environ['CVSROOT'] = CONFIG('pkg_cvs_root')
-os.environ['CVS_RSH'] = CONFIG('pkg_cvs_rsh')
+os.environ['CVSROOT'] = CONFIG.get('pkg_cvs_root')
+os.environ['CVS_RSH'] = CONFIG.get('pkg_cvs_rsh')
DEBUG = True
def debugprint(stuff=''):
@@ -75,7 +75,7 @@
self.username = username
self.starttime = time.time()
self.endtime = None
- self.stages_root = CONFIG('stages_root')
+ self.stages_root = CONFIG.get('stages_root')
self.package = package
self.cvs_tag = cvs_tag
self.target = target
@@ -91,7 +91,7 @@
def arch_handling(self, hdr):
archs = []
- targets = CONFIG('targets')
+ targets = CONFIG.get('targets')
buildable_arches = targets[self.target]
ba = hdr['buildarchs']
@@ -137,9 +137,9 @@
def _checkout(self):
self.curstage = 'checkout'
- self.tmpdir = tempfile.mkdtemp(prefix=self.cvs_tag, dir=CONFIG('tmpdir'))
+ self.tmpdir = tempfile.mkdtemp(prefix=self.cvs_tag, dir=CONFIG.get('tmpdir'))
os.chdir(self.tmpdir)
- cmd = '%s co -r %s %s' % (CONFIG('cvs_cmd'), self.cvs_tag, self.package)
+ cmd = '%s co -r %s %s' % (CONFIG.get('cvs_cmd'), self.cvs_tag, self.package)
debugprint("%d: Running %s" % (self.uid, cmd))
s, o = commands.getstatusoutput(cmd)
if s != 0:
@@ -150,9 +150,9 @@
self.failed = True
return
- if CONFIG('redhat_internal_cvs') == 1:
+ if CONFIG.get('redhat_internal_cvs') == 1:
os.chdir(os.path.join(self.tmpdir, self.package))
- cmd = '%s co common' % CONFIG('cvs_cmd')
+ cmd = '%s co common' % CONFIG.get('cvs_cmd')
debugprint("%d: Running %s" % (self.uid, cmd))
s, o = commands.getstatusoutput(cmd)
if s != 0:
@@ -175,13 +175,13 @@
self.failed = True
return
- if CONFIG('redhat_internal_cvs') == 1:
+ if CONFIG.get('redhat_internal_cvs') == 1:
make_srpm_dir = os.path.join(packagedir, self.target)
else:
make_srpm_dir = packagedir
os.chdir(make_srpm_dir)
- cmd = '%s srpm' % CONFIG('make_cmd')
+ cmd = '%s srpm' % CONFIG.get('make_cmd')
debugprint("%d: Running %s in %s" % (self.uid, cmd, make_srpm_dir))
s, o = commands.getstatusoutput(cmd)
if s != 0:
@@ -258,17 +258,22 @@
self._succeeded()
+ def job_server_gone(self, job):
+ """ Remove a job from our building queue if its server went away """
+
+ print "%d: ArchWelder for %s went away... Will start new job for %s" % (self.uid, job.arch, job.arch)
+ del self.sub_jobs[job.arch]
+
def _start_unspawned_builds(self):
for arch in self.buildarches:
if not self.sub_jobs.has_key(arch):
- job = self.awm.new_job_on_arch(arch, self.srpmpath, self.target, self.stage_dir)
+ job = self.awm.new_job_on_arch(self, arch, self.srpmpath, self.target, self.stage_dir)
if job:
if job.start() == True:
self.awm.track_job(job)
self.sub_jobs[arch] = job
print "%s: Started job %s with builder id %s" % (self.uid, self.package, job.jobid)
else:
- print "%d: Waiting for free buildhost on %s" % (self.uid, arch)
del job
def _monitor(self):
@@ -309,7 +314,7 @@
for job in self.sub_jobs.values():
buildroot = 'fedora-%s-%s-core' % (self.target, job.arch)
stage_arch = os.path.join(self.stage_dir, job.arch)
- build_log = '%s/mach/%s/%s-%s-%s/rpm.log' % (CONFIG('tmpdir'), buildroot,
+ build_log = '%s/mach/%s/%s-%s-%s/rpm.log' % (CONFIG.get('tmpdir'), buildroot,
self.name, self.ver, self.release)
if os.path.exists(build_log):
bl = open(build_log, 'r')
@@ -337,7 +342,7 @@
# markup status file
resultstring = """
%s: Build of %s on %s failed to complete on one or more archs. Please see logs at:
-%s/%s/%s""" % (self.uid, self.name, self.target, CONFIG('log_url'), self.target, self.name)
+%s/%s/%s""" % (self.uid, self.name, self.target, CONFIG.get('log_url'), self.target, self.name)
self.email_result(resultstring)
return False
@@ -365,12 +370,12 @@
if not subject:
subject = 'Build Result: %s on %s' % (self.name, self.target)
msg['Subject'] = subject
- msg['From'] = CONFIG('email_from')
- email_to = '%s@%s' % (self.username, CONFIG('email_to_domain'))
+ msg['From'] = CONFIG.get('email_from')
+ email_to = '%s@%s' % (self.username, CONFIG.get('email_to_domain'))
msg['To'] = email_to
s = smtplib.SMTP()
s.connect()
- s.sendmail(CONFIG('email_from'), [email_to], msg.as_string())
+ s.sendmail(CONFIG.get('email_from'), [email_to], msg.as_string())
s.close()
def _createrepo(self, stage=None):
Index: buildmaster.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys-temp/automation2/server/buildmaster.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- buildmaster.py 11 May 2005 19:48:57 -0000 1.1
+++ buildmaster.py 12 May 2005 16:48:15 -0000 1.2
@@ -17,7 +17,7 @@
import time
-from bm_server_config import CONFIG
+import CONFIG
from buildjob import BuildJob
from buildjob import PrepError
import sqlite
@@ -89,7 +89,7 @@
item['cvs_tag'], item['target'], self.awm)
self.building_jobs.append(job)
- time.sleep(1)
+ time.sleep(5)
if self.should_stop == True:
break
--- bm_server_config.py DELETED ---
More information about the fedora-extras-commits
mailing list