bouncer_r/python repomd.py, NONE, 1.1 derivedrepocheckers.py, 1.1, 1.2 mirrorchecker.py, 1.1, 1.2 repoview.py, 1.1, 1.2 derivedmirrorcheckers.py, 1.1, 1.2 repochecker.py, 1.2, 1.3 sentry.py, 1.2, 1.3

David Farning (dfarning) fedora-extras-commits at redhat.com
Mon Aug 1 20:38:03 UTC 2005


Author: dfarning

Update of /cvs/fedora/bouncer_r/python
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv13863/python

Modified Files:
	derivedrepocheckers.py mirrorchecker.py repoview.py 
	derivedmirrorcheckers.py repochecker.py sentry.py 
Added Files:
	repomd.py 
Log Message:
cvs cleanup for fedora initial commit


--- NEW FILE repomd.py ---

import sentryutil
import threading
import urllib2
import os
import gzip
import re
import time
## 
# Try to import cElementTree, and if that fails attempt to fall back to
# ElementTree, a slower, but pure-python implementation.
#
try: 
    from cElementTree import iterparse
except ImportError: 
    from elementtree.ElementTree import iterparse



def _bn(tag):
    """
    This is a very dirty way to go from {xmlns}tag to just tag.
    """
    try: return tag.split('}')[1]
    except: return tag

emailre = re.compile('<.*?@.*?>')
def _webify(text):
    """
    Make it difficult to harvest email addresses.
    """
    if text is None: return None
    mo = emailre.search(text)
    if mo:
        email = mo.group(0)
        remail = email.replace('.', '{*}')
        remail = remail.replace('@', '{%}')
        text = re.sub(email, remail, text)
    return text


class RepoMD:
    """
    Parses repository metadata (repomd.xml and primary.xml.gz) for one
    repository and mirrors the package/arch information into the caller's
    database.

    `caller` must provide `.logger` and `.database` (and `.getURI()` once
    the commented-out download call is re-enabled); `aRepo` is a repo
    record exposing repo_url, product_path, version_path, arch_path,
    repo_primary_timestamp and repo_id.
    """
    def __init__(self, caller,aRepo,webpath):
        self.caller = caller
        self.aRepo = aRepo
        # remote base URL of the repository
        self.rFilepath = aRepo.repo_url
        # local directory where this repo's metadata files live
        self.lFilepath = (webpath +'/'+ aRepo.product_path +'/'+ aRepo.version_path+'/'+aRepo.arch_path)  
        # primary.xml timestamp recorded in the DB on the previous run
        self.p_timestamp = aRepo.repo_primary_timestamp
                
        self.repodata = {}   # repomd.xml sections, keyed by data type
        self.arches = []     # distinct architectures seen in primary.xml
        self.packages = {}   # Package objects keyed by n-e-v-r pkgid

    def checkRepo(self):
      """
      Parse repomd.xml and report whether the repository is unchanged.
      Returns 0 when the primary timestamp differs from the recorded one
      (an update is needed), 1 when the repository has not changed.
      """
      self._parseRepoMD()
      return self._checkNecessity()

    def updateRepo(self):
      """
      Re-parse the repository metadata and rewrite this repo's rows in the
      `files` and `file_arches` tables, then record the new primary
      timestamp and mark the repo active.
      """
      print '+++++++++++++++++++++'
      self._parseRepoMD()
              
      try:
        #data = self.caller.getURI(self.rFilepath+ '/'+self.repodata['primary']['location'],self.lFilepath, self.lFilepath+'/primary.xml.gz')
        pass
      except urllib2.HTTPError, e:
        if 400 <= e.code <= 499:
          # NOTE(review): RepoIntegrityChecker is not imported in this
          # module, so this branch would raise NameError if it were ever
          # reached -- confirm once the download call above is restored.
          return (2, RepoIntegrityChecker.repoStatusAssociations[2] % e.__str__())
        raise e
      #FIXME add errorhandling to report primary.xml.gz failure 
            
      self._parsePrimary()
      
      # rows destined for the `files` and `file_arches` tables
      dataForNewFileRecord = []
      dataForNewArchRecord = []
      
      i=0  # NOTE(review): unused
      for pkgid in self.packages.keys():
         #  print self.packages[pkgid].pkgid
        
        # one row per package for the `files` table
        dataForNewFileRecord.append((self.packages[pkgid].pkgid,
            self.packages[pkgid].n,
            self.packages[pkgid].summary,
            self.packages[pkgid].vendor,
            self.packages[pkgid].v,
            self.packages[pkgid].license,
            self.packages[pkgid].r,
            self.packages[pkgid].url,
            self.aRepo.repo_id
            ))
        
        #dataForNewFileRecord.append((self.packages[pkgid].pkgid,
        #    self.packages[pkgid].n,
        #    self.packages[pkgid].summary,
        #    self.packages[pkgid].vendor,
        #    self.packages[pkgid].v,
        #    self.packages[pkgid].license,
        #    self.packages[pkgid].r,
        #    self.packages[pkgid].url,
        #    self.packages[pkgid].description,
        #    self.aRepo.repo_id
        #    ))
        
        #print  self.packages[pkgid].pkgid
        # one row per architecture of this package for `file_arches`
        for arch in self.packages[pkgid].arches.values():
        #    print '   ' + self.packages[pkgid].pkgid + ' ' + arch.arch + ' ' + arch.getFileName() + ' ' + str(arch.time) + ' ' + arch.packager + ' ' + str(arch.size)
                 
            dataForNewArchRecord.append((self.packages[pkgid].pkgid,
                arch.arch,
                arch.getFileName(),
                str(arch.time),
                arch.packager,
                str(arch.size),
                self.aRepo.repo_id,
            ))
             
      # full refresh: delete this repo's old rows, then bulk-insert the new
      self.caller.database.executeSql(RepoMD.DeleteFilesSQL % self.aRepo.repo_id)
      self.caller.database.executeManySql(RepoMD.InsertFilesSQL, dataForNewFileRecord)
      
      self.caller.database.executeSql(RepoMD.DeleteArchesSQL % self.aRepo.repo_id)
      self.caller.database.executeManySql(RepoMD.InsertArchesSQL, dataForNewArchRecord)
      self.caller.database.executeSql(RepoMD.UpdateTimestampSQL %  (str(self.repodata['primary']['timestamp']), self.aRepo.repo_id))
   
      self.caller.database.commit()
    
    DeleteArchesSQL = """
      DELETE FROM
          file_arches
       WHERE
          repo_id = %d
      """
    
    InsertArchesSQL = """
      INSERT INTO
          file_arches (`pkg_id`,
                 `arch_arch`,
                 `arch_filename`,
                 `arch_time`,
                 `arch_packager`,
                 `arch_size`,
                 `repo_id`)
      VALUES
          (%s,%s,%s,%s,%s,%s,%s)
      """
    
    
    
    DeleteFilesSQL = """
      DELETE FROM
          files
       WHERE
          repo_id = %d
      """
      
    InsertFilesSQL = """
      INSERT INTO
          files (`pkg_id`,
                 `pkg_name`,
                 `pkg_summary`,
                 `pkg_vendor`,
                 `pkg_version`,
                 `pkg_license`,
                 `pkg_release`,
                 `pkg_url`,
                 `repo_id`)
      VALUES
          (%s,%s,%s,%s,%s,%s,%s,%s, %s)
      """
   
    #InsertFilesSQL = """
    #  INSERT INTO
    #      files (`pkg_id`,
    #             `pkg_name`,
    #             `pkg_summary`,
    #             `pkg_vendor`,
    #             `pkg_version`,
    #             `pkg_license`,
    #             `pkg_release`,
    #             `pkg_url`,
    #             `pkg_description`,
    #             `repo_id`)
    #  VALUES
    #      (%s,%s,%s,%s,%s,%s,%s,%s, %s,%s)
    #  """
      
     
  #----------------------------------------------------------------------------------------------
  # U p d a t e T i m e s t a m p S Q L 
  #----------------------------------------------------------------------------------------------

    UpdateTimestampSQL= """
    UPDATE
      repos
    SET
      repo_primary_timestamp = %s,
      repo_active = '1'
    WHERE
      repo_id = %d"""
        
       
        
        

    def _parseRepoMD(self):
        """
        Parse repomd.xml into self.repodata: one dict per <data> section,
        keyed by its type attribute, holding location/checksum/timestamp.
        """
        type = 'unknown'  # NOTE(review): shadows the builtin `type`
        self.caller.logger.log(sentryutil.INFO7, "%s: Reading repository data...", threading.currentThread().getName())
        
        for event, elem in iterparse(self.lFilepath+'/repomd.xml', events=('start',)):
            tag = _bn(elem.tag)
            if tag == 'data':
                type = elem.get('type', 'unknown')
                self.repodata[type] = {}
            elif tag == 'location':
                self.repodata[type]['location'] = elem.get('href', '#')
            elif tag == 'checksum':
                self.repodata[type]['checksum'] = elem.text
            elif tag == 'timestamp':
                self.repodata[type]['timestamp'] = elem.text
            # free the element's memory as we go
            elem.clear()
 #       _say('done\n')
 
    def _checkNecessity(self):
        """
        Compare the primary.xml timestamp from repomd.xml with the one
        recorded in the database on the last run.  Returns 0 when they
        differ (an update is needed) and 1 when the repository has not
        changed.  _parseRepoMD() must have been called first.
        """
        print self.p_timestamp
        print self.repodata['primary']['timestamp']
        if self.p_timestamp != self.repodata['primary']['timestamp']:
            return 0
        self.caller.logger.log(sentryutil.INFO7, "%s: Repository has not changed.", threading.currentThread().getName())
        return 1
        
    def _getFileFh(self, loc):
        """
        Transparently handle gzipped xml files.
        """
        if loc[-3:] == '.gz': fh = gzip.open(loc, 'r')
        else: fh = open(loc, 'r')
        return fh       
        
    def _getevr(self, elem):
        """
        Utility method to get e-v-r out of the <version> element.
        """
        e = elem.get('epoch', '0')
        v = elem.get('ver', '0')
        r = elem.get('rel', '0')
        return {'epoch': e, 'ver': v, 'rel': r}
    
    def _mkpkgid(self, n, e, v, r):
        """
        Make the n-e-v-r package id out of n, e, v, r.
        """
        return '%s-%s-%s-%s' % (n, e, v, r)
    
    def _doPackage(self, pkgdata):
        """
        Helper method for cleanliness. Accepts pkgdata and sees if we need
        to create a new package or add arches to existing ones, or ignore it
        outright.  Returns 0 when pkgdata is empty, 1 otherwise.
        """
        if not pkgdata:
            return 0
        
        # track every distinct architecture we encounter
        if pkgdata['arch'] not in self.arches:
            print  pkgdata['arch']
            self.arches.append(pkgdata['arch'])
        ## We make a package here from pkgdata ##
        (n, e, v, r) = (pkgdata['name'], pkgdata['epoch'], 
                        pkgdata['ver'], pkgdata['rel'])
        pkgid = self._mkpkgid(n, e, v, r)

        # reuse the Package for this n-e-v-r if one exists; otherwise create
        if self.packages.has_key(pkgid):
            package = self.packages[pkgid]
        else:
            package = Package(n, e, v, r)
            package.pkgid = pkgid
            self.packages[pkgid] = package
        package.doPackage(pkgdata)
        return 1
        
    def _parsePrimary(self):
        """
        Utility method for parsing primary.xml.  Populates self.packages
        (via _doPackage) and sets self.pkgcount to the number of <package>
        elements seen.
        """
        
        self.caller.logger.log(sentryutil.INFO7, "%s: parsing primary...", threading.currentThread().getName())
        fh = self._getFileFh(self.lFilepath+'/primary.xml.gz')
        pct = 0      # packages seen
        ignored = 0  # packages skipped by _doPackage
        pkgdata = {}
        # elements whose text is copied verbatim (after email obfuscation)
        simpletags = (
            'name', 
            'arch', 
            'summary', 
            'description', 
            'url',
            'packager',
            'checksum',
            'license',
            'group',
            'vendor')
        for event, elem in iterparse(fh):
            tag = _bn(elem.tag)
            if tag == 'package':
                # end of a <package> element: flush accumulated pkgdata
                if not self._doPackage(pkgdata): ignored += 1
                pct += 1
                self.caller.logger.log(sentryutil.INFO7, "%s: parsing primary: %s packages.", threading.currentThread().getName(),pct)
                pkgdata = {}
            elif tag in simpletags:
                pkgdata[tag] = _webify(elem.text)
            elif tag == 'version':
                pkgdata.update(self._getevr(elem))
            elif tag == 'time':
                pkgdata['time'] = elem.get('build', '0')
            elif tag == 'size':
                pkgdata['size'] = elem.get('package', '0')
            elif tag == 'location':
                pkgdata['location'] = elem.get('href', '#')
            elem.clear()
        self.pkgcount = pct 

        fh.close()

class Archer:
    """
    One architecture-specific build (a single RPM file) of a package.

    A release is listed by n-e-v-r, and one release can ship several
    architectures (e.g. "src"), so each of those ends up as an Archer;
    there are no further sublevels below this.
    """
    def __init__(self, pkgdata):
        """Populate fields from a pkgdata dict built by the primary parser."""
        self.loc = pkgdata['location']
        self.arch = pkgdata['arch']
        self.packager = pkgdata['packager']
        self.time = int(pkgdata['time'])
        self.size = int(pkgdata['size'])

    def getFileName(self):
        """Return the basename of the RPM file in question."""
        return os.path.basename(self.loc)

    def getTime(self, format='%c'):
        """
        Return the build time in locale format, or per *format* if a
        strftime format string is passed.
        """
        stamp = time.localtime(self.time)
        return time.strftime(format, stamp)

    def getSize(self):
        """
        Human-readable size (KiB or MiB); the raw byte count stays
        available as arch.size.
        """
        kib = self.size / 1024
        if kib / 1024 >= 1:
            return '%0.2f MiB' % (float(kib) / 1024)
        return '%d KiB' % kib


        
class Package:
    """
    A bit of a misnomer -- this is "package" in the sense of repoview, not
    in the sense of an .rpm file: it aggregates every architecture (Archer)
    of one n-e-v-r release.
    """
    def __init__(self, n, e, v, r):
        """Record the n-e-v-r identity and initialize empty containers."""
        self.nevr = (n, e, v, r)
        self.n = n
        self.e = e
        self.v = v
        self.r = r
        self.group = None
        self.rpmgroup = None
        # Archer objects keyed by package checksum
        self.arches = {}
        # cleared (set to 0) once _getPrimary() has filled the metadata
        self.incomplete = 1
        self.changelogs = []
        
    def doPackage(self, pkgdata):
        """
        Accept a dict with key-value pairs and populate ourselves with it.
        Adds an Archer for this checksum unless one already exists.
        """
        if self.incomplete: self._getPrimary(pkgdata)
        pkgid = pkgdata['checksum']
        # 'in' rather than the deprecated dict.has_key() (removed in py3)
        if pkgid in self.arches: return
        arch = Archer(pkgdata)
        self.arches[pkgid] = arch

    def addChangelogs(self, changelogs):
        """
        Accept changelogs from other-parser and assign them, unless we
        already have some (sometimes happens with multiple architectures).
        Returns 1 when accepted, 0 when we already had changelogs.
        """
        if self.changelogs: return 0
        self.changelogs = changelogs
        return 1
    
    def _getPrimary(self, pkgdata):
        """
        A helper method to grab values from pkgdata dict; marks this
        package complete so it only runs once.
        """
        self.summary = pkgdata['summary']
        self.description = pkgdata['description']
        self.url = pkgdata['url']
        self.license = pkgdata['license']
        self.vendor = pkgdata['vendor']
        self.rpmgroup = pkgdata['group']
        self.incomplete = 0

    def getChangeLogs(self):
        """
        Get the changelogs in the [c-formatted date, author, entry] style,
        newest first.
        """
        self.changelogs.sort()
        self.changelogs.reverse()
        retlist = []
        for changelog in self.changelogs:
            date, author, entry = changelog
            date = time.strftime('%c', time.localtime(date))
            retlist.append([date, author, entry])
        return retlist
        

Index: derivedrepocheckers.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/derivedrepocheckers.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- derivedrepocheckers.py	6 Jul 2005 20:44:08 -0000	1.1
+++ derivedrepocheckers.py	1 Aug 2005 20:38:01 -0000	1.2
@@ -39,6 +39,7 @@
 import cse.MySQLDatabase
 import cse.TabularData
 
+from repomd import *
 
 #==========================================================================
 # D a t a b a s e S o u r c e d M i r r o r I n t e g r i t y C h e c k e r
@@ -107,6 +108,44 @@
     return self.database.executeSql(DatabaseSourcedRepoIntegrityChecker.ReposSQL)
     
     
+  #------------------------------------------
+  # r e p o  A c t i o n G o o d T o B a d
+  #------------------------------------------
+  def repoActionGoodToBad (self, repoTuple):
+    """The action to take if a mirror is found to be bad.
+    """
+    self.actionThreadDatabaseConnection.executeSql(DatabaseSourcedRepoIntegrityChecker.repoActionGoodToBadMarkRepoSQL % repoTuple[0].repo_id)
+    self.actionThreadDatabaseConnection.commit()
+    super(DatabaseSourcedRepoIntegrityChecker, self).repoActionGoodToBad(repoTuple)
+    
+    
+  #------------------------------------------
+  # r e p o  A c t i o n B a d T o G o o d
+  #------------------------------------------
+  def repoActionBadToGood (self, repoTuple):
+    """The action to take if a mirror is found to be Good.
+    """
+    self.actionThreadDatabaseConnection.executeSql(DatabaseSourcedRepoIntegrityChecker.repoActionBadtToGoodMarkRepoSQL % repoTuple[0].repo_id)
+    self.actionThreadDatabaseConnection.commit()
+    super(DatabaseSourcedRepoIntegrityChecker, self).repoActionBadToGood(repoTuple)
+    
+    
+    
+  #------------------------------------------
+  # r e p o  A c t i o n U p d a t e
+  #------------------------------------------
+  def repoActionUpdate (self, repoTuple):
+    """The action to take if a mirror is found to be bad.
+    """
+    #self.actionThreadDatabaseConnection.executeSql(DatabaseSourcedRepoIntegrityChecker.repoActionGoodToBadMarkRepoSQL % repoTuple[0].repo_id)
+    #self.actionThreadDatabaseConnection.commit()
+
+    repomd = RepoMD(self, repoTuple[0], repoTuple[6])
+    repomd.updateRepo()
+    super(DatabaseSourcedRepoIntegrityChecker, self).repoActionUpdate(repoTuple)
+  
+    
+    
     
   #--------------------
   # R e p o s S Q L
@@ -119,6 +158,27 @@
       repos r"""
       
       
+  #--------------------------------------------------------------------
+  # r e p o A c t i o n G o o d T o B a d M a r k M i r r o r S Q L
+  #--------------------------------------------------------------------
+  repoActionGoodToBadMarkRepoSQL = """
+    update repos 
+    set 
+      repo_active = '0' 
+    where 
+      repo_id = %d"""
+      
+  #--------------------------------------------------------------------
+  # r e p o A c t i o n B a d T o G o o d M a r k M i r r o r S Q L
+  #--------------------------------------------------------------------
+  repoActionGoodToBadMarkRepoSQL = """
+    update repos 
+    set 
+      repo_active = '1' 
+    where 
+      repo_id = %d"""
+    
+      
 #================================================================================
 # N e w M i r r o r s A n d F i l e s M i r r o r I n t e g r i t y C h e c k e r
 #================================================================================
@@ -170,12 +230,13 @@
     dataForNewReposTableRecords = [ ]
     dataForUrlUpdate = []
     for x in listOfNewRepos:
-      dataForUrlUpdate = (self.doTemplateSubstitution(x.template_text, x.__dict__), x.repo_id )
-      
+      url = self.doTemplateSubstitution(x.template_text, x.__dict__)
+      dataForUrlUpdate = (url, x.repo_id )
       self.database.executeSql(NewReposIntegrityChecker.UpdateNewReposSQL % dataForUrlUpdate)
       
     self.database.commit()
     
+    listOfNewRepos = self.database.executeSql(NewReposIntegrityChecker.NewReposSQL)
     return cse.TabularData.TabularData((listOfNewRepos.schema, listOfNewRepos))
   
   #----------------------------------------------------------------------------------------------
@@ -189,23 +250,17 @@
       v.*,
       a.*
     FROM
-      repos r
-    JOIN
-      templates t
-    ON
-      r.template_id = t.template_id
-    JOIN
-      products p
-    ON
-      r.product_id = p.product_id
-    JOIN
-      versions v
-    ON
-      r.version_id = v.version_id
-    JOIN
-      archs a
-    ON
-      r.arch_id = a.arch_id  
+      repos r, templates t, products p, versions v, archs a
+    WHERE
+        r.version_id = v.version_id
+    AND
+        r.arch_id = a.arch_id
+    AND
+        r.product_id = p.product_id
+    AND
+        r.template_id = t.template_id
+    AND
+      r.repo_active = '1'    
      
      """
 
@@ -221,13 +276,6 @@
     WHERE
       repo_id = %d"""
 
-#    update downloadables
-#    set 
-#      downloadable_active = '1' 
-#    where 
-#      mirror_id = %d 
-#      and file_id = %d"""
-
 
   #----------------------------------------------------------------------------------------------
   # M i r r o r s A n d F i l e s N o t R e p r e s e n t e d I n D o w n l o a d a b l e s S Q L 
@@ -249,6 +297,3 @@
     where
       d.file_id is null
       and m.mirror_admin_disable = '0'"""  
-    
-
-    
\ No newline at end of file


Index: mirrorchecker.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/mirrorchecker.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mirrorchecker.py	6 Jul 2005 20:44:08 -0000	1.1
+++ mirrorchecker.py	1 Aug 2005 20:38:01 -0000	1.2
@@ -152,44 +152,47 @@
     self.downloadTaskManager = threadlib.TaskManager(self.numberOfThreads)
     self.actionTaskManager = threadlib.TaskManager(1) # never change this value - an unsynchronized list called 'humanReadableActionList' used by this thread
     MirrorIntegrityChecker.logger.log(sentryutil.INFO9, self.workingEnvironment.__str__())
-    try:
-      try:
-        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "about to give out special initialization task to action thread")
-        self.actionTaskManager.newTask(self.specialInitializationTask, None) # give a derived class a chance to execute intialization in the thread assigned to process actions
-        if self.checklevel != 0:
-          for aMirror in self.getListOfMirrors():
-            MirrorIntegrityChecker.logger.log(sentryutil.DEBUG1, "handing out check tasks to task threads")
-            self.downloadTaskManager.newTask (self.checkMirrorTask, (aMirror, self.filterListOfFiles(self.getListOfFiles(aMirror.mirror_id))))
-      finally:
-        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "waiting for task threads to quit")
-        self.downloadTaskManager.waitForCompletion()
-        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "waiting for the action thread to quit")
-        self.actionTaskManager.newTask(self.specialFinalizationTask, None) # give a derived class a chance to execute finalization in the thread assigned to process actions
-        self.actionTaskManager.waitForCompletion()
-        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG3, "all threads stopped")
-    except Exception, e:
-      # something bad has happened - or perhaps a keyboard interrupt
-      MirrorIntegrityChecker.logger.exception(e)
-      MirrorIntegrityChecker.logger.log(sentryutil.DEBUG9, "  stopping all threads - please wait...")
-      try:
-        # eat up all pending tasks in the task queue
-        while (True):
-          self.downloadTaskManager.taskQueue.get(False)
-      except: pass
-      self.downloadTaskManager.waitForCompletion()
-      try:
-        # eat up all the actions in the action queue
-        while (True):
-          self.actionTaskManager.taskQueue.get(False)
-      except: pass
-      self.actionTaskManager.waitForCompletion()
-      MirrorIntegrityChecker.logger.log(sentryutil.DEBUG9, "  done")
-      sys.exit()
-
-    MirrorIntegrityChecker.logger.log(sentryutil.DEBUG4, "begin postCheck")
-    self.postCheck()
-    MirrorIntegrityChecker.logger.log(sentryutil.DEBUG4, "begin notifyOfActionsTaken")
-    self.notifyOfActionsTaken()
+#    try:
+#      try:
+
+    MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "about to give out special initialization task to action thread")
+    self.actionTaskManager.newTask(self.specialInitializationTask, None) # give a derived class a chance to execute intialization in the thread assigned to process actions
+#        if self.checklevel != 0:
+    for aMirror in self.getListOfMirrors():
+        print aMirror
+#        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG1, "handing out check tasks to task threads")
+#        self.downloadTaskManager.newTask (self.checkMirrorTask, (aMirror, self.filterListOfFiles(self.getListOfFiles(aMirror.mirror_id))))
+
+#      finally:
+#        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "waiting for task threads to quit")
+#        self.downloadTaskManager.waitForCompletion()
+#        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG2, "waiting for the action thread to quit")
+#        self.actionTaskManager.newTask(self.specialFinalizationTask, None) # give a derived class a chance to execute finalization in the thread assigned to process actions
+#        self.actionTaskManager.waitForCompletion()
+#        MirrorIntegrityChecker.logger.log(sentryutil.DEBUG3, "all threads stopped")
+#    except Exception, e:
+#      # something bad has happened - or perhaps a keyboard interrupt
+#      MirrorIntegrityChecker.logger.exception(e)
+#      MirrorIntegrityChecker.logger.log(sentryutil.DEBUG9, "  stopping all threads - please wait...")
+#      try:
+#        # eat up all pending tasks in the task queue
+#        while (True):
+#          self.downloadTaskManager.taskQueue.get(False)
+#      except: pass
+#      self.downloadTaskManager.waitForCompletion()
+#      try:
+#        # eat up all the actions in the action queue
+#        while (True):
+#          self.actionTaskManager.taskQueue.get(False)
+#      except: pass
+#      self.actionTaskManager.waitForCompletion()
+#      MirrorIntegrityChecker.logger.log(sentryutil.DEBUG9, "  done")
+#      sys.exit()
+
+#    MirrorIntegrityChecker.logger.log(sentryutil.DEBUG4, "begin postCheck")
+#    self.postCheck()
+#    MirrorIntegrityChecker.logger.log(sentryutil.DEBUG4, "begin notifyOfActionsTaken")
+#    self.notifyOfActionsTaken()
  
   #--------------------
   # f i l e S t a t u s


Index: repoview.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/repoview.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- repoview.py	9 Jul 2005 21:16:44 -0000	1.1
+++ repoview.py	1 Aug 2005 20:38:01 -0000	1.2
@@ -21,40 +21,679 @@
 # Author: Konstantin Ryabitsev <icon at duke.edu>
 #
 
-import repochecker
-import sentryutil
-import threading
+import fnmatch
+import getopt
+import gzip
+import os
+import re
+import shutil
+import sys
+import time
 
+## 
+# Try to import cElementTree, and if that fails attempt to fall back to
+# ElementTree, a slower, but pure-python implementation.
+#
+try: 
+    from cElementTree import iterparse
+except ImportError: 
+    from elementtree.ElementTree import iterparse
+
+from kid import Template
+##
+# Kid generates a FutureWarning on python 2.3
+#
+if sys.version_info[0] == 2 and sys.version_info[1] == 3:
+    import warnings
+    warnings.filterwarnings('ignore', category=FutureWarning)
+
+##
+# Some hardcoded constants
+#
+pkgkid = 'package.kid'
+pkgfile = '%s.html'
+grkid = 'group.kid'
+grfile = '%s.group.html'
+idxkid = 'index.kid'
+idxfile = 'index.html'
+
+VERSION = '0.3'
+DEFAULT_TEMPLATEDIR = './templates'
+
+def _bn(tag):
+    """
+    This is a very dirty way to go from {xmlns}tag to just tag.
+    """
+    try: return tag.split('}')[1]
+    except: return tag
+
+emailre = re.compile('<.*?@.*?>')
+def _webify(text):
+    """
+    Make it difficult to harvest email addresses.
+    """
+    if text is None: return None
+    mo = emailre.search(text)
+    if mo:
+        email = mo.group(0)
+        remail = email.replace('.', '{*}')
+        remail = remail.replace('@', '{%}')
+        text = re.sub(email, remail, text)
+    return text
+
+quiet = 0
+def _say(text, flush=0):
+    """
+    Unless in quiet mode, output the text passed.
+    """
+    if quiet: return
+    sys.stdout.write(text)
+    if flush: sys.stdout.flush()
+
+def _mkid(text):
+    """
+    Remove slashes.
+    """
+    text = text.replace('/', '.')
+    text = text.replace(' ', '')
+    return text
+
+## Jonathan :)
+
+
+
+class GroupFactory(dict):
+    """
+    A small utility class that extends the dict functionality to aide in
+    kid template generation. It contains the groups, keyed by group id.
+    """
+    def __init__(self):
+        dict.__init__(self)
+        self.sortedlist = None
+        
+    def getSortedList(self, showinvisible=0):
+        """
+        Get the sorted list of groups. The sorting is done by group id 
+        in ascending order (locale-specific).
+        """
+        if self.sortedlist is None:
+            grids = []
+            for grid in self.keys():
+                if self[grid].uservisible or showinvisible:
+                    grids.append(grid)
+            grids.sort()
+            self.sortedlist = []
+            for grid in grids:
+                self.sortedlist.append(self[grid])
+        return self.sortedlist
+
+class Group:
+    """
+    Contains a list of packages.
+    """
+    def __init__(self, grid=None, name=None):
+        self.packages = []
+        self.grid = grid
+        self.name = name
+        self.sorted = 0
+        self.uservisible = 1
+
+    def getSortedList(self, trim=0, nevr=None):
+        """
+        A utility method for calling from kid templates. This will
+        return a sorted list of packages, optionally trimmed since
+        on large repositories this list can be very large, and makes
+        the display useless. If you pass the trim parameter, you must
+        pass the nevr parameter, too, so the it knows around which package
+        to trim.
+        """
+        if not self.sorted:
+            nevrlist = {}
+            for package in self.packages:
+                nevrlist[package.nevr] = package
+            keys = nevrlist.keys()
+            keys.sort()
+            retlist = []
+            for nevr in keys:
+                retlist.append(nevrlist[nevr])
+            self.packages = retlist
+            self.sorted = 1
+        if not trim or len(self.packages) <= trim: return self.packages
+        retlist = []
+        i = 0
+        for pkg in self.packages:
+            if pkg.nevr == nevr: break
+            i += 1
+        half = trim/2
+        if i - half < 0:
+            return self.packages[0:trim]
+        if i + half > len(self.packages):
+            return self.packages[-trim:]
+        return self.packages[i-half:i+half]        
+       
 class RepoView:
     """
     The base class.
     """
-    def __init__(self, RepoIntegrityChecker, aRepoTuple, webpath):
-        print (webpath)
- 
-        self.aRepoTuple = aRepoTuple
-        self.webpath=webpath 
- 
-        #self.ignore = ignore
-        #self.xarch = xarch
-        #self.arches = []
-        #self.force = force
-        #self.outdir = os.path.join(self.repodir, 'repodata', 'repoview')
-        #self.packages = {}
-        #self.groups = GroupFactory()
-        #self.letters = GroupFactory()
-        #self.maxlatest = maxlatest
-        #self.repodata = {}
-
-        #if not os.access(repomd, os.R_OK):
-        #    sys.stderr.write('Not found: %s\n' % repomd)
-        #    sys.stderr.write('Does not look like a repository. Exiting.\n')
-        #    sys.exit(1)
-        #self._parseRepoMD(repomd)
+    def __init__(self, repodir, ignore=[], xarch=[], force=0, maxlatest=30):
+        self.repodir = repodir
+        self.ignore = ignore
+        self.xarch = xarch
+        self.arches = []
+        self.force = force
+        self.outdir = os.path.join(self.repodir, 'repodata', 'repoview')
+        self.packages = {}
+        self.groups = GroupFactory()
+        self.letters = GroupFactory()
+        self.maxlatest = maxlatest
+        self.repodata = {}
+        repomd = os.path.join(self.repodir, 'repodata', 'repomd.xml')
+        if not os.access(repomd, os.R_OK):
+            sys.stderr.write('Not found: %s\n' % repomd)
+            sys.stderr.write('Does not look like a repository. Exiting.\n')
+            sys.exit(1)
+        self._parseRepoMD(repomd)
         ## Do packages (primary.xml and other.xml)
-        #self._parsePrimary()
-        #self._parseOther()
+        self._parsePrimary()
+        self._parseOther()
         ## Do groups and resolve them
-        #if self.repodata.has_key('group'):
-        #    self._parseGroups()
-        
\ No newline at end of file
+        if self.repodata.has_key('group'):
+            self._parseGroups()
+
def _parseRepoMD(self, loc):
    """
    Parse repomd.xml at loc and fill self.repodata with a mapping of
    {datatype: {'location': ..., 'checksum': ...}}, then decide via
    _checkNecessity() whether a rebuild is needed.
    """
    # 'datatype' rather than 'type' so the builtin is not shadowed.
    datatype = 'unknown'
    _say('Reading repository data...', 1)
    for event, elem in iterparse(loc, events=('start',)):
        tag = _bn(elem.tag)
        if tag == 'data':
            datatype = elem.get('type', 'unknown')
            self.repodata[datatype] = {}
        elif tag == 'location':
            self.repodata[datatype]['location'] = elem.get('href', '#')
        elif tag == 'checksum':
            self.repodata[datatype]['checksum'] = elem.text
        elem.clear()
    _say('done\n')
    self._checkNecessity()
+            
def _checkNecessity(self):
    """
    Compare the checksum recorded in repoview/checksum during the last
    run against the primary.xml checksum we just read.  Returns 1 when
    a rebuild is needed; exits the program when nothing changed and -f
    was not given.
    """
    if self.force:
        return 1
    chkfile = os.path.join(self.outdir, 'checksum')
    # No stored checksum (first run, or deleted) means: rebuild.
    try:
        fh = open(chkfile, 'r')
        stored = fh.read()
        fh.close()
    except IOError:
        return 1
    if stored.strip() != self.repodata['primary']['checksum']:
        return 1
    _say("RepoView: Repository has not changed. Force the run with -f.\n")
    sys.exit(0)
+
def _getFileFh(self, loc):
    """
    Open the metadata file at loc (relative to self.repodir) and return
    a read file handle, transparently decompressing .gz files.
    """
    loc = os.path.join(self.repodir, loc)
    # endswith() is clearer than the loc[-3:] slice and behaves the
    # same for every string length.
    if loc.endswith('.gz'):
        return gzip.open(loc, 'r')
    return open(loc, 'r')
+
def _parseGroups(self):
    """
    Parse comps.xml and populate self.groups, linking every package
    object that comps mentions to its group.
    """
    _say('parsing comps...', 1)
    fh = self._getFileFh(self.repodata['group']['location'])
    namemap = self._getNameMap()
    pct = 0
    group = Group()
    for event, elem in iterparse(fh):
        tag = elem.tag
        if tag == 'group':
            # </group> seen: store the accumulated group, start fresh.
            pct += 1
            _say('\rparsing comps: %s groups' % pct)
            self.groups[group.grid] = group
            group = Group()
        elif tag == 'id':
            group.grid = _mkid(elem.text)
        elif tag == 'name' and not elem.attrib:
            group.name = _webify(elem.text)
        elif tag == 'description' and not elem.attrib:
            group.description = _webify(elem.text)
        elif tag == 'uservisible':
            if elem.text.lower() == 'true': group.uservisible = 1
            else: group.uservisible = 0
        elif tag == 'packagereq':
            pkgname = elem.text
            # Membership test on the dict itself is O(1); the original
            # 'in namemap.keys()' built and scanned a list per package.
            if pkgname in namemap:
                pkglist = namemap[pkgname]
                group.packages += pkglist
                for pkg in pkglist:
                    pkg.group = group
        elem.clear()
    _say('...done\n', 1)
    fh.close()
+
def _getNameMap(self):
    """
    Needed for group parsing: since only package names are listed in
    <comps>, this maps names to package objects.  The result is in the
    format: {'pkgname': [pkgobject1, pkgobject2, ...]}.
    """
    namemap = {}
    # setdefault() replaces the original per-package linear scan of
    # namemap.keys() with a single O(1) dict operation.
    for package in self.packages.values():
        namemap.setdefault(package.n, []).append(package)
    return namemap
+
def _parsePrimary(self):
    """
    Walk primary.xml and hand each completed package entry over to
    _doPackage().  Sets self.pkgcount and self.pkgignored when done.
    """
    _say('parsing primary...', 1)
    fh = self._getFileFh(self.repodata['primary']['location'])
    total = 0
    skipped = 0
    pkgdata = {}
    # Elements whose text is stored verbatim (after e-mail mangling).
    simpletags = ('name', 'arch', 'summary', 'description', 'url',
                  'packager', 'checksum', 'license', 'group', 'vendor')
    for event, elem in iterparse(fh):
        tag = _bn(elem.tag)
        if tag == 'package':
            # </package>: hand the collected fields over and reset.
            if not self._doPackage(pkgdata):
                skipped += 1
            total += 1
            _say('\rparsing primary: %s packages, %s ignored' %
                    (total, skipped))
            pkgdata = {}
        elif tag in simpletags:
            pkgdata[tag] = _webify(elem.text)
        elif tag == 'version':
            pkgdata.update(self._getevr(elem))
        elif tag == 'time':
            pkgdata['time'] = elem.get('build', '0')
        elif tag == 'size':
            pkgdata['size'] = elem.get('package', '0')
        elif tag == 'location':
            pkgdata['location'] = elem.get('href', '#')
        elem.clear()
    self.pkgcount = total - skipped
    self.pkgignored = skipped
    _say('...done\n', 1)
    fh.close()
+
def _doPackage(self, pkgdata):
    """
    Turn one primary.xml entry into a Package (or an extra arch on an
    already-known Package).  Returns 1 when the entry was used, 0 when
    it was empty or excluded by arch / ignore glob.
    """
    if not pkgdata:
        return 0
    arch = pkgdata['arch']
    if arch in self.xarch:
        return 0
    if arch not in self.arches:
        self.arches.append(arch)
    ## We make a package here from pkgdata ##
    n = pkgdata['name']
    e = pkgdata['epoch']
    v = pkgdata['ver']
    r = pkgdata['rel']
    pkgid = self._mkpkgid(n, e, v, r)
    if self._checkIgnore(pkgid):
        return 0
    package = self.packages.get(pkgid)
    if package is None:
        package = Package(n, e, v, r)
        package.pkgid = pkgid
        self.packages[pkgid] = package
    package.doPackage(pkgdata)
    return 1
+        
def _checkIgnore(self, pkgid):
    """
    Return 1 when the package id (n-e-v-r) matches any of the ignore
    globs given via -i, else 0.
    """
    for pattern in self.ignore:
        if fnmatch.fnmatchcase(pkgid, pattern):
            return 1
    return 0
+
def _parseOther(self, limit=3):
    """
    Walk other.xml and attach up to `limit` changelog entries to each
    already-known package.
    """
    _say('parsing other...', 1)
    fh = self._getFileFh(self.repodata['other']['location'])
    pct = 0
    ignored = 0
    changelogs = []
    evr = None
    cct = 0
    for event, elem in iterparse(fh):
        tag = _bn(elem.tag)
        if tag == 'package':
            # assumes every <package> carries a <version> child so evr
            # is set by now -- TODO confirm against the metadata spec
            n = elem.get('name', '__unknown__')
            pkgid = self._mkpkgid(n, evr['epoch'], evr['ver'], evr['rel'])
            if not self._doOther(pkgid, changelogs): ignored += 1
            pct += 1
            _say('\rparsing other: %s packages, %s ignored' %
                (pct, ignored))
            evr = None
            changelogs = []
            n = None
            cct = 0
        elif tag == 'version':
            evr = self._getevr(elem)
        elif tag == 'changelog':
            # Keep only the first `limit` entries per package.  The
            # original used `continue` here, which skipped elem.clear()
            # below and let parsed elements accumulate in memory.
            if cct < limit:
                author = _webify(elem.get('author', 'incognito'))
                date = int(elem.get('date', '0'))
                changelog = _webify(elem.text)
                changelogs.append([date, author, changelog])
                cct += 1
        elem.clear()
    _say('...done\n', 1)
    fh.close()
+
def _doOther(self, pkgid, changelogs):
    """
    Attach changelogs to the package identified by pkgid, when we know
    that package.  Returns addChangelogs()'s result, or 0 when there is
    nothing to do.
    """
    if not (pkgid and changelogs):
        return 0
    package = self.packages.get(pkgid)
    if package is None:
        return 0
    return package.addChangelogs(changelogs)
+        
def _mkpkgid(self, n, e, v, r):
    """
    Join name, epoch, version and release into the canonical n-e-v-r
    package id string.
    """
    return '-'.join(['%s' % part for part in (n, e, v, r)])
+
def _getevr(self, elem):
    """
    Pull the epoch/ver/rel attributes (each defaulting to '0') out of
    a <version> element.
    """
    return {'epoch': elem.get('epoch', '0'),
            'ver':   elem.get('ver', '0'),
            'rel':   elem.get('rel', '0')}
+
def _makeExtraGroups(self):
    """
    This is a utility method to create the extra groups.  Currently,
    the extra groups are:
    __nogroup__: packages not in any other groups (or, when comps gave
                 us no groups at all, groups made from the rpm group)
    __latest__: the last NN packages updated
    letter groups: all packages grouped by their uppercased first letter
    Any empty groups are then removed.
    """
    nogroup = Group(grid='__nogroup__',
                    name='Packages not in Groups')
    latest = {}
    i = 0
    makerpmgroups = 0
    if not len(self.groups):
        makerpmgroups = 1
    for pkgid in self.packages.keys():
        package = self.packages[pkgid]
        if package.group is None:
            if makerpmgroups:
                grid = _mkid(package.rpmgroup)
                if grid not in self.groups.keys():
                    group = Group(grid=grid, name=package.rpmgroup)
                    self.groups[grid] = group
                else:
                    group = self.groups[grid]
                package.group = group
                group.packages.append(package)
            else:
                package.group = nogroup
                nogroup.packages.append(package)
        letter = pkgid[0].upper()
        if letter not in self.letters.keys():
            group = Group(grid=letter, name='Letter: %s' % letter)
            self.letters[letter] = group
        self.letters[letter].packages.append(package)
        # btime is the newest build time (seconds since epoch) among
        # the package's arches, so reverse logic below!
        # NOTE(review): two packages with an identical build time
        # overwrite each other in `latest` -- confirm acceptable.
        btime = 0
        for arch in package.arches.values():
            if arch.time > btime: btime = arch.time
        if len(latest.keys()) < self.maxlatest:
            latest[btime] = package
        else:
            times = latest.keys()
            times.sort()
            times.reverse()
            oldest = times[-1]
            if btime > oldest:
                del latest[oldest]
                latest[btime] = package
        i += 1
        _say('\rcreating extra groups: %s entries' % i)
    if nogroup.packages:
        self.groups[nogroup.grid] = nogroup
    times = latest.keys()
    times.sort()
    times.reverse()
    lgroup = Group(grid='__latest__',
                   name='Last %s Packages Updated' % len(times))
    # 'btime' rather than 'time' so the time module is not shadowed.
    for btime in times:
        lgroup.packages.append(latest[btime])
    lgroup.sorted = 1
    self.groups[lgroup.grid] = lgroup
    _say('...done\n', 1)
    ## Prune empty groups
    for grid in self.groups.keys():
        if not self.groups[grid].packages: del self.groups[grid]
+
def _mkOutDir(self, templatedir):
    """
    Wipe any previous repoview output directory, recreate it, and copy
    the templates' 'layout' directory into it when one exists.
    """
    if os.path.isdir(self.outdir):
        _say('deleting old repoview...', 1)
        shutil.rmtree(self.outdir)
        _say('done\n', 1)
    os.mkdir(self.outdir)
    src = os.path.join(templatedir, 'layout')
    dst = os.path.join(self.outdir, 'layout')
    if os.path.isdir(src):
        _say('copying layout...', 1)
        shutil.copytree(src, dst)
        _say('done\n', 1)
+
def mkLinkUrl(self, object, isindex=0):
    """
    This is a utility method passed to kid templates.  The templates
    use it to get the link to a package, group, or layout object
    without having to figure things out on their own (isindex=1 when
    rendering the front page).

    The parameter keeps the name 'object' (shadowing the builtin) so
    any keyword calls from templates stay compatible.
    """
    link = '#'
    prefix = ''
    if isindex:
        if self.toplevel:
            prefix = os.path.join('repodata', 'repoview')
        else:
            prefix = 'repoview'
    cls = object.__class__
    if cls is str:
        if not isindex and object == idxfile:
            # Non-index pages live one or two levels below the index.
            if self.toplevel:
                link = os.path.join('..', '..', object)
            else:
                link = os.path.join('..', object)
        else:
            link = os.path.join(prefix, object)
    elif cls is Package:
        link = os.path.join(prefix, pkgfile % object.pkgid)
    elif cls is Group:
        link = os.path.join(prefix, grfile % object.grid)
    elif cls is Archer:
        if isindex and self.toplevel:
            link = os.path.join('..', object.loc)
        else:
            link = os.path.join('..', '..', object.loc)
    return link
+
def applyTemplates(self, templatedir, toplevel=0, title='RepoView'):
    """
    Render every group, letter-group, package and index page through
    the kid templates in templatedir, then record the primary.xml
    checksum for the next run's _checkNecessity().
    """
    if not self.packages:
        _say('No packages available.')
        sys.exit(0)
    gentime = time.strftime('%c')
    self.toplevel = toplevel
    self._makeExtraGroups()
    self._mkOutDir(templatedir)
    stats = {
        'title': title,
        'pkgcount': self.pkgcount,
        'pkgignored': self.pkgignored,
        'ignorelist': self.ignore,
        'archlist': self.arches,
        'ignorearchlist': self.xarch,
        'VERSION': VERSION,
        'gentime': gentime
        }
    ## Do groups
    grtmpl = os.path.join(templatedir, grkid)
    kobj = Template(file=grtmpl, mkLinkUrl=self.mkLinkUrl,
            letters=self.letters, groups=self.groups, stats=stats)
    i = 0
    for grid in self.groups.keys():
        kobj.group = self.groups[grid]
        out = os.path.join(self.outdir, grfile % grid)
        fh = open(out, 'w')
        kobj.write(fh)
        fh.close()
        i += 1
        _say('writing groups: %s written\r' % i)
    _say('\n', 1)
    ## Do letter groups
    i = 0
    for grid in self.letters.keys():
        kobj.group = self.letters[grid]
        out = os.path.join(self.outdir, grfile % grid)
        fh = open(out, 'w')
        kobj.write(fh)
        fh.close()
        i += 1
        _say('writing letter groups: %s written\r' % i)
    _say('\n', 1)
    ## Do packages
    i = 0
    pkgtmpl = os.path.join(templatedir, pkgkid)
    kobj = Template(file=pkgtmpl, mkLinkUrl=self.mkLinkUrl,
            letters=self.letters, stats=stats)
    for pkgid in self.packages.keys():
        kobj.package = self.packages[pkgid]
        out = os.path.join(self.outdir, pkgfile % pkgid)
        fh = open(out, 'w')
        kobj.write(fh)
        fh.close()
        i += 1
        _say('writing packages: %s written\r' % i)
    _say('\n', 1)
    ## Do index
    _say('generating index...', 1)
    idxtmpl = os.path.join(templatedir, idxkid)
    self.arches.sort()
    kobj = Template(file=idxtmpl, mkLinkUrl=self.mkLinkUrl,
        letters=self.letters, groups=self.groups, stats=stats)
    if self.toplevel: out = os.path.join(self.repodir, idxfile)
    else: out = os.path.join(self.repodir, 'repodata', idxfile)
    fh = open(out, 'w')
    # BUGFIX: the original called kobj.write(out) -- passing the path
    # while leaving the freshly-opened fh unused (and truncated).
    # Write through fh, consistent with every other page above.
    kobj.write(fh)
    fh.close()
    _say('done\n')
    _say('writing checksum...', 1)
    chkfile = os.path.join(self.outdir, 'checksum')
    fh = open(chkfile, 'w')
    fh.write(self.repodata['primary']['checksum'])
    fh.close()
    _say('done\n')
+
+def usage(ecode=0):
+    print """
+    repoview [-i name] [-x arch] [-k dir] [-l title] [-t] [-f] [-q] [repodir]
+    This will make your repository browseable
+    -i name
+        Optionally ignore this package -- can be a shell-style glob.
+        This is useful for excluding debuginfo packages:
+        -i *debuginfo* -i *doc*
+        The globbing will be done against name-epoch-version-release, 
+        e.g. foo-0-1.0-1
+    -x arch
+        Optionally exclude this arch. E.g.:
+        -x src -x ia64
+    -k templatedir
+        Use an alternative directory with kid templates instead of
+        the default: %s
+        The template directory must contain three required template 
+        files: index.kid, group.kid, package.kid and the
+        "layout" dir which will be copied into the repoview directory.
+    -l title
+        Describe the repository in a few words. By default, "RepoView" is used.
+        E.g.:
+        -l "Extras for Fedora Core 3 x86"
+    -t
+        Place the index.html into the top level of the repodir, instead of 
+        just in repodata/index.html.
+    -f
+        Regenerate the pages even if the repomd checksum hasn't changed.
+    -q
+        Do not output anything except fatal erros.
+    repodir
+        Where to look for the 'repodata' directory.
+    """ % DEFAULT_TEMPLATEDIR
+    sys.exit(ecode)
+
+def main(args):
+    global quiet
+    if not args: usage()
+    ignore = []
+    xarch = []
+    toplevel = 0
+    templatedir = DEFAULT_TEMPLATEDIR
+    title = 'RepoView'
+    force = 0
+    try:
+        gopts, cmds = getopt.getopt(args, 'i:x:k:l:tfqh', ['help'])
+        if not cmds: usage(1)
+        for o, a in gopts:
+            if o == '-i': ignore.append(a)
+            elif o == '-x': xarch.append(a)
+            elif o == '-k': templatedir = a
+            elif o == '-l': title = a
+            elif o == '-t': toplevel = 1
+            elif o == '-f': force = 1
+            elif o == '-q': quiet = 1
+            else: usage()
+        repodir = cmds[0]
+    except getopt.error, e:
+        print "Error: %s" % e
+        usage(1)
+    if templatedir is None:
+        templatedir = os.path.join(repodir, 'templates')
+    rv = RepoView(repodir, ignore=ignore, xarch=xarch, force=force)
+    rv.applyTemplates(templatedir, toplevel=toplevel, title=title)
+
# Run only when invoked as a script, not on import.
if __name__ == '__main__':
    main(sys.argv[1:])


Index: derivedmirrorcheckers.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/derivedmirrorcheckers.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- derivedmirrorcheckers.py	10 Jul 2005 09:15:28 -0000	1.1
+++ derivedmirrorcheckers.py	1 Aug 2005 20:38:01 -0000	1.2
@@ -173,7 +173,7 @@
     from 
       mirrors m
     where
-      m.mirror_admin_disable = '0'"""
+      m.mirror_active = '1'"""
   
   #----------------------------
   # T a r g e t F i l e s S Q L
@@ -485,22 +485,23 @@
   def getListOfMirrors (self):
     """Execute appropriate SQL to get the list of mirrors
     """
-    listOfMirrorsAndFilesNotRepresentedInDownloadables = self.database.executeSql(NewMirrorsAndFilesMirrorIntegrityChecker.MirrorsAndFilesNotRepresentedInDownloadablesSQL)
+    listOfMirrorsAndReposNotRepresentedInDownloadables = self.database.executeSql(NewMirrorsAndFilesMirrorIntegrityChecker.MirrorsAndReposNotRepresentedInDownloadablesSQL)
+    print listOfMirrorsAndReposNotRepresentedInDownloadables
     self.filesForEachMirror = { }
     uniqueMirrorList = []
     dataForNewDownloadablesTableRecords = [ ]
-    for x in listOfMirrorsAndFilesNotRepresentedInDownloadables:
+    for x in listOfMirrorsAndReposNotRepresentedInDownloadables:
       if self.filesForEachMirror.has_key(x.mirror_id):
         self.filesForEachMirror[x.mirror_id].append(x.file_id)
-      else:
-        self.filesForEachMirror[x.mirror_id] = [ x.file_id ]
-        uniqueMirrorList.append(x)
-      dataForNewDownloadablesTableRecords.append( (x.mirror_id, x.file_id, '0', 0, self.doTemplateSubstitution(x.template_text, x.__dict__)))
+    #  else:
+    #    self.filesForEachMirror[x.mirror_id] = [ x.file_id ]
+    #    uniqueMirrorList.append(x)
+    #  dataForNewDownloadablesTableRecords.append( (x.mirror_id, x.file_id, '0', 0, self.doTemplateSubstitution(x.template_text, x.__dict__)))
     
-    self.database.executeManySql(NewMirrorsAndFilesMirrorIntegrityChecker.InsertMirrorsAndFilesNotRepresentedInDownloadablesSQL, dataForNewDownloadablesTableRecords)
-    self.database.commit()
+    #self.database.executeManySql(NewMirrorsAndFilesMirrorIntegrityChecker.InsertMirrorsAndFilesNotRepresentedInDownloadablesSQL, dataForNewDownloadablesTableRecords)
+    #self.database.commit()
     
-    return cse.TabularData.TabularData((listOfMirrorsAndFilesNotRepresentedInDownloadables.schema, uniqueMirrorList))
+    #return cse.TabularData.TabularData((listOfMirrorsAndFilesNotRepresentedInDownloadables.schema, uniqueMirrorList))
   
   #----------------------------
   # g e t L i s t O f F i l e s
@@ -519,23 +520,52 @@
   #----------------------------------------------------------------------------------------------
   # M i r r o r s A n d F i l e s N o t R e p r e s e n t e d I n D o w n l o a d a b l e s S Q L 
   #----------------------------------------------------------------------------------------------
-  MirrorsAndFilesNotRepresentedInDownloadablesSQL = """
-    select
-      m.*,
-      f.file_id,
+  
+  
+ 
+  MirrorsAndReposNotRepresentedInDownloadablesSQL = """
+    SELECT
+      r.*,
       t.template_text,
-      pv.*,
-      o.*,
-      l.*
-    from 
-      ((((((mirrors m join files f) left outer join downloadables d on m.mirror_id = d.mirror_id and f.file_id = d.file_id)
-        join templates t on f.template_id = t.template_id)
-          join product_versions pv on f.pv_id = pv.pv_id)
-            join oss o on f.os_id = o.os_id)
-              join langs l on f.lang_id = l.lang_id)
-    where
-      d.file_id is null
-      and m.mirror_admin_disable = '0'"""  
+      p.*,
+      v.*,
+      a.*
+    FROM
+      repos r, templates t, products p, versions v, archs a
+    WHERE
+        r.version_id = v.version_id
+    AND
+        r.arch_id = a.arch_id
+    AND
+        r.product_id = p.product_id
+    AND
+        r.template_id = t.template_id
+    AND
+      r.repo_active = '1'    
+     
+     """
+  
+  
+  
+  
+  
+#  MirrorsAndFilesNotRepresentedInDownloadablesSQL = """
+#    select
+#      m.*,
+#      f.file_id,
+#      t.template_text,
+#      pv.*,
+#      o.*,
+#      l.*
+#    from 
+#      ((((((mirrors m join files f) left outer join downloadables d on m.mirror_id = d.mirror_id and f.file_id = d.file_id)
+#        join templates t on f.template_id = t.template_id)
+#          join product_versions pv on f.pv_id = pv.pv_id)
+#            join oss o on f.os_id = o.os_id)
+#              join langs l on f.lang_id = l.lang_id)
+#    where
+#      d.file_id is null
+#      and m.mirror_admin_disable = '0'"""  
 
       
   #----------------------------------------------------------------------------------------------------------


Index: repochecker.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/repochecker.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- repochecker.py	9 Jul 2005 21:16:44 -0000	1.2
+++ repochecker.py	1 Aug 2005 20:38:01 -0000	1.3
@@ -36,67 +36,18 @@
 import socket
 import sys
 
-import re
-import gzip
+from repomd import *
+
 import StringIO
 import time
 import os
 import shutil
-## 
-# Try to import cElementTree, and if that fails attempt to fall back to
-# ElementTree, a slower, but pure-python implementation.
-#
-try: 
-    from cElementTree import iterparse
-except ImportError: 
-    from elementtree.ElementTree import iterparse
-
-from kid import Template
-
-##
-# Some hardcoded constants
-#
-pkgkid = 'package.kid'
-pkgfile = '%s.html'
-grkid = 'group.kid'
-grfile = '%s.group.html'
-idxkid = 'index.kid'
-idxfile = 'index.html'
 
-VERSION = '0.3'
-DEFAULT_TEMPLATEDIR = './templates'
 
-def _bn(tag):
-    """
-    This is a very dirty way to go from {xmlns}tag to just tag.
-    """
-    try: return tag.split('}')[1]
-    except: return tag
-    
-emailre = re.compile('<.*?@.*?>')
-def _webify(text):
-    """
-    Make it difficult to harvest email addresses.
-    """
-    if text is None: return None
-    mo = emailre.search(text)
-    if mo:
-        email = mo.group(0)
-        remail = email.replace('.', '{*}')
-        remail = remail.replace('@', '{%}')
-        text = re.sub(email, remail, text)
-    return text
-    
-def _mkid(text):
-    """
-    Remove slashes.
-    """
-    text = text.replace('/', '.')
-    text = text.replace(' ', '')
-    return text    
+
     
 #============================================
-# M i r r o r I n t e g r i t y C h e c k e r
+# R e p o I n t e g r i t y C h e c k e r
 #============================================
 class RepoIntegrityChecker (object):
   """This base class sets up the framework for testing a set of repos
@@ -172,45 +123,17 @@
     pass    
     
     
-  #--------------------------------------------------
-  # s p e c i a l I n i t i a l i z a t i o n T a s k
-  #--------------------------------------------------
-  def specialInitializationTask (self, unusedDummy):
-    """Meant to be overridden in a derived class, this function is given to the
-    action thread to accomplish any preprocess initialization such as logging into
-    a database"""
-    pass
-    
-  #----------------------------------------------
-  # s p e c i a l F i n a l i z a t i o n T a s k
-  #----------------------------------------------
-  def specialFinalizationTask (self, unusedDummy):
-    """Meant to be overridden in a derived class, this function is given to the
-    action thread to accomplish any postprocess initialization such as logging off a
-    a database"""
-    pass
-    
-  #----------------------------
-  # g e t L i s t O f F i l e s
-  #----------------------------
-  def getListOfFiles (self, auxillaryData=None):
-    """Get a list of files in tabular data format
-    
-    This routine is meant to be overridden by a subclass.
-    """
-    return TabularData()
-    
-    
+
   #----------
   # c h e c k
   #----------
   def check (self):
-    """Queue mirrors for testing.
+    """Queue repos for testing.
       
-    After fetching the list of mirrors, this routine iterates through them, queueing
-    for testing.  Along with the mirror information, it also bundles this list of
-    files expected to be found on the mirror into a tuple.  This tuple is subsequently
-    referred to as a mirrorTuple.
+    After fetching the list of repos, this routine iterates through them, queueing
+    for testing.  Along with the repo information, it also bundles this list of
+    files expected to be found on the repo into a tuple.  This tuple is subsequently
+    referred to as a repoTuple.
     """
     
     self.preCheck()
@@ -257,17 +180,334 @@
     RepoIntegrityChecker.logger.log(sentryutil.DEBUG4, "begin notifyOfActionsTaken")
     self.notifyOfActionsTaken()
     
+  #------------------------------
+  # c h e c k R e p o T a s k
+  #------------------------------
+  def checkRepoTask (self, aRepo):
+    """Check a repo by downloading and checking its metadata.
+    
+    This function is to be called by a worker thread.
+    
+    The results of this check are queued in the actions queue.
+    
+    Parameters:
+      aRepoTuple - a tuple in the form (repoInformation, listOfFileInformation)
+    
+    Notes:
+      1) why does 'repoIsValid' have string values rather than boolean? The MySQL database for 
+         setup with a column type of enum.  In MySQL, enum is just a restriction on values 
+         for a varchar column type.  Since 'aRepo.repo_active' will be fetched as a string
+         and 'repoIsValid' is used in comparison with 'aRepo.repo_active', it was thought
+         they should be of the same type.  For clarity in the future, perhaps these should be
+         reimplemented as boolean or at least 0 and 1 integers.
+    """
+    
+
+
+    repoStatus = '2' #see note 1 above
+    #try:
+    RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: downloading: %s", threading.currentThread().getName(), "asdf")
+    repoStatus, errorMessage = self.repoStatus (aRepo)
+
+    repoStatus = str(repoStatus)  
+    print repoStatus
+    print errorMessage  
+    try:
+        pass      
+    except Exception, e:
+      # something bad happened while downloading a file from a repo.  This means that the status of the
+      # repo itself might have to be changed.
+      message = e.__str__()
+      if message == '':
+        message = e.__class__
+      if aRepo.repo_active == '1':
+        # the repo is bad, but used to be good - take the bad repo action
+        self.actionTaskManager.newTask(self.repoActionGoodToBad, (aRepo, None, None, message, mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionGoodToBad: %s", threading.currentThread().getName(), e)
+      else:
+        # the repo is bad, but was marked bad before, there is no change in status
+        self.actionTaskManager.newTask(self.repoActionBadNoChange, (aRepo, None, None, message, mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionBadNoChange: %s ", threading.currentThread().getName(), aRepo.repo_name)
+
+    else:
+      if aRepo.repo_active == '1' and repoStatus == '0':
+        # the repo is good and remains good
+        self.actionTaskManager.newTask(self.repoActionGoodNoChange, (aRepo, None, None, 'the repo is OK', mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionGoodNoChange: %s", threading.currentThread().getName(), aRepo.repo_name)
+
+      elif (aRepo.repo_active == '0' or aRepo.repo_active == '1') and repoStatus == '1':
+        # the repo is good and remains good
+        tempTuple = (aRepo, None, None, 'updating repo ', mx.DateTime.now(), self.checklevel, self.webpath)
+        self.actionTaskManager.newTask(self.repoActionUpdate, tempTuple)
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionUpdate: %s", threading.currentThread().getName(), aRepo.repo_name)
+
+      elif aRepo.repo_active == '1' and repoStatus == '2':
+        # the repo is good, but used to be marked bad, take the bad to good transition
+        self.actionTaskManager.newTask(self.repoActionGoodToBad, (aRepo, None, None, message, mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionGoodToBad: %s", threading.currentThread().getName(), e)
+
+      elif aRepo.repo_active == '0' and repoStatus == '0':
+        self.actionTaskManager.newTask(self.repoActionBadToGood, (aRepo, None, None, message, mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionBadToGood: %s", threading.currentThread().getName(), e)
+
+      else:
+#        # we never managed to download anything - assume all of this repo's files are marked as bad.
+#        # there are no files to download and the repo was marked as bad, don't change the status.
+        self.actionTaskManager.newTask(self.repoActionBadNoChange, (aRepo, None, None, 'all files are bad - full test required', mx.DateTime.now(), self.checklevel))
+        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: repoActionBadNoChange: %s ", threading.currentThread().getName(), aRepo.repo_name)
+     
+    
+  #--------------------
+  # f i l e S t a t u s
+  #--------------------
+  def repoStatus (self, aRepo):
+    """Returns a value indicating the validity of the file using several tests.
+        
+    In general, this function will be called only by a worker thread.
+    
+    This routine downloads a URL and first checks to see if it returns the 
+    desired MimeType as indicated by the mimeTypeAssociations dictionary.
+    If it passes that test, it downloads the entire file and creates a hash.
+    
+    Parameters:
+      aURL - the complete URL to try to download
+      expectedHashCode - a string representing the hash code of the file.  The length of
+        this string will be used to determine which hash algorithm to use.  See the 
+        class level variable 'hashLengthHashAlgorithmsAssociations' to see the lengths
+        and their associations.
+    
+    This routine returns a tuple: (ReturnCode, HumanReadableErrorMessage)
+    See the class level variable 'fileStatusAssociations' for the messages
+    The ReturnCode is as follows:
+      0 - the repo is okay
+      1 - repo is out of date
+      2 - an HTTP Error occurred
+    """
+    print aRepo
+    
+    #try:
+    rFilepath = aRepo.repo_url + '/'+ aRepo.metadata_path
+    lFilepath = (self.webpath +'/'+ aRepo.product_path +'/'+ aRepo.version_path+'/'+aRepo.arch_path)  
+    timestamp = aRepo.repo_primary_timestamp
+      
+    data = self.getURI(rFilepath+'/repomd.xml',lFilepath, lFilepath+'/repomd.xml')
+
+    #except urllib2.HTTPError, e:
+    #  if 400 <= e.code <= 499:
+    #    return (2, RepoIntegrityChecker.repoStatusAssociations[2] % e.__str__())
+    #  raise e      
+    
+    repomd = RepoMD(self, aRepo, self.webpath)
+    repoIsUp2date =  repomd.checkRepo()
+   
+    if repoIsUp2date != 1:
+      # old primary timestamp
+      return (1, RepoIntegrityChecker.repoStatusAssociations[1])
+    # the repo is OK
+    return (0, RepoIntegrityChecker.repoStatusAssociations[0])
+  
  #--------------------------------------------
  # r e p o S t a t u s A s s o c i a t i o n s
  #--------------------------------------------
  # This dictionary represents the possible return values of the 'repoStatus' method.
  # Entry 2 is a bare '%s' so the caller can substitute the HTTP error text.
  repoStatusAssociations = { 0: 'the repo is OK',
                             1: 'the repo is out of date',
                             2: '%s'
                           }
+    
+    
  #--------------------------------------------------
  # s p e c i a l I n i t i a l i z a t i o n T a s k
  #--------------------------------------------------
  def specialInitializationTask (self, unusedDummy):
    """Meant to be overridden in a derived class, this function is given to the
    action thread to accomplish any preprocess initialization such as logging into
    a database.

    Parameters:
      unusedDummy - ignored; present only so the action thread can pass a parameter
    """
    pass
+
+    
  #----------------------------------------------
  # s p e c i a l F i n a l i z a t i o n T a s k
  #----------------------------------------------
  def specialFinalizationTask (self, unusedDummy):
    """Meant to be overridden in a derived class, this function is given to the
    action thread to accomplish any postprocess finalization such as logging off
    a database.

    Parameters:
      unusedDummy - ignored; present only so the action thread can pass a parameter
    """
    pass
+    
+         
+  #------------------------------------------
+  # r e p o A c t i o n G o o d T o B a d
+  #------------------------------------------
+  def repoActionGoodToBad (self, repoTuple):
+    """ Derived classes implement whatever action is appropriate when a repo changes status from good to bad.
+    This function is run exclusively by the action thread.
+    
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).repoActionGoodToBad()  
+    
+    Parameter:
+      repoFileTuple - (repoObject, fileObject, url, errorMessageOrException, timestamp, testType)
+    """    
+    if self.workingEnvironment.has_key('notifyRepoActionGoodToBad'):
+      actionString = 'repoActionGoodToBad (%s): %s (%s) ' % (repoTuple[4], repoTuple[0].repo_baseurl, repoTuple[3])
+      self.humanReadableActionList.append(actionString)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: repoActionGoodToBad %s", threading.currentThread().getName(), repoTuple[0].repo_name)
+
+      
+  #------------------------------------------
+  # r e p o A c t i o n B a d T o G o o d
+  #------------------------------------------
+  def repoActionBadToGood (self, repoTuple):
+    """ Derived classes implement whatever action is appropriate when a repo changes status from bad to good.
+    This function is run exclusively by the action thread.
+    
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).repoActionBadToGood()  
+    
+    Parameter:
+      repoFileTuple - (repoObject, fileObject, url, errorMessageOrException, timestamp, testType)
+    """    
+    if self.workingEnvironment.has_key('notifyRepoActionBadToGood'):
+      actionString = 'repoActionBadToGood (%s): %s' % (repoTuple[4], repoTuple[0].repo_baseurl)
+      self.humanReadableActionList.append(actionString)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: repoActionGoodToBad %s", threading.currentThread().getName(), repoFileTuple[0].repo_name)
+
+
+  #------------------------------------------------
+  # r e p o A c t i o n G o o d N o C h a n g e
+  #------------------------------------------------
+  def repoActionGoodNoChange (self, repoFileTuple):
+    """ Derived classes implement whatever action is appropriate when a repo changes is good and does not need to change status.
+    This function is run exclusively by the action thread.
+    
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).repoActionGoodNoChange()  
+    
+    Parameter:
+      repoFileTuple - (repoObject, fileObject, url, errorMessageOrException, timestamp, testType)
+    """
+    if self.workingEnvironment.has_key('notifyRepoActionGoodNoChange'):
+      actionString = 'repoActionGoodNoChange (%s): %s' % (repoFileTuple[4], repoFileTuple[0].repo_baseurl)
+      self.humanReadableActionList.append(actionString)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: repoActionGoodNoChange %s", threading.currentThread().getName(), repoFileTuple[0].repo_name)
+
+
+  #----------------------------------------------
+  # r e p o A c t i o n B a d N o C h a n g e
+  #----------------------------------------------
+  def repoActionBadNoChange (self, repoTuple):
+    """ Derived classes implement whatever action is appropriate when a repo changes is bad and does not need to change status.
+    This function is run exclusively by the action thread.
     
-  def _get(self,uri,filename):
-    RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: downloading: %s", threading.currentThread().getName(), uri)
-    request = urllib2.Request(uri)
-    request.add_header("Accept-encoding", "gzip")
-    usock = urllib2.urlopen(request)
-    data = usock.read()
-    if usock.headers.get('content-encoding', None) == 'gzip':
-        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).repoActionBadNoChange()  
+    
+    Parameter:
+      repoFileTuple - (repoObject, fileObject, url, errorMessageOrException, timestamp, testType)
+    """    
+    if self.workingEnvironment.has_key('notifyRepoActionBadNoChange'):
+      actionString = 'repoActionBadNoChange (%s): %s (%s) ' % (repoTuple[4], repoTuple[0].repo_baseurl, repoTuple[3])
+      self.humanReadableActionList.append(actionString)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: repoActionBadNoChange %s", threading.currentThread().getName(), repoTuple[0].repo_name)
+
+
+  #----------------------------------------------
+  # r e p o A c t i o n B a d N o C h a n g e
+  #----------------------------------------------
+  def repoActionUpdate (self, repoTuple):
+    """ Derived classes implement whatever action is appropriate when a repo changes is bad and does not need to change status.
+    This function is run exclusively by the action thread.
+    
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).repoActionBadNoChange()  
+    
+    Parameter:
+      repoFileTuple - (repoObject, fileObject, url, errorMessageOrException, timestamp, testType)
+    """
+    if self.workingEnvironment.has_key('notifyRepoActionUpdate'):
+      actionString = 'repoActionBadNoChange (%s): %s (%s) ' % (repoTuple[4], repoTuple[0].repo_baseurl, repoTuple[3])
+      self.humanReadableActionList.append(actionString)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: repoActionUpdate %s", threading.currentThread().getName(), repoTuple[0].repo_name)
+
+
+
+  #--------------------------
+  # g e n e r i c A c t i o n
+  #--------------------------
+  def genericAction (self, parameter):
+    """ If a derived class needs to setup an action for the action thread but that action does not fit the other defined actions
+    this function can serve as the action.
+    
+    This function is run exclusively by the action thread.
+    
+    Any derived classes implementing this function MUST chain their execution to
+    the their parent classes like this:
+      super(MyDerivedClass, self).genericAction()  
+    
+    Parameter:
+      parameter - whatever the derived class needs
+    """    
+    self.humanReadableActionList.append(parameters)
+    RepoIntegrityChecker.logger.log(sentryutil.INFO4, "%s: genericAction %s", parameter)
+
+
+  #----------------------------------------
+  # n o t i f y O f A c t i o n s T a k e n
+  #----------------------------------------
+  def notifyOfActionsTaken (self):
+    for x in enumerate(self.humanReadableActionList):
+      print "%4d %s" % x
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
+    
  def getURI(self,uri,filepath,filename):
    """Download *uri* and save the response body to *filename*.

    Parameters:
      uri      - the complete URL to download
      filepath - the local directory that must exist before writing
      filename - the full local path the downloaded data is written to

    Raises:
      urllib2.HTTPError - re-raised unchanged on HTTP failure
      Errors.RepoError  - if the destination directory cannot be created
        (NOTE(review): 'Errors' does not appear in this module's visible
        imports - confirm it is in scope, otherwise this raise is a NameError)

    NOTE(review): the caller assigns this method's result ('data = self.getURI(...)')
    but no 'return' is visible in this excerpt - presumably the method continues
    past this hunk and returns 'data'; confirm.
    """
    try:
      RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: downloading: %s", threading.currentThread().getName(), uri)
      request = urllib2.Request(uri)
      # advertise gzip support even though the decode below is currently disabled
      request.add_header("Accept-encoding", "gzip")
      usock = urllib2.urlopen(request)
      data = usock.read()
    except urllib2.HTTPError, e:
      raise e
    #if usock.headers.get('content-encoding', None) == 'gzip':
    #  data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
    RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: done downloading: %s", threading.currentThread().getName(), uri)
    RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: saving: %s", threading.currentThread().getName(), filename)
    
    # create the destination directory if it does not already exist
    if os.path.exists(filepath) and os.path.isdir(filepath):
      pass
    else:
      try:
        os.makedirs(filepath, mode=0755)
      except OSError, e:
        raise Errors.RepoError, "Error making cache directory: %s error was: %s" % ((filepath), e)
    
    
    fh = open(filename,'wb')
    fh.write(data)
    fh.close()    
@@ -276,25 +516,25 @@
     
     
   #------------------------------
-  # c h e c k M i r r o r T a s k
+  # c h e c k R e p o T a s k
   #------------------------------
-  def checkRepoTask (self, aRepoTuple):
-    """Check a mirror by downloading all of its files.
+  def checkRepoTask_old (self, aRepoTuple):
+    """Check a repo by downloading all of its files.
     
     This function is to be called by a worker thread.
     
-    Given a mirror, this routine iterates through the associated file list, downloading
+    Given a repo, this routine iterates through the associated file list, downloading
     each file.  After downloading, the file is checked for validity using the 'fileIsValid'
     function.  The results of this check are queued in the actions queue.
     
     Parameters:
-      aMirrorTuple - a tuple in the form (mirrorInformation, listOfFileInformation)
+      aRepoTuple - a tuple in the form (repoInformation, listOfFileInformation)
     
     Notes:
-      1) why does 'mirrorIsValid' have string values rather than boolean? The MySQL database for 
+      1) why does 'repoIsValid' have string values rather than boolean? The MySQL database for 
          setup with a column type of enum.  In MySQL, enum is just a restriction on values 
-         for a varchar column type.  Since 'aMirror.mirror_active' will be fetched as a string
-         and 'mirrorIsValid' is used in comparison with 'aMirror.mirror_active', it was thought
+         for a varchar column type.  Since 'aRepo.repo_active' will be fetched as a string
+         and 'repoIsValid' is used in comparison with 'aRepo.repo_active', it was thought
          they should be of the same type.  For clarity in the future, perhaps these should be
          reimplemented as boolean or at least 0 and 1 integers.
     """
@@ -307,7 +547,7 @@
     letters = GroupFactory()
     maxlatest = 30
         
-    mirrorIsValid = '0' #see note 1 above
+    repoIsValid = '0' #see note 1 above
      
     rFilepath = aRepoTuple.repo_url    
     lFilepath = (self.webpath +'/'+ aRepoTuple.product_path +'/'+ aRepoTuple.version_path+'/'+aRepoTuple.arch_path)    
@@ -326,513 +566,14 @@
     
     self.database.executeSql(RepoIntegrityChecker.UpdateTimestampSQL %  (str(repodata['primary']['timestamp']), aRepoTuple.repo_id))
     self.database.commit()
-    print ('--------------------' + repodata['primary']['timestamp'])
 
-  def _parseRepoMD(self, rFilepath, lFilepath,repodata,timestamp):
-        """
-        Parser method for repomd.xml
-        """
-        #type = 'unknown'
-        rFilename = (rFilepath+'/repodata/repomd.xml')  
-        lFilename = (lFilepath+'/repodata/repomd.xml') 
-        
-        if os.path.exists(lFilepath+'/repodata/') and os.path.isdir(lFilepath+'/repodata/'):
-          pass
-        else:
-          try:
-            os.makedirs(lFilepath+('/repodata/'), mode=0755)
-          except OSError, e:
-            raise Errors.RepoError, \
-                "Error making cache directory: %s error was: %s" % ((lFilepath+'/repodata/'), e)
-        
-        
-        
-        data = self._get(rFilename,lFilename) 
-        
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: parsing repo metadata: %s", threading.currentThread().getName(), lFilename)
-        for event, elem in iterparse(lFilename, events=('start',)):
-            tag = _bn(elem.tag)
-            if tag == 'data':
-                type = elem.get('type', 'unknown')
-                repodata[type] = {}
-            elif tag == 'location':
-                repodata[type]['location'] = elem.get('href', '#')
-            elif tag == 'checksum':
-                repodata[type]['checksum'] = elem.text
-            elif tag == 'timestamp':
-                repodata[type]['timestamp'] = elem.text
-            elem.clear()
-                
-        self._checkNecessity(repodata,timestamp)
-        return repodata
-
-  def _checkNecessity(self,repodata,timestamp):
-        """
-        This will look at the checksum for primary.xml and compare it to the
-        one recorded during the last run in repoview/checksum. If they match,
-        the program exits, unless overridden with -f.
-        """
-        if timestamp != repodata['primary']['timestamp']:
-            return 1
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: Repository has not changed.", threading.currentThread().getName())
-        return 0
-
-  def _mkpkgid(self, n, e, v, r):
-        """
-        Make the n-e-v-r package id out of n, e, v, r.
-        """
-        return '%s-%s-%s-%s' % (n, e, v, r)
-
-  def _checkIgnore(self, pkgid,ignore):
-        """
-        Check if package id (n-e-v-r) matches the ignore globs passed
-        via -i.
-        """
-        for glob in ignore:
-            if fnmatch.fnmatchcase(pkgid, glob): return 1
-        return 0
-
-  def _getevr(self, elem):
-        """
-        Utility method to get e-v-r out of the <version> element.
-        """
-        e = elem.get('epoch', '0')
-        v = elem.get('ver', '0')
-        r = elem.get('rel', '0')
-        return {'epoch': e, 'ver': v, 'rel': r}
-
-
-  def _doPackage(self, pkgdata,xarch,arches,ignore,packages):
-        """
-        Helper method for cleanliness. Accepts pkgdata and sees if we need
-        to create a new package or add arches to existing ones, or ignore it
-        outright.
-        """
-        if not pkgdata: return 0
-        if pkgdata['arch'] in xarch: return 0
-        if pkgdata['arch'] not in arches: 
-            arches.append(pkgdata['arch'])
-        ## We make a package here from pkgdata ##
-        (n, e, v, r) = (pkgdata['name'], pkgdata['epoch'], 
-                        pkgdata['ver'], pkgdata['rel'])
-        pkgid = self._mkpkgid(n, e, v, r)
-        if self._checkIgnore(pkgid,ignore): return 0
-        if packages.has_key(pkgid):
-            package = packages[pkgid]
-        else:
-            package = Package(n, e, v, r)
-            package.pkgid = pkgid
-            packages[pkgid] = package
-        package.doPackage(pkgdata)
-        return 
-
-  def _parsePrimary(self, rFilepath, lFilepath,repodata,xarch,arches,ignore,packages):
-        """
-        Utility method for parsing primary.xml.
-        """
-        rFilename = (rFilepath+ '/'+ repodata['primary']['location'])  
-        lFilename = (lFilepath+ '/'+ repodata['primary']['location']) 
-        
 
-        
-        data = self._get(rFilename,lFilename) 
 
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: parsing primary: %s", threading.currentThread().getName(), lFilename)
-      
-        fh = self._get(rFilename,lFilename)
-        pct = 0
-        ignored = 0
-        pkgdata = {}
-        simpletags = (
-            'name', 
-            'arch', 
-            'summary', 
-            'description', 
-            'url',
-            'packager',
-            'checksum',
-            'license',
-            'group',
-            'vendor')
-        for event, elem in iterparse(lFilename):
-            tag = _bn(elem.tag)
-            if tag == 'package':
-                if not self._doPackage(pkgdata,xarch,arches,ignore,packages): ignored += 1
-                pct += 1
-                RepoIntegrityChecker.logger.log(sentryutil.INFO7, "parsing primary: %s packages, %s ignored", pct, ignored)
-
-                pkgdata = {}
-            elif tag in simpletags:
-                pkgdata[tag] = _webify(elem.text)
-            elif tag == 'version':
-                pkgdata.update(self._getevr(elem))
-            elif tag == 'time':
-                pkgdata['time'] = elem.get('build', '0')
-            elif tag == 'size':
-                pkgdata['size'] = elem.get('package', '0')
-            elif tag == 'location':
-                pkgdata['location'] = elem.get('href', '#')
-            elem.clear()
-        self.pkgcount = pct - ignored
-        self.pkgignored = ignored
-                
-  def _doOther(self, pkgid, changelogs,packages):
-        """
-        Helper method for cleanliness.
-        """
-        if pkgid and changelogs and packages.has_key(pkgid):
-            package = packages[pkgid]
-            return package.addChangelogs(changelogs)
-        return 0
-                
-  def _parseOther(self,rFilepath,lFilepath,repodata,packages, limit=3):
-        """
-        Utility method to parse other.xml.
-        """
-        rFilename = (rFilepath+ '/'+ repodata['other']['location'])  
-        lFilename = (lFilepath+ '/'+ repodata['other']['location']) 
         
-        data = self._get(rFilename,lFilename) 
-
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "%s: parsing other: %s", threading.currentThread().getName(), lFilename)
-      
-        pct = 0
-        ignored = 0
-        changelogs = []
-        evr = None
-        cct = 0
-        for event, elem in iterparse(lFilename):
-            tag = _bn(elem.tag)
-            if tag == 'package':
-                n = elem.get('name', '__unknown__')
-                pkgid = self._mkpkgid(n, evr['epoch'], evr['ver'], evr['rel'])
-                if not self._doOther(pkgid, changelogs,packages): ignored += 1
-                pct += 1
-                
-                RepoIntegrityChecker.logger.log(sentryutil.INFO7, "parsing other: %s packages, %s ignored", pct, ignored)
-                evr = None
-                changelogs = []
-                n = None
-                cct = 0
-            elif tag == 'version':
-                evr = self._getevr(elem)
-            elif tag == 'changelog':
-                if cct >= limit: continue
-                author = _webify(elem.get('author', 'incognito'))
-                date = int(elem.get('date', '0'))
-                changelog = _webify(elem.text)
-                changelogs.append([date, author, changelog])
-                cct += 1
-            elem.clear()
-        
-
-  def mkLinkUrl(self, object, isindex=0):
-        """
-        This is a utility method passed to kid templates. The templates use 
-        it to get the link to a package, group, or layout object without
-        having to figure things out on their own.
-        """
-        link = '#'
-        prefix = ''
-        if isindex:
-            if self.toplevel: prefix = os.path.join('repodata', 'repoview')
-            else: prefix = 'repoview'
-        if object.__class__ is str:
-            if not isindex and object == idxfile:
-                if self.toplevel: link = os.path.join('..', '..', object)
-                else: link = os.path.join('..', object)
-            else:
-                link = os.path.join(prefix, object)
-        elif object.__class__ is Package:
-            link = os.path.join(prefix, pkgfile % object.pkgid)
-        elif object.__class__ is Group:
-            link = os.path.join(prefix, grfile % object.grid)
-        elif object.__class__ is Archer:
-            if isindex and self.toplevel:
-                link = os.path.join('..', object.loc)
-            else:
-                link = os.path.join('..', '..', object.loc)
-        return link
-
-
-  def applyTemplates(self, lFilepath,templatedir,packages,groups,letters, maxlatest,outdir,arches,repodata,toplevel=0, title='RepoView'):
-        """
-        Just what it says. :)
-        """
-        if not packages:
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "No packages available")
-            sys.exit(0)
-        gentime = time.strftime('%c')
-        self.toplevel = toplevel
-        self._makeExtraGroups(groups,packages,letters,maxlatest)
-        self._mkOutDir(templatedir,outdir)
-        stats = {
-            'title': title,
-            'pkgcount': 0,
-            'pkgignored': {},
-            #'ignorelist': self.ignore,
-            'archlist': {},
-            #'ignorearchlist': self.xarch,
-            'VERSION': VERSION,
-            'gentime': gentime
-            }
-        ## Do groups
-        grtmpl = os.path.join(templatedir, grkid)
-        kobj = Template(file=grtmpl, mkLinkUrl=self.mkLinkUrl,
-                letters=letters, groups=groups, stats=stats)
-        i = 0
-        for grid in groups.keys():            
-            kobj.group = groups[grid]
-            out = os.path.join(outdir, grfile % grid)
-            fh = open(out, 'w')
-            kobj.write(fh)
-            fh.close()
-            i += 1
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "writing groups: %s written", i)
-            
         
-        ## Do letter groups
-        i = 0
-        for grid in letters.keys():
-            kobj.group = letters[grid]
-            out = os.path.join(outdir, grfile % grid)
-            fh = open(out, 'w')
-            kobj.write(fh)
-            fh.close()
-            i += 1
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "writing letter groups: %s written", i)
-        ## Do packages
-        i = 0
-        pkgtmpl = os.path.join(templatedir, pkgkid)
-        kobj = Template(file=pkgtmpl, mkLinkUrl=self.mkLinkUrl,
-                letters=letters, stats=stats)
-        for pkgid in packages.keys():
-            kobj.package = packages[pkgid]
-            out = os.path.join(outdir, pkgfile % pkgid)
-            fh = open(out, 'w')
-            kobj.write(fh)
-            fh.close()
-            i += 1
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "writing packages: %s written", i)
-         ## Do index
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "generating index...")
         
-        idxtmpl = os.path.join(templatedir, idxkid)
-        arches.sort()
-        kobj = Template(file=idxtmpl, mkLinkUrl=self.mkLinkUrl,
-            letters=letters, groups=groups, stats=stats)
-        if self.toplevel:
-            out = os.path.join(lFilepath, idxfile)
-        else:
-            out = os.path.join(lFilepath, 'repodata', idxfile)
-        fh = open(out, 'w')
-        kobj.write(out)
-        fh.close()
-        RepoIntegrityChecker.logger.log(sentryutil.INFO7, "writing checksum...")
-        chkfile = os.path.join(outdir, 'checksum')
-        fh = open(chkfile, 'w')
-        fh.write(repodata['primary']['checksum'])
-        fh.close()
-
-
-
-  def _makeExtraGroups(self,groups,packages,letters,maxlatest):
-        """
-        This is a utility method to create the extra groups. Currently,
-        the extra groups are:
-        __nogroup__: packages not in any other groups
-        __latest__: the last NN packages updated
-        letter groups: All packages get grouped by their uppercased first 
-                       letter
-        Any empty groups are then removed.
-        """
-        nogroup = Group(grid='__nogroup__', 
-                        name='Packages not in Groups')
-        latest = {}
-        i = 0
-        makerpmgroups = 0
-        if not len(groups): 
-            makerpmgroups = 1
-        for pkgid in packages.keys():
-            package = packages[pkgid]
-            if package.group is None:
-                if makerpmgroups:
-                    grid = _mkid(package.rpmgroup)
-                    if grid not in groups.keys():
-                        group = Group(grid=grid, name=package.rpmgroup)
-                        groups[grid] = group
-                    else:
-                        group = groups[grid]
-                    package.group = group
-                    group.packages.append(package)
-                else:
-                    package.group = nogroup
-                    nogroup.packages.append(package)
-            letter = pkgid[0].upper()
-            if letter not in letters.keys():
-                group = Group(grid=letter, name='Letter: %s' % letter)
-                letters[letter] = group
-            letters[letter].packages.append(package)
-            # btime is number of seconds since epoch, so reverse logic!
-            btime = 0
-            for arch in package.arches.values():
-                if arch.time > btime: btime = arch.time
-            if len(latest.keys()) < maxlatest:
-                latest[btime] = package
-            else:
-                times = latest.keys()
-                times.sort()
-                times.reverse()
-                oldest = times[-1]
-                if btime > oldest:
-                    del latest[oldest]
-                    latest[btime] = package
-            i += 1
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "creating extra groups: %s entries", i)
-        if nogroup.packages:
-            groups[nogroup.grid] = nogroup
-        times = latest.keys()
-        times.sort()
-        times.reverse()
-        lgroup = Group(grid='__latest__', 
-                       name='Last %s Packages Updated' % len(times))
-        for time in times:
-            lgroup.packages.append(latest[time])
-        lgroup.sorted = 1
-        groups[lgroup.grid] = lgroup
-         ## Prune empty groups
-        for grid in groups.keys():
-            if not groups[grid].packages: del groups[grid]
-
-  def _mkOutDir(self, templatedir,outdir):
-        """
-        Remove the existing repoview directory if it exists, and create a
-        new one, copying in the layout dir from templates (if found).
-        """
-        if os.path.isdir(outdir):
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "deleting old repoview...")
-            shutil.rmtree(outdir)
-        os.mkdir(outdir)
-        layoutsrc = os.path.join(templatedir, 'layout')
-        layoutdst = os.path.join(outdir, 'layout')
-        if os.path.isdir(layoutsrc):
-            RepoIntegrityChecker.logger.log(sentryutil.INFO7, "copying layout...")
-            shutil.copytree(layoutsrc, layoutdst)
 
-      
-#      rv.applyTemplates(DEFAULT_TEMPLATEDIR, 0, 'RepoView')
-#      mirrorIsValid = '1'  # since the file downloaded without raising an exception, we know the mirror is valid
-#      fileActionFunction = self.determineFileAction(aFile.downloadable_active, fileIsNotValid, aURL)
-#      self.actionTaskManager.newTask(fileActionFunction, (aMirror, aFile, aURL, errorMessage, mx.DateTime.now(), self.checklevel))
-    #except Exception, e:
-      # something bad happened while downloading a file from a mirror.  This means that the status of the
-      # mirror itself might have to be changed.
-     # message = e.__str__()
-    #  if message == '':
-    #    message = e.__class__
-    #  if aMirror.mirror_active == '1':
-#        # the mirror is bad, but used to be good - take the bad mirror action
-#        self.actionTaskManager.newTask(self.mirrorActionGoodToBad, (aMirror, None, None, message, mx.DateTime.now(), self.checklevel))
-    #    MirrorIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionGoodToBad: %s", threading.currentThread().getName(), e)
-    #  else:
-#        # the mirror is bad, but was marked bad before, there is no change in status
-#        self.actionTaskManager.newTask(self.mirrorActionBadNoChange, (aMirror, None, None, message, mx.DateTime.now(), self.checklevel))
-#        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionBadNoChange: %s ", threading.currentThread().getName(), aMirror.mirror_name)
-#    else:
-#        printpass
-#      if aMirror.mirror_active == '1' and mirrorIsValid == '1':
-#        # the mirror is good and remains good
-#        self.actionTaskManager.newTask(self.mirrorActionGoodNoChange, (aMirror, None, None, 'the mirror is OK', mx.DateTime.now(), self.checklevel))
-#        MirrorIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionGoodNoChange: %s", threading.currentThread().getName(), aMirror.mirror_name)
-#      elif mirrorIsValid == '1':
-#        # the mirror is good, but used to be marked bad, take the bad to good transition
-#        self.actionTaskManager.newTask(self.mirrorActionBadToGood, (aMirror, None, None, 'the mirror is OK', mx.DateTime.now(), self.checklevel))
-#        MirrorIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionBadToGood: %s ", threading.currentThread().getName(), aMirror.mirror_name)
-#      elif aMirror.mirror_active == '1':
-#        # we never managed to download anything - assume all of this mirror's files are marked as bad.
-#        # there are no files to download but the mirror was marked as good, change the status to bad.
-#        self.actionTaskManager.newTask(self.mirrorActionGoodToBad, (aMirror, None, None, 'all files are bad - full test required', mx.DateTime.now(), self.checklevel))
-#        MirrorIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionGoodToBad: %s ", threading.currentThread().getName(), aMirror.mirror_name)
-#      else:
-#        # we never managed to download anything - assume all of this mirror's files are marked as bad.
-#        # there are no files to download and the mirror was marked as bad, don't change the status.
-#        self.actionTaskManager.newTask(self.mirrorActionBadNoChange, (aMirror, None, None, 'all files are bad - full test required', mx.DateTime.now(), self.checklevel))
-#        RepoIntegrityChecker.logger.log(sentryutil.INFO6, "%s: mirrorActionBadNoChange: %s ", threading.currentThread().getName(), aMirror.mirror_name)
-    
-    
-  #----------------------------------------
-  # n o t i f y O f A c t i o n s T a k e n
-  #----------------------------------------
-  def notifyOfActionsTaken (self):
-    for x in enumerate(self.humanReadableActionList):
-      print "%4d %s" % x
-      
-      
-       #--------------------
-  # f i l e S t a t u s
-  #--------------------
-  def fileStatus (self, aURL):
-    """Returns a value indicating the validity of the file using several tests.
-        
-    In general, this function will be called only by a worker thread.
-    
-    This routine downloads a URL and first checks to see if it returns the 
-    desired MimeType as indicated by the mimeTypeAssociations dictionary.
-    If it passes that test, it downloads the entire file and creates a hash.
-    
-    Parameters:
-      aURL - the complete URL to try to download
-      expectedHashCode - a string representing the hash code of the file.  The length of
-        this string will be used to determine which hash algorithm to use.  See the 
-        class level variable 'hashLengthHashAlgorithmsAssociations' to see the lengths
-        and their associations.
-    
-    This routine returns a tuple: (ReturnCode, HumanReadableErrorMessage)
-    See the class level variable 'fileStatusAssociations' for the messages
-    The ReturnCode is as follows:
-      0 - the file is okay
-      1 - the server is giving the incorrect MIME type
-      2 - the hash values did not match
-      3 - an HTTP Error occurred
-    """
-#    print (aURL)
-#    try:
-#    urlReader = urllib2.urlopen(aURL)
-#      try:
-#    page = urlReader.read()
-#    print(page)
-    
-    
-    
-    
-#         expectedContentType = MirrorIntegrityChecker.mimeTypeAssociations[aURL[aURL.rfind('.')+1:]]
-#        if urlContentType not in expectedContentType:
-#          # incorrect MIME type error
-#          return (1, MirrorIntegrityChecker.fileStatusAssociations[1] % (urlContentType, expectedContentType))
-#      except KeyError: pass
-#    except urllib2.HTTPError, e:
-#      if 400 <= e.code <= 499:
-#        return (3, MirrorIntegrityChecker.fileStatusAssociations[3] % e.__str__())
-#      raise e      
-    
-#    expectedHashCodeLength = len(expectedHashCode)
-#    if expectedHashCodeLength not in MirrorIntegrityChecker.hashLengthHashAlgorithmsAssociations.keys():
-#      expectedHashCodeLength = 32 #if the expected hash code is of unexpected length - assume MD5
-#    hashCalculator = MirrorIntegrityChecker.hashLengthHashAlgorithmsAssociations[expectedHashCodeLength][1].new()
-#    while True:
-#      block = urlReader.read(self.hashReadChunkSize)
-#      hashCalculator.update(block)
-#      blockLength = len(block)
-#      MirrorIntegrityChecker.logger.log(sentryutil.DEBUG0, "%s: downloading: %s, %d bytes read", threading.currentThread().getName(), aURL, blockLength)
-#      if blockLength < self.hashReadChunkSize:
-#        break # we're done reading this file
-    
-#    urlComputedHashCode = hashCalculator.hexdigest()
-#    if urlComputedHashCode != expectedHashCode:
-#      # incorrect hash code error
-#      return (2, MirrorIntegrityChecker.fileStatusAssociations[2] % (MirrorIntegrityChecker.hashLengthHashAlgorithmsAssociations[expectedHashCodeLength][0], urlComputedHashCode, expectedHashCode))
-#    # the file is OK
-#    return (0, MirrorIntegrityChecker.fileStatusAssociations[0])
+
 
   #----------------------------------------------------------------------------------------------
   # I n s e r t N e w R e p o s S Q L 
@@ -845,172 +586,5 @@
       repo_primary_timestamp = %s
     WHERE
       repo_id = %d"""
-
-class Archer:
-    """
-    This class handles all possible architectures for a package, since
-    the listing is done by n-e-v-r.html, and a single release can have more
-    than one architecture available, e.g. "src". This is effectively where
-    all packages end up being: there are no further sublevels.
-    """
-    def __init__(self, pkgdata):
-        self.arch = pkgdata['arch']
-        self.time = int(pkgdata['time'])
-        self.size = int(pkgdata['size'])
-        self.loc = pkgdata['location']
-        self.packager = pkgdata['packager']
-
-    def getFileName(self):
-        """
-        Get the basename of the RPM file in question.
-        """
-        return os.path.basename(self.loc)
-
-    def getTime(self, format='%c'):
-        """
-        Return the build time of this package in locale format, unless
-        passed as format='strformat'.
-        """
-        return time.strftime(format, time.localtime(self.time))
-
-    def getSize(self):
-        """
-        You can access the byte size of the package by looking at arch.size,
-        but this will return the size in sane units (KiB or MiB).
-        """
-        kbsize = self.size/1024
-        if kbsize/1024 < 1:
-            return '%d KiB' % kbsize
-        else:
-            return '%0.2f MiB' % (float(kbsize)/1024)
-
   
-class Package:
-    """
-    A bit of a misnomer -- this is "package" in the sense of repoview, not in 
-    the sense of an .rpm file, since it will include multiple architectures.
-    """
-    def __init__(self, n, e, v, r):
-        self.nevr = (n, e, v, r)
-        self.n = n
-        self.e = e
-        self.v = v
-        self.r = r
-        self.group = None
-        self.rpmgroup = None
-        self.arches = {}
-        self.incomplete = 1
-        self.changelogs = []
-        
-    def doPackage(self, pkgdata):
-        """
-        Accept a dict with key-value pairs and populate ourselves with it.
-        """
-        if self.incomplete: self._getPrimary(pkgdata)
-        pkgid = pkgdata['checksum']
-        if self.arches.has_key(pkgid): return
-        arch = Archer(pkgdata)
-        self.arches[pkgid] = arch
-
-    def addChangelogs(self, changelogs):
-        """
-        Accept changelogs from other-parser and assign them, unless we
-        already have some (sometimes happens with multiple architectures).
-        """
-        if self.changelogs: return 0
-        self.changelogs = changelogs
-        return 1
-    
-    def _getPrimary(self, pkgdata):
-        """
-        A helper method to grab values from pkgdata dict.
-        """
-        self.summary = pkgdata['summary']
-        self.description = pkgdata['description']
-        self.url = pkgdata['url']
-        self.license = pkgdata['license']
-        self.vendor = pkgdata['vendor']
-        self.rpmgroup = pkgdata['group']
-        self.incomplete = 0
-
-    def getChangeLogs(self):
-        """
-        Get the changelogs in the [c-formatted date, author, entry] style.
-        """
-        self.changelogs.sort()
-        self.changelogs.reverse()
-        retlist = []
-        for changelog in self.changelogs:
-            date, author, entry = changelog
-            date = time.strftime('%c', time.localtime(date))
-            retlist.append ([date, author, entry])
-        return retlist
-class Group:
-    """
-    Contains a list of packages.
-    """
-    def __init__(self, grid=None, name=None):
-        self.packages = []
-        self.grid = grid
-        self.name = name
-        self.sorted = 0
-        self.uservisible = 1
-
-    def getSortedList(self, trim=0, nevr=None):
-        """
-        A utility method for calling from kid templates. This will
-        return a sorted list of packages, optionally trimmed since
-        on large repositories this list can be very large, and makes
-        the display useless. If you pass the trim parameter, you must
-        pass the nevr parameter, too, so the it knows around which package
-        to trim.
-        """
-        if not self.sorted:
-            nevrlist = {}
-            for package in self.packages:
-                nevrlist[package.nevr] = package
-            keys = nevrlist.keys()
-            keys.sort()
-            retlist = []
-            for nevr in keys:
-                retlist.append(nevrlist[nevr])
-            self.packages = retlist
-            self.sorted = 1
-        if not trim or len(self.packages) <= trim: return self.packages
-        retlist = []
-        i = 0
-        for pkg in self.packages:
-            if pkg.nevr == nevr: break
-            i += 1
-        half = trim/2
-        if i - half < 0:
-            return self.packages[0:trim]
-        if i + half > len(self.packages):
-            return self.packages[-trim:]
-        return self.packages[i-half:i+half]
-       
-class GroupFactory(dict):
-    """
-    A small utility class that extends the dict functionality to aide in
-    kid template generation. It contains the groups, keyed by group id.
-    """
-    def __init__(self):
-        dict.__init__(self)
-        self.sortedlist = None
-        
-    def getSortedList(self, showinvisible=0):
-        """
-        Get the sorted list of groups. The sorting is done by group id 
-        in ascending order (locale-specific).
-        """
-        if self.sortedlist is None:
-            grids = []
-            for grid in self.keys():
-                if self[grid].uservisible or showinvisible:
-                    grids.append(grid)
-            grids.sort()
-            self.sortedlist = []
-            for grid in grids:
-                self.sortedlist.append(self[grid])
-        return self.sortedlist        
           
\ No newline at end of file


Index: sentry.py
===================================================================
RCS file: /cvs/fedora/bouncer_r/python/sentry.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- sentry.py	10 Jul 2005 09:15:28 -0000	1.2
+++ sentry.py	1 Aug 2005 20:38:01 -0000	1.3
@@ -215,7 +215,7 @@
       
   # insure that only one instance of the program runs at a time if we're running a long test
   # such as anything over checklevel 2
-  if workingEnvironment["checklevel"] > 2:
+  if workingEnvironment["checklevel"] > 4:
     pidFileName = ("%s/pid" % workingEnvironment["persistentDataPath"]).replace("//", "/")
     try:
       f = open(pidFileName, "r")
@@ -231,15 +231,15 @@
     expectedPid = f.write(`os.getpid()`)
     f.close()
       
-  #try:
-  #  aMirrorIntegrityCheckerClass = createMirrorIntegrityCheckerClass (workingEnvironment)
-  #  aMirrorIntegrityChecker = aMirrorIntegrityCheckerClass(workingEnvironment)
-  #  aMirrorIntegrityChecker.check()
-   
   try:
-    aRepoIntegrityCheckerClass = createRepoIntegrityCheckerClass (workingEnvironment)
-    aRepoIntegrityChecker = aRepoIntegrityCheckerClass(workingEnvironment)
-    aRepoIntegrityChecker.check()   
+    aMirrorIntegrityCheckerClass = createMirrorIntegrityCheckerClass (workingEnvironment)
+    aMirrorIntegrityChecker = aMirrorIntegrityCheckerClass(workingEnvironment)
+    aMirrorIntegrityChecker.check()
+   
+  #try:
+  #  aRepoIntegrityCheckerClass = createRepoIntegrityCheckerClass (workingEnvironment)
+  #  aRepoIntegrityChecker = aRepoIntegrityCheckerClass(workingEnvironment)
+  #  aRepoIntegrityChecker.check()   
    
     sentryLogger.log(sentryutil.INFO4, "done")
   except Exception, e:




More information about the fedora-extras-commits mailing list