Bodhi 10k bug

Luke Macken lmacken at redhat.com
Sat Nov 22 23:41:34 UTC 2008


As some of you may have noticed, the last batch of updates contained 209
updates with the ID of 'FEDORA-2008-10000'.  This is is due to a flaw in the
way bodhi's PackageUpdate.assign_id() method finds the current update with the
highest id.  Presently, it does a PackageUpdate.select(...,
orderBy=PackageUpdate.q.updateid).  Since PackageUpdate.updateid is a unicode
column, and due to the fact that u'FEDORA-2008-10000' < u'FEDORA-2008-9999',
this started to fail miserably.

Attached is a patch that has the assign_id method order the query by the
date_pushed DateTimeCol in order to find the highest updateid.  However, it
seems that SQLObject completely ignore milliseconds:

    if datetime:
        def DateTimeConverter(value, db):
                return "'%04d-%02d-%02d %02d:%02d:%02d'" % (
                        value.year, value.month, value.day,
                        value.hour, value.minute,
                        value.second)

The problem with this is that we must now take into account multiple updates
that were pushed at the same second.

The "proper" way to fix this is at the model level, and probably to use an
integer for the updateid column.  I'm in the process of finishing up the
SQLAlchemy port, which will properly solve this problem.  In the mean time,
this hack will not require any database changes.

This patch also includes a test case for this 10k bug.

    [lmacken at x300 bodhi]$ nosetests
    bodhi/tests/test_model.py:TestPackageUpdate.test_id
    .
    ----------------------------------------------------------------------
     Ran 1 test in 0.084s

     OK

Once approved and applied, I will push out a fixed package (to releng2 only),
fix the existing updates from the last push, and send out an errata containing
the new update IDs.

+1's ?

luke
-------------- next part --------------
>From 965360653ee505c76a89228bac462ada597dad0b Mon Sep 17 00:00:00 2001
From: Luke Macken <lmacken at redhat.com>
Date: Sat, 22 Nov 2008 18:27:40 -0500
Subject: [PATCH] Righteous hack for the 10k bug.

Due to a flaw in the way the PackageUpdate.assign_id method finds the update
with the highest id, when we reached 10,000 updates this year, it started to
fail.  The previous method ordered updates by the updateid string column, which
now fails since u'FEDORA-2008-10000' < u'FEDORA-2008-9999'.

This patch changes this query to grab the most recent update based on the
date_pushed column.  This technically should also have the highest updateid.
However, since SQLObject completely ignores milliseconds, we need to also
take into account multiple updates being pushed during the same second.

This changeset also includes a testcase for the 10k bug.

diff --git a/bodhi/model.py b/bodhi/model.py
index 2bc7c07..7d4627b 100644
--- a/bodhi/model.py
+++ b/bodhi/model.py
@@ -258,19 +258,37 @@ class PackageUpdate(SQLObject):
         if self.updateid != None and self.updateid != u'None':
             log.debug("Keeping current update id %s" % self.updateid)
             return
-        update = PackageUpdate.select(PackageUpdate.q.updateid != 'None',
-                                      orderBy=PackageUpdate.q.updateid)
+
+        updates = PackageUpdate.select(
+                    AND(PackageUpdate.q.date_pushed != None,
+                        PackageUpdate.q.updateid != None),
+                    orderBy=PackageUpdate.q.date_pushed, limit=1).reversed()
+
         try:
-            prefix, year, id = update[-1].updateid.split('-')
+            update = updates[0]
+
+            # We need to check if there are any other updates that were pushed
+            # at the same time, since SQLObject ignores milliseconds
+            others = PackageUpdate.select(
+                        PackageUpdate.q.date_pushed == update.date_pushed)
+            if others.count() > 1:
+                # find the update with the highest id
+                for other in others:
+                    if other.updateid_int > update.updateid_int:
+                        update = other
+
+            prefix, year, id = update.updateid.split('-')
             if int(year) != time.localtime()[0]: # new year
                 id = 0
             id = int(id) + 1
-        except (AttributeError, IndexError):
-            id = 1
+        except IndexError:
+            id = 1 # First update
+
         self.updateid = u'%s-%s-%0.4d' % (self.release.id_prefix,
                                           time.localtime()[0],id)
         log.debug("Setting updateid for %s to %s" % (self.title,
                                                      self.updateid))
+        self.date_pushed = datetime.utcnow()
         hub.commit()
 
     def set_request(self, action, pathcheck=True):
@@ -356,7 +374,6 @@ class PackageUpdate(SQLObject):
         """
         if self.request == 'testing':
             self.pushed = True
-            self.date_pushed = datetime.utcnow()
             self.status = 'testing'
             self.assign_id()
         elif self.request == 'obsolete':
@@ -364,7 +381,6 @@ class PackageUpdate(SQLObject):
             self.status = 'obsolete'
         elif self.request == 'stable':
             self.pushed = True
-            self.date_pushed = datetime.utcnow()
             self.status = 'stable'
             self.assign_id()
         self.request = None
@@ -742,6 +758,15 @@ class PackageUpdate(SQLObject):
         sorted.sort(lambda x, y: cmp(x.timestamp, y.timestamp))
         return sorted
 
+    @property
+    def updateid_int(self):
+        """ Return the integer $ID from the 'FEDORA-2008-$ID' updateid """
+        if not self.updateid:
+            return None
+        prefix, year, id = self.updateid.split('-')
+        print "returning %d for %s" % (int(id), self.updateid)
+        return int(id)
+
 
 class Comment(SQLObject):
     timestamp   = DateTimeCol(default=datetime.utcnow)
diff --git a/bodhi/tests/test_model.py b/bodhi/tests/test_model.py
index b654fe8..eab511e 100644
--- a/bodhi/tests/test_model.py
+++ b/bodhi/tests/test_model.py
@@ -109,11 +109,32 @@ class TestPackageUpdate(testutil.DBTest):
         update.assign_id()
         assert update.updateid == '%s-%s-0001' % (update.release.id_prefix,
                                                   time.localtime()[0])
+        assert update.date_pushed
         update = self.get_update(name='TurboGears-0.4.4-8.fc7')
         update.assign_id()
         assert update.updateid == '%s-%s-0002' % (update.release.id_prefix,
                                                   time.localtime()[0])
 
+        # 10k bug
+        update.updateid = 'FEDORA-2008-9999'
+        newupdate = self.get_update(name='nethack-2.5.6-1.fc10')
+        newupdate.assign_id()
+        assert newupdate.updateid == 'FEDORA-2008-10000'
+
+        newerupdate = self.get_update(name='nethack-2.5.7-1.fc10')
+        newerupdate.assign_id()
+        assert newerupdate.updateid == 'FEDORA-2008-10001'
+
+        # test updates that were pushed at the same time.  assign_id should
+        # be able to figure out which one has the highest id.
+        now = datetime.utcnow()
+        newupdate.date_pushed = now
+        newerupdate.date_pushed = now
+
+        newest = self.get_update(name='nethack-2.5.8-1.fc10')
+        newest.assign_id()
+        assert newest.updateid == 'FEDORA-2008-10002'
+
     def test_url(self):
         update = self.get_update()
         print "URL = ", update.get_url()
-- 
1.6.0.3



More information about the Fedora-infrastructure-list mailing list