[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: InstantMirror needs a rethink



After spending some time examining http-replicator I think that it's a
much better foundation for further development of InstantMirror
features than mod_python is. Running as a standalone daemon lets it
handle concurrent requests more naturally.

As a bonus, http-replicator supports upstream ftp servers as well as
http, and deals with byte-range requests properly (more properly than
InstantMirror does, at least).

Unlike InstantMirror, the current http-replicator implements a
traditional http proxy rather than a transparent proxy. It was pretty
easy to hack in a new --mirror option that lets it support either
mode. With the attached patch (applied atop
http-replicator_4.0alpha1), http-replicator acts as a drop-in
replacement for InstantMirror:

./http-replicator --port 80 --root /mirrors --mirror \
http://download.fedora.redhat.com --nohost --daemon mirror.log

If this works for someone besides me, I'll submit the patch to the
http-replicator maintainer.

--Ed
Index: http-replicator_4.0alpha1/Request.py
===================================================================
--- http-replicator_4.0alpha1.orig/Request.py
+++ http-replicator_4.0alpha1/Request.py
@@ -79,20 +79,21 @@ class HttpRequest:
       self.__recvbuf = self.__recvbuf[ bytes: ]
     assert not self.__recvbuf, 'client sends junk data after message header'
 
-    if self.__url.startswith( 'http://' ):
-      host = self.__url[ 7: ]
+    url = Params.MIRRORPREFIX + self.__url
+    if url.startswith( 'http://' ):
+      host = url[ 7: ]
       port = 80
       if self.__cmd == 'GET':
         self.Protocol = Protocol.HttpProtocol
       else:
         self.Protocol = Protocol.BlindProtocol
-    elif self.__url.startswith( 'ftp://' ):
+    elif url.startswith( 'ftp://' ):
       assert self.__cmd == 'GET', '%s request unsupported for ftp' % self.__cmd
       self.Protocol = Protocol.FtpProtocol
-      host = self.__url[ 6: ]
+      host = url[ 6: ]
       port = 21
     else:
-      raise AssertionError, 'invalid url: %s' % self.__url
+      raise AssertionError, 'invalid url: %s' % url
     if '/' in host:
       host, path = host.split( '/', 1 )
     else:
Index: http-replicator_4.0alpha1/Protocol.py
===================================================================
--- http-replicator_4.0alpha1.orig/Protocol.py
+++ http-replicator_4.0alpha1/Protocol.py
@@ -60,7 +60,7 @@ class HttpProtocol( Cache.File ):
 
   def __init__( self, request ):
 
-    Cache.File.__init__( self, '%s:%i/%s' % request.url() )
+    Cache.File.__init__( self, Params.NOHOST and request.url()[2] or ( '%s:%i/%s' % request.url() ) )
 
     if Params.STATIC and self.full():
       print 'Static mode; serving file directly from cache'
@@ -215,7 +215,7 @@ class FtpProtocol( Cache.File ):
 
   def __init__( self, request ):
 
-    Cache.File.__init__( self, '%s:%i/%s' % request.url() )
+    Cache.File.__init__( self, Params.NOHOST and request.url()[2] or ( '%s:%i/%s' % request.url() ) )
 
     if Params.STATIC and self.full():
       self.__socket = None
Index: http-replicator_4.0alpha1/Params.py
===================================================================
--- http-replicator_4.0alpha1.orig/Params.py
+++ http-replicator_4.0alpha1/Params.py
@@ -5,10 +5,12 @@ _args = iter( sys.argv )
 PROG = _args.next()
 PORT = 8080
 ROOT = os.getcwd() + os.sep
+MIRRORPREFIX = ''
 VERBOSE = 0
 TIMEOUT = 15
 FAMILY = socket.AF_INET
 FLAT = False
+NOHOST = False
 STATIC = False
 ONLINE = True
 LIMIT = False
@@ -23,10 +25,12 @@ options:
   -h --help          show this help message and exit
   -p --port PORT     listen on this port for incoming connections, default %(PORT)i
   -r --root DIR      set cache root directory, default current: %(ROOT)s
+     --mirror URL    mirror this URL prefix directly instead of acting as a proxy
   -v --verbose       show http headers and other info
   -t --timeout SEC   break connection after so many seconds of inactivity, default %(TIMEOUT)i
   -6 --ipv6          try ipv6 addresses if available
      --flat          flat mode; cache all files in root directory (dangerous!)
+     --nohost        do not use host:port as top level of directory hierarchy
      --static        static mode; assume files never change
      --offline       offline mode; never connect to server
      --limit RATE    limit download rate at a fixed K/s
@@ -51,6 +55,10 @@ for _arg in _args:
       sys.exit( 'Error: %s requires a directory argument' % _arg )
     except:
       sys.exit( 'Error: invalid cache directory %s' % ROOT )
+  elif _arg == '--mirror':
+    MIRRORPREFIX = _args.next()
+    if not MIRRORPREFIX.startswith( 'http://' ) and not MIRRORPREFIX.startswith( 'ftp://' ):
+      sys.exit( 'Error: %s requires an argument starting with http:// or ftp://' % _arg )
   elif _arg in ( '-v', '--verbose' ):
     VERBOSE += 1
   elif _arg in ( '-t', '--timeout' ):
@@ -63,6 +71,8 @@ for _arg in _args:
     FAMILY = socket.AF_UNSPEC
   elif _arg == '--flat':
     FLAT = True
+  elif _arg == '--nohost':
+    NOHOST = True
   elif _arg == '--static':
     STATIC = True
   elif _arg == '--offline':

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]