[Ovirt-devel] [PATCH] Use multiple processes to check host status

Ian Main imain at redhat.com
Thu Jun 12 15:35:51 UTC 2008


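This patch makes host-status fork() a separate child process for each
host it checks, with at most 10 children running at a time, so that each
child connects out to its host via libvirt independently.  This should
help with the bottleneck we were seeing when libvirt connect timeouts on
one host delayed the checks of all the others.  It also raises the
polling interval from 5 to 20 seconds.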

Signed-off-by: Ian Main <imain at redhat.com>
---
 wui/src/host-status/host-status.rb |  193 +++++++++++++++++++++---------------
 1 files changed, 114 insertions(+), 79 deletions(-)

diff --git a/wui/src/host-status/host-status.rb b/wui/src/host-status/host-status.rb
index 41638da..eddd348 100755
--- a/wui/src/host-status/host-status.rb
+++ b/wui/src/host-status/host-status.rb
@@ -1,5 +1,5 @@
 #!/usr/bin/ruby
-# 
+#
 # Copyright (C) 2008 Red Hat, Inc.
 # Written by Chris Lalancette <clalance at redhat.com>
 #
@@ -29,7 +29,7 @@ include Daemonize
 $logfile = '/var/log/ovirt-wui/host-status.log'
 
 do_daemon = true
-sleeptime = 5
+sleeptime = 20
 opts = OptionParser.new do |opts|
   opts.on("-h", "--help", "Print help message") do
     puts opts
@@ -97,104 +97,139 @@ def kick_taskomatic(msg, vm)
   task.save
 end
 
-loop do
-  get_credentials
 
-  hosts = Host.find(:all)
-  hosts.each do |host|
-    
-    begin
-      conn = Libvirt::open("qemu+tcp://" + host.hostname + "/system")
-    rescue
-      # we couldn't contact the host for whatever reason.  Since we can't get
-      # to this host, we have to mark all vms on it as disconnected or stopped
-      # or such.
-      if host.state != "unavailable"
-        puts "Updating host state to unavailable: " + host.hostname
-        host.state = "unavailable"
-        host.save
-      end
+def check_status(host)
 
-      Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
-        # Since we can't reach the host on which the vms reside, we mark these
-        # as STATE_UNREACHABLE.  If they come back up we can mark them as
-        # running again, else they'll be stopped.  At least for now the user
-	# will know what's going on.
-        #
-        # If this causes too much trouble in the UI, this can be changed to
-        # STATE_STOPPED for now until it is resolved of another solution is
-        # brought forward.
-
-        if vm.state != Vm::STATE_UNREACHABLE:
-          kick_taskomatic(Vm::STATE_UNREACHABLE, vm)
-        end
+  # This is in a new process, we need a new database connection.
+  database_connect
+
+  begin
+    puts "Connecting to host " + host.hostname
+    conn = Libvirt::open("qemu+tcp://" + host.hostname + "/system")
+  rescue
+    # we couldn't contact the host for whatever reason.  Since we can't get
+    # to this host, we have to mark all vms on it as disconnected or stopped
+    # or such.
+    if host.state != "unavailable"
+      puts "Updating host state to unavailable: " + host.hostname
+      host.state = "unavailable"
+      host.save
+    end
+
+    Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
+      # Since we can't reach the host on which the vms reside, we mark these
+      # as STATE_UNREACHABLE.  If they come back up we can mark them as
+      # running again, else they'll be stopped.  At least for now the user
+      # will know what's going on.
+      #
+      # If this causes too much trouble in the UI, this can be changed to
+      # STATE_STOPPED for now until it is resolved of another solution is
+      # brought forward.
+
+      if vm.state != Vm::STATE_UNREACHABLE:
+        kick_taskomatic(Vm::STATE_UNREACHABLE, vm)
       end
 
+    end
+
+    return
+  end
+
+  if host.state != "available"
+    puts "Updating host state to available: " + host.hostname
+    host.state = "available"
+    host.save
+  end
+
+  begin
+    vm_ids = conn.list_domains
+  rescue
+    puts "Failed to request domain list on host " + host.hostname
+    conn.close
+    next
+  end
+
+  # Here we're going through every vm listed through libvirt.  This
+  # really only lets us find ones that are started that shouldn't be.
+  vm_ids.each do |vm_id|
+    puts "VM ID: %d" % [vm_id]
+    begin
+      dom = conn.lookup_domain_by_id(vm_id)
+    rescue
+      puts "Failed to find domain " + vm.description
       next
     end
 
-    if host.state != "available"
-      puts "Updating host state to available: " + host.hostname
-      host.state = "available"
-      host.save
+    vm_uuid = dom.uuid
+    info = dom.info
+
+    puts "VM UUID: %s" % [vm_uuid]
+    info = dom.info
+
+    vm = Vm.find(:first, :conditions => [ "uuid = ?", vm_uuid ])
+    if vm == nil
+      puts "VM Not found in database, must be created by user.  giving up."
+      next
     end
 
+    check_state(vm, info)
+  end
+
+  # Now we get a list of all vms that should be on this system and see if
+  # they are all running.
+  Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
+
     begin
-      vm_ids = conn.list_domains
+      dom = conn.lookup_domain_by_uuid(vm.uuid)
     rescue
-      puts "Failed to request domain list on host " + host.hostname
-      conn.close
+      # OK.  We couldn't find the UUID that we thought was there.  The only
+      # explanation is that the domain is dead.
+      puts "Failed to find domain " + vm.description
+      kick_taskomatic(Vm::STATE_STOPPED, vm)
       next
     end
+    info = dom.info
+    check_state(vm, info)
 
-    # Here we're going through every vm listed through libvirt.  This
-    # really only lets us find ones that are started that shouldn't be.
-    vm_ids.each do |vm_id|
-      puts "VM ID: %d" % [vm_id]
-      begin
-        dom = conn.lookup_domain_by_id(vm_id)
-      rescue
-        puts "Failed to find domain " + vm.description
-        next
-      end
-      
-      vm_uuid = dom.uuid
-      info = dom.info
-
-      puts "VM UUID: %s" % [vm_uuid]
-      info = dom.info
-      puts info.to_s
- 
-      vm = Vm.find(:first, :conditions => [ "uuid = ?", vm_uuid ])
-      if vm == nil
-        puts "VM Not found in database, must be created by user.  giving up."
-        next
-      end
+    conn.close
 
-      check_state(vm, info)
-    end
+  end
+end
 
-    # Now we get a list of all vms that should be on this system and see if
-    # they are all running.
-    Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
-    
-      begin
-        dom = conn.lookup_domain_by_uuid(vm.uuid)
-      rescue
-        # OK.  We couldn't find the UUID that we thought was there.  The only
-        # explanation is that the domain is dead.
-        puts "Failed to find domain " + vm.description
-        kick_taskomatic(Vm::STATE_STOPPED, vm)
-        next
-      end
-      info = dom.info
-      check_state(vm, info)
+get_credentials
 
-      conn.close
+loop do
+
+  # fork() seems to really mess with our db connection.  Need to have this
+  # in the main connection as well.  I verified it's not leaking connections/fds.
+  database_connect
+  hosts = Host.find(:all)
+
+  p_count = 0
+  hosts.each do |host|
+
+    p_count += 1
 
+    # Only allow up to 10 processes running at a time.  If we go above 10
+    # Then we wait for one to exit before continuing.
+    if p_count > 10
+      Process.wait
+      p_count -= 1
     end
+
+    fork do
+      check_status(host)
+      exit 0
+    end
+
   end
 
+  while p_count > 0
+    Process.wait
+    p_count -= 1
+  end
+
+
   STDOUT.flush
   sleep sleeptime
 end
-- 
1.5.5.1




More information about the ovirt-devel mailing list