[Ovirt-devel] [PATCH] Use multiple processes to check host status
Ian Main
imain at redhat.com
Thu Jun 12 15:35:51 UTC 2008
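This patch makes host-status fork() one child process per host, with at
most 10 children running at a time, to connect out to hosts via libvirt.
This should help with the bottleneck we were seeing with libvirt connect timeouts. It also raises the polling sleep time from 5 to 20.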
Signed-off-by: Ian Main <imain at redhat.com>
---
wui/src/host-status/host-status.rb | 193 +++++++++++++++++++++---------------
1 files changed, 114 insertions(+), 79 deletions(-)
diff --git a/wui/src/host-status/host-status.rb b/wui/src/host-status/host-status.rb
index 41638da..eddd348 100755
--- a/wui/src/host-status/host-status.rb
+++ b/wui/src/host-status/host-status.rb
@@ -1,5 +1,5 @@
#!/usr/bin/ruby
-#
+#
# Copyright (C) 2008 Red Hat, Inc.
# Written by Chris Lalancette <clalance at redhat.com>
#
@@ -29,7 +29,7 @@ include Daemonize
$logfile = '/var/log/ovirt-wui/host-status.log'
do_daemon = true
-sleeptime = 5
+sleeptime = 20
opts = OptionParser.new do |opts|
opts.on("-h", "--help", "Print help message") do
puts opts
@@ -97,104 +97,139 @@ def kick_taskomatic(msg, vm)
task.save
end
-loop do
- get_credentials
- hosts = Host.find(:all)
- hosts.each do |host|
-
- begin
- conn = Libvirt::open("qemu+tcp://" + host.hostname + "/system")
- rescue
- # we couldn't contact the host for whatever reason. Since we can't get
- # to this host, we have to mark all vms on it as disconnected or stopped
- # or such.
- if host.state != "unavailable"
- puts "Updating host state to unavailable: " + host.hostname
- host.state = "unavailable"
- host.save
- end
+def check_status(host)
- Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
- # Since we can't reach the host on which the vms reside, we mark these
- # as STATE_UNREACHABLE. If they come back up we can mark them as
- # running again, else they'll be stopped. At least for now the user
- # will know what's going on.
- #
- # If this causes too much trouble in the UI, this can be changed to
- # STATE_STOPPED for now until it is resolved of another solution is
- # brought forward.
-
- if vm.state != Vm::STATE_UNREACHABLE:
- kick_taskomatic(Vm::STATE_UNREACHABLE, vm)
- end
+ # This is in a new process, we need a new database connection.
+ database_connect
+
+ begin
+ puts "Connecting to host " + host.hostname
+ conn = Libvirt::open("qemu+tcp://" + host.hostname + "/system")
+ rescue
+ # we couldn't contact the host for whatever reason. Since we can't get
+ # to this host, we have to mark all vms on it as disconnected or stopped
+ # or such.
+ if host.state != "unavailable"
+ puts "Updating host state to unavailable: " + host.hostname
+ host.state = "unavailable"
+ host.save
+ end
+
+ Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
+ # Since we can't reach the host on which the vms reside, we mark these
+ # as STATE_UNREACHABLE. If they come back up we can mark them as
+ # running again, else they'll be stopped. At least for now the user
+ # will know what's going on.
+ #
+ # If this causes too much trouble in the UI, this can be changed to
+      # STATE_STOPPED for now until it is resolved or another solution is
+ # brought forward.
+
+ if vm.state != Vm::STATE_UNREACHABLE:
+ kick_taskomatic(Vm::STATE_UNREACHABLE, vm)
end
+ end
+
+ return
+ end
+
+ if host.state != "available"
+ puts "Updating host state to available: " + host.hostname
+ host.state = "available"
+ host.save
+ end
+
+ begin
+ vm_ids = conn.list_domains
+ rescue
+ puts "Failed to request domain list on host " + host.hostname
+ conn.close
+ next
+ end
+
+ # Here we're going through every vm listed through libvirt. This
+ # really only lets us find ones that are started that shouldn't be.
+ vm_ids.each do |vm_id|
+ puts "VM ID: %d" % [vm_id]
+ begin
+ dom = conn.lookup_domain_by_id(vm_id)
+ rescue
+ puts "Failed to find domain " + vm.description
next
end
- if host.state != "available"
- puts "Updating host state to available: " + host.hostname
- host.state = "available"
- host.save
+ vm_uuid = dom.uuid
+ info = dom.info
+
+ puts "VM UUID: %s" % [vm_uuid]
+ info = dom.info
+
+ vm = Vm.find(:first, :conditions => [ "uuid = ?", vm_uuid ])
+ if vm == nil
+ puts "VM Not found in database, must be created by user. giving up."
+ next
end
+ check_state(vm, info)
+ end
+
+ # Now we get a list of all vms that should be on this system and see if
+ # they are all running.
+ Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
+
begin
- vm_ids = conn.list_domains
+ dom = conn.lookup_domain_by_uuid(vm.uuid)
rescue
- puts "Failed to request domain list on host " + host.hostname
- conn.close
+ # OK. We couldn't find the UUID that we thought was there. The only
+ # explanation is that the domain is dead.
+ puts "Failed to find domain " + vm.description
+ kick_taskomatic(Vm::STATE_STOPPED, vm)
next
end
+ info = dom.info
+ check_state(vm, info)
- # Here we're going through every vm listed through libvirt. This
- # really only lets us find ones that are started that shouldn't be.
- vm_ids.each do |vm_id|
- puts "VM ID: %d" % [vm_id]
- begin
- dom = conn.lookup_domain_by_id(vm_id)
- rescue
- puts "Failed to find domain " + vm.description
- next
- end
-
- vm_uuid = dom.uuid
- info = dom.info
-
- puts "VM UUID: %s" % [vm_uuid]
- info = dom.info
- puts info.to_s
-
- vm = Vm.find(:first, :conditions => [ "uuid = ?", vm_uuid ])
- if vm == nil
- puts "VM Not found in database, must be created by user. giving up."
- next
- end
+ conn.close
- check_state(vm, info)
- end
+ end
+end
- # Now we get a list of all vms that should be on this system and see if
- # they are all running.
- Vm.find(:all, :conditions => [ "host_id = ?", host.id ]).each do |vm|
-
- begin
- dom = conn.lookup_domain_by_uuid(vm.uuid)
- rescue
- # OK. We couldn't find the UUID that we thought was there. The only
- # explanation is that the domain is dead.
- puts "Failed to find domain " + vm.description
- kick_taskomatic(Vm::STATE_STOPPED, vm)
- next
- end
- info = dom.info
- check_state(vm, info)
+get_credentials
- conn.close
+loop do
+
+  # fork() seems to really mess with our db connection, so we need to reconnect
+  # in the main process as well. I verified it's not leaking connections/fds.
+ database_connect
+ hosts = Host.find(:all)
+
+ p_count = 0
+ hosts.each do |host|
+
+ p_count += 1
+ # Only allow up to 10 processes running at a time. If we go above 10
+    # then we wait for one to exit before continuing.
+ if p_count > 10
+ Process.wait
+ p_count -= 1
end
+
+ fork do
+ check_status(host)
+ exit 0
+ end
+
end
+ while p_count > 0
+ Process.wait
+ p_count -= 1
+ end
+
+
STDOUT.flush
sleep sleeptime
end
--
1.5.5.1
More information about the ovirt-devel
mailing list