rpms/rdma/F-8 rdma-90-rdma.rules, NONE, 1.1 rdma-fixup-mtrr.awk, NONE, 1.1 rdma.conf, NONE, 1.1 rdma.init, NONE, 1.1 rdma.spec, NONE, 1.1
Doug Ledford (dledford)
fedora-extras-commits at redhat.com
Fri Jun 27 17:55:11 UTC 2008
- Previous message (by thread): rpms/koffice/devel koffice.spec,1.73,1.74
- Next message (by thread): rpms/rdma/F-9 rdma-90-rdma.rules, NONE, 1.1 rdma-fixup-mtrr.awk, NONE, 1.1 rdma.conf, NONE, 1.1 rdma.init, NONE, 1.1 rdma.spec, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Author: dledford
Update of /cvs/extras/rpms/rdma/F-8
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv21869
Added Files:
rdma-90-rdma.rules rdma-fixup-mtrr.awk rdma.conf rdma.init
rdma.spec
Log Message:
* Mon Jun 09 2008 Doug Ledford <dledford at redhat.com> - 1.0-2
- Attempt to use --subsystem-match=infiniband in the rdma init script use
of udevtrigger so we don't trigger the whole system
- Add a requirement to stop opensm to the init script
--- NEW FILE rdma-90-rdma.rules ---
# Name the RDMA userspace device nodes so they appear under an infiniband/
# subdirectory; %k is replaced by udev with the kernel's own device name.
#
# These two rules only rename the node; they do not set MODE.
KERNEL=="umad*", NAME="infiniband/%k"
KERNEL=="issm*", NAME="infiniband/%k"
# The remaining rules also set mode 0666 (read/write for every user).
KERNEL=="ucm*", NAME="infiniband/%k", MODE="0666"
KERNEL=="uverbs*", NAME="infiniband/%k", MODE="0666"
KERNEL=="uat", NAME="infiniband/%k", MODE="0666"
KERNEL=="ucma", NAME="infiniband/%k", MODE="0666"
KERNEL=="rdma_cm", NAME="infiniband/%k", MODE="0666"
--- NEW FILE rdma-fixup-mtrr.awk ---
# This is a simple script that checks the contents of /proc/mtrr to see if
# the BIOS maker for the computer took the easy way out in terms of
# specifying memory regions when there is a hole below 4GB for PCI access
# and the machine has 4GB or more of RAM. When the contents of /proc/mtrr
# show a 4GB mapping of write-back cached RAM, minus punch out hole(s) of
# uncacheable regions (the area reserved for PCI access), then it becomes
# impossible for the ib_ipath driver to set write_combining on its PIO
# buffers. To correct the problem, remap the lower memory region in various
# chunks up to the start of the punch out hole(s), then delete the punch out
# hole(s) entirely as they aren't needed any more. That way, ib_ipath will
# be able to set write_combining on its PIO memory access region.
# regs counts the /proc/mtrr lines parsed so far and doubles as the next
# free index into the per-register arrays.
BEGIN { regs = 0 }
# Report register index mem on stderr, then return 1 when it is usable as
# the base memory region and 0 otherwise.  A usable base region is at least
# 512MB in size, of type write-back, and starts below 4GB.
function check_base(mem)
{
    printf "Base memory data: base=0x%08x, size=0x%x\n", base[mem], size[mem] > "/dev/stderr"
    return (size[mem] >= (512 * 1024 * 1024) &&
            type[mem] == "write-back" &&
            base[mem] < (4 * 1024 * 1024 * 1024)) ? 1 : 0
}
# Report register index hole on stderr, then return 1 when it qualifies as
# a punch-out hole and 0 otherwise.  A qualifying hole is no larger than
# 1GB, of type uncachable, and ends at or below 4GB.
function check_hole(hole)
{
    printf "Hole data: base=0x%08x, size=0x%x\n", base[hole], size[hole] > "/dev/stderr"
    return (size[hole] <= (1 * 1024 * 1024 * 1024) &&
            type[hole] == "uncachable" &&
            (base[hole] + size[hole]) <= (4 * 1024 * 1024 * 1024)) ? 1 : 0
}
# Cover the address range [start, end) with new mtrr entries and write each
# one to /proc/mtrr (echoing it to stderr as well).
#
#   start, end                    - range to cover; end is exclusive
#   new_base, new_size, tmp_base  - local scratch variables, callers pass
#                                   only start and end
#
# The memory type written is type[mem], where mem is the global index of the
# base memory region selected in the END block.
function build_entries(start, end, new_base, new_size, tmp_base)
{
    # An empty (or inverted) range needs no entries.  Without this guard
    # the do/while below would still run once and write a bogus entry
    # smaller than a page.
    if (start >= end)
        return

    # mtrr registers require alignment of blocks, so a 256MB chunk must
    # be 256MB aligned.  Additionally, all blocks must be a power of 2
    # in size.  So, do the largest power of two size that we can and
    # still have start + block <= end, rinse and repeat.
    tmp_base = start
    do {
        new_base = tmp_base
        new_size = 4096
        # Grow the block while it still fits and stays aligned.
        while (((new_base + new_size) < end) &&
               ((new_base % new_size) == 0))
            new_size = lshift(new_size, 1)
        # The loop may have overshot by one doubling; step back if so.
        if (((new_base + new_size) > end) ||
            ((new_base % new_size) != 0))
            new_size = rshift(new_size, 1)
        printf "base=0x%x size=0x%x type=%s\n",
            new_base, new_size, type[mem] > "/dev/stderr"
        printf "base=0x%x size=0x%x type=%s\n",
            new_base, new_size, type[mem] > "/proc/mtrr"
        # Flush so that each entry reaches /proc/mtrr as its own write.
        fflush("")
        tmp_base = new_base + new_size
    } while (tmp_base < end)
}
# Per-line rule: reduce one /proc/mtrr line to four whitespace separated
# fields (register number, base, size with unit suffix, type) and record
# them in the per-register arrays at index regs.
{
    gsub("^reg", "")
    gsub(": base=", " ")
    gsub(" [(].*), size=", " ")
    gsub(": ", " ")
    gsub(", count=.*$", "")

    register[regs] = strtonum($1)
    base[regs] = strtonum($2)
    # Keep the number as printed; size[] holds the same value in bytes.
    human_size[regs] = strtonum($3)
    if (match($3, "MB")) {
        mult[regs] = "MB"
        size[regs] = human_size[regs] * 1024 * 1024
    } else {
        mult[regs] = "KB"
        size[regs] = human_size[regs] * 1024
    }
    type[regs] = $4
    enabled[regs] = 1
    end[regs] = base[regs] + size[regs]
    regs++
}
END {
# Runs once every /proc/mtrr line has been parsed.  Exits with status 1
# when nothing was changed (no suitable base region or no holes) and with
# status 0 after the registers have been rewritten.
#
# First we need to find our base memory region.  We only care about
# the memory register that starts at base 0.  This is the only one
# that we can reliably know is our global memory region, and the
# only one that we can reliably check against overlaps.  It's entirely
# possible that any memory region not starting at 0 and having an
# overlap with another memory region is in fact intentional and we
# shouldn't touch it.
for(i=0; i<regs; i++)
if (base[i] == 0)
break
# Did we get a valid base register?
if (i == regs)
exit 1
mem = i
if (!check_base(mem))
exit 1
# Collect every other register that starts below the end of the base
# region and passes check_hole(); holes[] stores register indexes.
cur_hole = 0
for(i=0; i<regs; i++) {
if (i == mem)
continue
if (base[i] < end[mem] && check_hole(i))
holes[cur_hole++] = i
}
if (cur_hole == 0) {
print "Nothing to do" > "/dev/stderr"
exit 1
}
printf "Found %d punch-out holes\n", cur_hole > "/dev/stderr"
# We need to sort the holes according to base address (ascending bubble
# sort over the holes[] index list)
for(j = 0; j < cur_hole - 1; j++) {
for(i = cur_hole - 1; i > j; i--) {
if(base[holes[i]] < base[holes[i-1]]) {
tmp = holes[i]
holes[i] = holes[i-1]
holes[i-1] = tmp
}
}
}
# OK, the common case would be that the BIOS is mapping holes out
# of the 4GB memory range, and that our hole(s) are consecutive and
# that our holes and our memory region end at the same place.  However,
# things like machines with 8GB of RAM or more can foul up these
# common traits.
#
# So, our modus operandi is to disable all of the memory/hole regions
# to start, then build new base memory zones that in the end add
# up to the same as our original zone minus the holes.  We know that
# we will never have a hole listed here that belongs to a valid
# hole punched in a write-combining memory region because you can't
# overlay write-combining on top of write-back and we know our base
# memory region is write-back, so in order for this hole to overlap
# our base memory region it can't be also overlapping a write-combining
# region.
printf "disable=%d\n", register[mem] > "/dev/stderr"
printf "disable=%d\n", register[mem] > "/proc/mtrr"
fflush("")
enabled[mem] = 0
for(i=0; i < cur_hole; i++) {
printf "disable=%d\n", register[holes[i]] > "/dev/stderr"
printf "disable=%d\n", register[holes[i]] > "/proc/mtrr"
fflush("")
enabled[holes[i]] = 0
}
# Rebuild the region from its start up to the lowest hole (holes[0] after
# the sort above).
build_entries(base[mem], base[holes[0]])
# Rebuild any gap between two consecutive holes.
for(i=0; i < cur_hole - 1; i++)
if (base[holes[i+1]] > end[holes[i]])
build_entries(end[holes[i]], base[holes[i+1]])
# The loop above leaves i == cur_hole - 1, so holes[i] is the hole with the
# highest base address.  Rebuild whatever part of the base region lies
# beyond the end of that hole.
if (end[mem] > end[holes[i]])
build_entries(end[holes[i]], end[mem])
# We changed up the mtrr regs, so signal to the rdma script to
# reload modules that need the mtrr regs to be right.
exit 0
}
--- NEW FILE rdma.conf ---
# Settings read by the rdma init script.  The script compares each value
# against the literal string "yes".
#
# Load the IPoIB module (ib_ipoib)
IPOIB_LOAD=yes
# Load the SRP module (ib_srp)
SRP_LOAD=no
# Load the iSER module (ib_iser)
ISER_LOAD=no
# Load QLogic VNIC module
# NOTE(review): the rdma init script shipped alongside this file does not
# read this variable - confirm which script consumes it.
QLGC_VNIC_LOAD=no
# Should we modify the system mtrr registers?  We may need to do this if you
# get messages from the ib_ipath driver saying that it couldn't enable
# write combining for the PIO buffers on the card.
FIXUP_MTRR_REGS=no
--- NEW FILE rdma.init ---
#!/bin/bash
#
# Bring up/down the kernel RDMA stack
#
# chkconfig: - 05 95
# description: Loads/Unloads InfiniBand and iWARP kernel modules
# config: /etc/rdma/rdma.conf
#
### BEGIN INIT INFO
# Provides: rdma
# Default-Stop: 0 1 2 3 4 5 6
# Required-Stop: $network $srpd $opensm
# Short-Description: Loads and unloads the InfiniBand and iWARP kernel modules
# Description: Loads and unloads the InfiniBand and iWARP kernel modules
### END INIT INFO
CONFIG=/etc/rdma/rdma.conf

. /etc/rc.d/init.d/functions

# Modules to load, listed in load order within each group.
LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_core ib_mad ib_sa ib_addr"

# Without a config file only IPoIB is loaded; with one, each *_LOAD=yes
# setting selects its upper layer protocol module.
if [ ! -f $CONFIG ]; then
    LOAD_ULP_MODULES="ib_ipoib"
else
    . $CONFIG
    [ "${IPOIB_LOAD}" == "yes" ] && LOAD_ULP_MODULES="ib_ipoib"
    [ "${SRP_LOAD}" == "yes" ] && LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
    [ "${ISER_LOAD}" == "yes" ] && LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser"
fi

# Modules to unload, listed in unload order within each group.
UNLOAD_ULP_MODULES="ib_iser ib_srp ib_ipoib"
UNLOAD_HW_MODULES="iw_c2 iw_cxgb3 iw_nes ib_ehca ib_ipath ib_mthca mlx4_ib"
UNLOAD_CORE_USER_MODULES="rdma_ucm ib_ucm ib_uverbs ib_umad"
UNLOAD_CORE_CM_MODULES="rdma_cm ib_cm iw_cm"
UNLOAD_CORE_MODULES="ib_addr ib_sa ib_mad ib_core"

# Names of the ib* interfaces ifconfig reports at the time the script starts.
interfaces=$(/sbin/ifconfig | grep "^ib[0-9]*" | cut -f 1 -d ' ' | sed -e 'y/\r/ /')
# Succeeds (status 0) when lsmod output contains $1 as a whole word,
# otherwise returns grep's non-zero status.
is_module()
{
    # The function's status is that of the last pipeline stage, grep.
    /sbin/lsmod | grep -w "$1" > /dev/null 2>&1
}
# Load every module named in the arguments that is not already loaded.
# Prints a message for each module that modprobe fails to load and returns
# the sum of the modprobe exit statuses (0 when everything loaded).
load_modules()
{
    local RC=0
    local module
    local res

    for module in $*; do
        # Already present: nothing to do for this one.
        is_module $module && continue
        /sbin/modprobe $module
        res=$?
        RC=$(( RC + res ))
        if [ $res -ne 0 ]; then
            echo
            # Report the module that was just attempted.
            echo -n "Failed to load module $module"
        fi
    done
    return $RC
}
# Remove module $1 if it is currently loaded.  Returns 0 when the module is
# absent or was removed, 1 (after printing a message) when rmmod fails.
unload_module()
{
    local mod=$1

    # Nothing to unload.
    is_module $mod || return 0

    if ! /sbin/rmmod $mod > /dev/null 2>&1; then
        echo
        echo "Failed to unload $mod"
        return 1
    fi
    return 0
}
# This function is a horrible hack to work around BIOS authors that should
# be shot. Specifically, certain BIOSes will map the entire 4GB address
# space as write-back cacheable when the machine has 4GB or more of RAM, and
# then they will exclude the reserved PCI I/O addresses from that 4GB
# cacheable mapping by making an overlapping uncacheable mapping. However,
# once you do that, it is then impossible to set *any* of the PCI I/O
# address space as write-combining. This is an absolute death-knell to
# certain IB hardware. So, we unroll this mapping here. Instead of
# punching a hole in a single 4GB mapping, we redo the base 4GB mapping as
# a series of discrete mappings that effectively are the same as the 4GB
# mapping minus the hole, and then we delete the uncacheable mappings that
# are used to punch the hole. This then leaves the PCI I/O address space
# unregistered (which defaults it to uncacheable), but available for
# write-combining mappings where needed.
check_mtrr_registers()
{
    # Both the kernel interface and the fixup script must be present.
    [ -f /proc/mtrr ] && [ -f /etc/rdma/fixup-mtrr.awk ] || return 1

    # If we actually change the mtrr registers, then the awk script will
    # return true, and we need to unload the ib_ipath module if it's already
    # loaded.  The udevtrigger in load_hardware_modules will immediately
    # reload the ib_ipath module for us, so there shouldn't be a problem.
    awk -f /etc/rdma/fixup-mtrr.awk /proc/mtrr 2>/dev/null || return $?

    if is_module ib_ipath; then
        /sbin/rmmod ib_ipath
    fi
}
# Trigger udev to load the low level hardware drivers, then load the RDMA
# companion modules that go with drivers already present.  Returns the sum
# of the load_modules statuses (0 when nothing failed).
load_hardware_modules()
{
    local -i RC=0

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers
    # WARNING!!  If you are using this script to take down and bring up
    # your IB interfaces on a machine that uses more than one low level
    # Infiniband hardware driver, then there is no guarantee that the
    # ordering of rdma interfaces after you take down and bring up the
    # stack will be the same as the ordering of the interfaces on a
    # clean boot.
    #
    # We match both class NETWORK and class INFINIBAND devices since our
    # iWARP hardware is listed under class NETWORK.  The side effect of
    # this is that we might cause a non-iWARP network driver to be loaded.
    udevtrigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000 --attr-match=class=0x0c0600
    udevsettle
    # Load ib_ehca when the device tree lists an lhca entry.
    if [ -r /proc/device-tree ]; then
        if [ -n "`ls /proc/device-tree | grep lhca`" ]; then
            if ! is_module ib_ehca; then
                load_modules ib_ehca
                RC+=$?
            fi
        fi
    fi
    # The two is_module tests must be joined with the shell's && operator;
    # "-a" belongs to test/[ and would merely be handed to is_module as
    # extra arguments that it ignores.
    if is_module cxgb3 && ! is_module iw_cxgb3; then
        load_modules iw_cxgb3
        RC+=$?
    fi
    if is_module mlx4_core && ! is_module mlx4_ib; then
        load_modules mlx4_ib
        RC+=$?
    fi
    return $RC
}
# Apply the AMD chipset Errata #58 workaround: when PCI devices 1022:1100,
# 1022:7450 and 15b3:5a46 are all present, make sure register 69 of the
# 1022:1100 devices reads c0.  Always returns 0.
errata_58()
{
    # Both PCI utilities are required.
    test -x /sbin/lspci || return 0
    test -x /sbin/setpci || return 0

    # All three devices must be present for the workaround to apply.
    /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null || return 0
    /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null || return 0
    /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null || return 0

    CURVAL=$(/sbin/setpci -d 1022:1100 69)
    for val in $CURVAL; do
        [ "${val}" != "c0" ] || continue
        # The write addresses every matching device, so one success is
        # enough and the loop can stop.
        if /sbin/setpci -d 1022:1100 69=c0; then
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done
    return 0
}
# Apply the AMD chipset Errata #56 workaround.  Only acts when PCI devices
# 1022:1100, 1022:7450 and 15b3:5a46 are all present and both lspci and
# setpci are executable.
errata_56()
{
# Check AMD chipset issue Errata #56
if test -x /sbin/lspci && test -x /sbin/setpci; then
if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
bus=""
# Look for AMD-8131 devices (1022:7450)
for dev in `/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2`
do
# Registers 19 and 8 of the bridge; presumably the secondary bus number and
# the revision ID - TODO confirm against the PCI configuration space layout.
bus=`/sbin/setpci -s $dev 19`
rev=`/sbin/setpci -s $dev 8`
# Look for a Tavor (15b3:5a46) attached to the secondary bus of this device
for device in `/sbin/setpci -f -s $bus: -d 15b3:5a46 19`
do
# NOTE(review): setpci prints register values in hexadecimal, while -lt
# compares decimal integers; a value containing a-f would make this test
# fail with an error.  Confirm the intended threshold before changing it.
if [ $rev -lt 13 ]; then
/sbin/setpci -d 15b3:5a44 72=14
if [ $? -eq 0 ]; then
# Success: stop scanning this bus.  This also skips the device count
# warning below.
break
else
echo
echo "Failed to apply AMD-8131 Errata #56 workaround"
fi
else
continue
fi
# If more than one device is on the bus then issue a
# warning (only reached when the setpci write above failed)
num=`/sbin/setpci -f -s $bus: 0 | wc -l | sed 's/\ *//g'`
if [ $num -gt 1 ]; then
echo "Warning: your current PCI-X configuration might be incorrect."
echo "see AMD-8131 Errata 56 for more details."
fi
done
done
fi
fi
}
# Load the hardware, core, connection manager, user access and upper layer
# protocol modules in that order, set each HCA's node description, apply
# the AMD errata workarounds and create the subsystem lock file.  Returns
# the sum of the module loading statuses.
start()
{
    local RC=0
    local group

    echo -n "Loading OpenIB kernel modules:"
    load_hardware_modules
    RC=$(( RC + $? ))
    # Each list is deliberately expanded unquoted so that it splits into
    # individual module names.
    for group in "$LOAD_CORE_MODULES" "$LOAD_CORE_CM_MODULES" \
                 "$LOAD_CORE_USER_MODULES" "$LOAD_ULP_MODULES"; do
        load_modules $group
        RC=$(( RC + $? ))
    done

    # Add node description to sysfs
    IBSYSDIR="/sys/class/infiniband"
    if [ -d ${IBSYSDIR} ]; then
        declare -i hca_id=1
        for hca in ${IBSYSDIR}/*; do
            # The counter advances for every entry, writable or not.
            [ -w ${hca}/node_desc ] &&
                echo -n "$(hostname | cut -f 1 -d .) HCA-${hca_id}" >> ${hca}/node_desc 2> /dev/null
            let hca_id++
        done
    fi

    errata_58
    errata_56

    touch /var/lock/subsys/rdma
    [ $RC -eq 0 ] && echo_success || echo_failure
    echo
    return $RC
}
# Unload the RDMA stack.  Refuses (returns 1) while opensm, osmtest,
# srp_daemon or the qlgc_vnic module is still active; otherwise takes the
# IPoIB interfaces down, unloads the modules in two passes and removes the
# subsystem lock file.  Returns the sum of the unload_module statuses.
stop()
{
    # Check if applications which use infiniband are running
    local apps="opensm osmtest srp_daemon"
    local RC=0

    echo -n "Unloading OpenIB kernel modules:"
    for app in $apps; do
        if ps -ef | grep $app | grep -v grep > /dev/null 2>&1; then
            echo
            echo "Found $app running."
            echo "Please stop all RDMA applications before downing the stack."
            echo_failure
            echo
            return 1
        fi
    done

    if is_module qlgc_vnic; then
        echo
        echo "The qlgc_vnic service is still running."
        echo "Please stop all RDMA applications before downing the stack."
        echo_failure
        echo
        return 1
    fi

    if ! is_module ib_core; then
        # Nothing to do, make sure lock file is gone and return
        rm -f /var/lock/subsys/rdma
        echo_success
        echo
        return 0
    fi

    # Down all IPoIB interfaces
    if is_module ib_ipoib; then
        for i in $interfaces; do
            ifdown $i > /dev/null 2>&1
        done
    fi

    # Unload OpenIB modules: first pass covers the upper layer protocol,
    # user access and connection manager modules
    MODULES="$UNLOAD_ULP_MODULES $UNLOAD_CORE_USER_MODULES $UNLOAD_CORE_CM_MODULES"
    for mod in $MODULES; do
        unload_module $mod
        RC=$(( RC + $? ))
    done

    # Insert a sleep here for all the ULP modules to have been fully removed
    # before proceeding to unload the driver modules
    sleep 1

    # Second pass: hardware drivers, then the core modules
    MODULES="$UNLOAD_HW_MODULES $UNLOAD_CORE_MODULES"
    for mod in $MODULES; do
        unload_module $mod
        RC=$(( RC + $? ))
    done

    rm -f /var/lock/subsys/rdma
    [ $RC -eq 0 ] && echo_success || echo_failure
    echo
    return $RC
}
# Print which RDMA modules are loaded, grouped by role, and (when ib_ipoib
# is loaded) the configured and active IPoIB interfaces.  Returns 0 when at
# least one module from any group is loaded, 2 when none is loaded but the
# lock file exists, and 3 when none is loaded and there is no lock file.
status()
{
    local -i cnt=0
    local -i modules=0
    local module=""
    local -i idx
    local -a headings
    local -a groups

    # One heading per module group, in display order.
    headings=("Low level hardware support loaded:\n\t"
              "Upper layer protocol modules:\n\t"
              "User space access modules:\n\t"
              "Connection management modules:\n\t")
    groups=("$UNLOAD_HW_MODULES"
            "$UNLOAD_ULP_MODULES"
            "$UNLOAD_CORE_USER_MODULES"
            "$UNLOAD_CORE_CM_MODULES")

    for (( idx = 0; idx < ${#headings[@]}; idx++ )); do
        echo -ne "${headings[idx]}"
        cnt=0
        for module in ${groups[idx]}; do
            if is_module $module; then
                echo -n "$module "
                let cnt++
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none found"
        modules+=cnt
        echo
        echo
    done

    # Core modules are counted but not listed.
    for module in $UNLOAD_CORE_MODULES; do
        is_module $module && let modules++
    done

    if is_module ib_ipoib; then
        echo -n "Configured IPoIB interfaces: "
        cnt=0
        for i in /etc/sysconfig/network-scripts/ifcfg-ib*; do
            # An unmatched glob is left as the literal pattern; skip it.
            [ -f $i ] || continue
            . $i
            echo -n "$DEVICE "
            let cnt++
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo

        echo -n "Currently active IPoIB interfaces: "
        cnt=0
        for i in $interfaces; do
            if ifconfig $i | grep UP > /dev/null 2>&1; then
                echo -n "$i "
                let cnt++
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo
    fi

    [ $modules -ne 0 ] && return 0
    [ -f /var/lock/subsys/rdma ] && return 2
    return 3
}
# Stop and then start the stack; the status returned is that of start.
restart ()
{
    stop; start
}
# Restart the stack only when the subsystem lock file exists.  Returns the
# status of restart in that case and 0 when there was nothing to restart.
condrestart ()
{
    # An explicit if/else is used so that a failed restart is reported to
    # the caller; "A && restart || return 0" would turn it into success.
    if [ -e /var/lock/subsys/rdma ]; then
        restart
    else
        return 0
    fi
}
# Print the list of accepted actions, surrounded by blank lines, and
# return 2.
usage ()
{
    local prog
    prog=$(basename $0)

    echo
    echo "Usage: $prog {start|stop|restart|condrestart|try-restart|force-reload|status}"
    echo
    return 2
}
# Dispatch on the requested action and exit with the handler's status.
# reload is not implemented and yields status 3; try-restart and
# force-reload both behave like condrestart.
case "$1" in
    start)
        start
        RC=$?
        ;;
    stop)
        stop
        RC=$?
        ;;
    restart)
        restart
        RC=$?
        ;;
    condrestart|try-restart|force-reload)
        condrestart
        RC=$?
        ;;
    status)
        status
        RC=$?
        ;;
    reload)
        RC=3
        ;;
    *)
        usage
        RC=$?
        ;;
esac
exit $RC
--- NEW FILE rdma.spec ---
# Copyright (c) 2008 Red Hat, Inc.
# There is no URL or upstream source entry as this package constitutes
# upstream for itself.
Summary: Infiniband/iWARP Kernel Module Initializer
Name: rdma
Version: 1.0
Release: 2%{?dist}
License: GPLv2+
Group: System Environment/Base
# Every source is a single file that the install section copies into place.
Source0: rdma.conf
Source1: rdma.init
Source2: rdma-fixup-mtrr.awk
Source3: rdma-90-rdma.rules
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
# The package ships only scripts, rules and configuration.
BuildArch: noarch
# chkconfig is invoked by the post and preun scriptlets.
Requires(post): chkconfig
Requires(preun): chkconfig
Requires: udev >= 095
%description
User space initialization scripts for the kernel InfiniBand/iWARP drivers
%prep
# Nothing to unpack: the sources are plain files.
%build
# Nothing to build.
%install
rm -rf ${RPM_BUILD_ROOT}
# Create the init script, package configuration and udev rules directories.
install -d ${RPM_BUILD_ROOT}%{_initrddir}
install -d ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}
install -d ${RPM_BUILD_ROOT}%{_sysconfdir}/udev/rules.d
# Only the init script is installed executable; the awk script is run
# through "awk -f" by the init script and needs no execute bit.
install -m 0644 %{SOURCE0} ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/%{name}.conf
install -m 0755 %{SOURCE1} ${RPM_BUILD_ROOT}%{_initrddir}/%{name}
install -m 0644 %{SOURCE2} ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/fixup-mtrr.awk
install -m 0644 %{SOURCE3} ${RPM_BUILD_ROOT}%{_sysconfdir}/udev/rules.d/90-%{name}.rules
%clean
rm -rf ${RPM_BUILD_ROOT}
%post
# rpm passes the number of installed instances of the package as the first
# argument; 1 means this is a fresh install rather than an upgrade.
if [ $1 = 1 ]; then
/sbin/chkconfig --add %{name}
fi
%preun
# An argument of 0 means the last instance is being removed (a real
# uninstall rather than an upgrade).
if [ $1 = 0 ]; then
/sbin/chkconfig --del %{name}
fi
%files
%defattr(-,root,root,-)
%dir %{_sysconfdir}/%{name}
# noreplace keeps a locally edited rdma.conf in place across upgrades.
%config(noreplace) %{_sysconfdir}/%{name}/%{name}.conf
%{_sysconfdir}/%{name}/fixup-mtrr.awk
%{_initrddir}/%{name}
%{_sysconfdir}/udev/rules.d/90-%{name}.rules
%changelog
* Mon Jun 09 2008 Doug Ledford <dledford at redhat.com> - 1.0-2
- Attempt to use --subsystem-match=infiniband in the rdma init script use
of udevtrigger so we don't trigger the whole system
- Add a requirement to stop opensm to the init script
* Sun Jun 08 2008 Doug Ledford <dledford at redhat.com> - 1.0-1
- Create an initial package for Fedora review
- Previous message (by thread): rpms/koffice/devel koffice.spec,1.73,1.74
- Next message (by thread): rpms/rdma/F-9 rdma-90-rdma.rules, NONE, 1.1 rdma-fixup-mtrr.awk, NONE, 1.1 rdma.conf, NONE, 1.1 rdma.init, NONE, 1.1 rdma.spec, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the fedora-extras-commits
mailing list