[Linux-cluster] GS2 try_rgrp_unlink consuming lots of CPU
Miller, Gordon K
gordon.k.miller at boeing.com
Mon Oct 26 14:34:06 UTC 2009
Occasionally, we encounter a condition where the CPU system time increases dramatically (30% to 100% of total CPU time) for a period of several seconds to 10's of minutes. Using oprofile we observed that the majority of CPU time was being spent in gfs2_bitfit with rgblk_search and try_rgrp_unlink in the backtrace. Further instrumentation using SystemTap has shown try_rgrp_unlink being called repeatedly during the period of high system usage with durations averaging 400 milliseconds on each call. Often , try_rgrp_unlink will return the same inode as in previous calls. Attached is output from oprofile and a SystemTap probe on the return from try_rgrp_unlink with the number of times rgblk_search (rgblk_search_count) and gfs2_bitfit (bitfit_count) were called during this invocation of try_rgrp_unlink, the duration in seconds of the try_rgrp_unlink function, selected elements of the rgd structure and the returned inode (return->i_ino). In this case, the behavior persisted for 15 minutes beyond the capture listed here. The SystemTap scripts used in this capture follow the output. Our kernel version is 2.6.18-128.7.1 plus the patch to gfs2_bitfit contained in linux-2.6-gfs2-unaligned-access-in-gfs2_bitfit.patch.
Has anyone experienced this behavior?
Oprofile output:
CPU: Core Solo / Duo, speed 2000.21 MHz (estimated) Counted CPU_CLK_UNHALTED events (Unhalted clock cycles) with a unit mask of 0x00 (Unhalted core cycles) count 100000
samples cum. samples % cum. % linenr info image name app name symbol name
742159 742159 47.2355 47.2355 rgrp.c:181 gfs2.ko gfs2 gfs2_bitfit
208285 950444 13.2565 60.4920 (no location information) stap_ff8ca210ca2a6219e30b9c0725d9186c_27191 stap_ff8ca210ca2a6219e30b9c0725d9186c_27191 (no symbols)
202589 1153033 12.8940 73.3860 process.c:0 vmlinux vmlinux mwait_idle
...
11610 1304239 0.7389 83.0096 rgrp.c:1336 gfs2.ko gfs2 rgblk_search
...
2430 1394001 0.1547 88.7226 rgrp.c:912 gfs2.ko gfs2 try_rgrp_unlink
SystemTap output:
Time Host
19:48:23.224 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.406866 cpu=2 ppid=8791 pid=9195 tid=10562 java
19:48:23.224 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:23.224 l-ce2 return->i_ino=56897
--
19:48:23.644 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.419159 cpu=1 ppid=8791 pid=10173 tid=10423 java
19:48:23.644 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:23.644 l-ce2 return->i_ino=56897
--
19:48:24.056 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.412728 cpu=0 ppid=8791 pid=9195 tid=10536 java
19:48:24.056 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:24.056 l-ce2 return->i_ino=56897
--
19:48:24.467 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.409639 cpu=3 ppid=8791 pid=9195 tid=9282 java
19:48:24.467 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:24.467 l-ce2 return->i_ino=56897
--
19:48:24.876 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.409134 cpu=3 ppid=8791 pid=9195 tid=10540 java
19:48:24.876 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:24.876 l-ce2 return->i_ino=56897
--
19:48:25.276 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.398694 cpu=2 ppid=8791 pid=9195 tid=10535 java
19:48:25.276 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:25.276 l-ce2 return->i_ino=56897
--
19:48:25.693 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.417088 cpu=0 ppid=8769 pid=8791 tid=8892 java
19:48:25.693 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:25.693 l-ce2 return->i_ino=56897
--
19:48:26.107 l-ce2 try_rgrp_unlink return=4147791632 rgblk_search_count=56844 bitfit_count=153324 duration=0.414038 cpu=0 ppid=8790 pid=20555 tid=20555 df
19:48:26.107 l-ce2 last_unlinked=56869 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:26.107 l-ce2 return->i_ino=56869
--
19:48:26.509 l-ce2 try_rgrp_unlink return=4141184592 rgblk_search_count=56870 bitfit_count=153350 duration=0.402198 cpu=3 ppid=8791 pid=9068 tid=9418 java
19:48:26.509 l-ce2 last_unlinked=56897 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:26.509 l-ce2 return->i_ino=56897
--
19:48:26.919 l-ce2 try_rgrp_unlink return=4147791632 rgblk_search_count=56844 bitfit_count=153324 duration=0.409572 cpu=3 ppid=8791 pid=9195 tid=9266 java
19:48:26.919 l-ce2 last_unlinked=56869 rgd->rd_addr=17, rgd->rd_data0=22, rgd->rd_data=65548, rgd->rd_length=5 rgd->rd_last_alloc=58763 rgd->rd_flags=0x5
19:48:26.919 l-ce2 return->i_ino=56869
SystemTap probe:
global rgblk_search_count
global bitfit_count
function tod:string() {
msec = gettimeofday_ns() /1000000
sec = msec / 1000
msec = msec - (sec * 1000)
return sprintf("%s.%03d", substr(ctime(sec), 11, 8), msec) }
probe module("gfs2").function("try_rgrp_unlink") {
tid=tid()
tids[tid]=tid
rgblk_search_count[tid]=0
bitfit_count[tid]=0
try_rgrp_unlink_start[tid]=gettimeofday_us()
}
probe module("gfs2").function("try_rgrp_unlink").return {
tid=tid()
duration=gettimeofday_us() - try_rgrp_unlink_start[tid]
dur_sec=duration/1000000
dur_usec=duration-(dur_sec * 1000000)
time_host=sprintf("%s %s", tod(), hostname)
printf("%s try_rgrp_unlink return=%d rgblk_search_count=%d bitfit_count=%d duration=%d.%06d cpu=%d ppid=%d pid=%d tid=%d %s\n",
time_host, $return, rgblk_search_count[tid], bitfit_count[tid], dur_sec, dur_usec, cpu(), ppid(), pid(), tid, execname())
printf("%s last_unlinked=%d rgd->rd_addr=%d, rgd->rd_data0=%d, rgd->rd_data=%d, rgd->rd_length=%d rgd->rd_last_alloc=%d rgd->rd_flags=0x%x \n",
time_host, kernel_long($last_unlinked), $rgd->rd_addr, $rgd->rd_data0, $rgd->rd_data, $rgd->rd_length, $rgd->rd_last_alloc, $rgd->rd_flags )
if ($return != 0)
printf("%s return->i_ino=%d\n", time_host, $return->i_ino)
print("\n")
delete try_rgrp_unlink_start[tid]
delete bitfit_count[tid]
delete rgblk_search_count[tid]
tids[tid]=0
}
probe module("gfs2").function("rgblk_search") {
tid=tid()
if (tid == tids[tid]) {
rgblk_search_count[tid]++
}
}
probe module("gfs2").function("gfs2_bitfit") {
tid=tid()
if (tid == tids[tid]) {
bitfit_count[tid]+=1
}
}
More information about the Linux-cluster
mailing list