
[Cluster-devel] [PATCH] Adding gfs2_lockcapture



---
 gfs2/lockgather/gfs2_lockcapture |  723 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 723 insertions(+), 0 deletions(-)
 create mode 100644 gfs2/lockgather/gfs2_lockcapture

diff --git a/gfs2/lockgather/gfs2_lockcapture b/gfs2/lockgather/gfs2_lockcapture
new file mode 100644
index 0000000..d040738
--- /dev/null
+++ b/gfs2/lockgather/gfs2_lockcapture
@@ -0,0 +1,723 @@
+#!/usr/bin/env python
+"""
+This script gathers GFS2 and DLM lock information for all of the mounted GFS2
+filesystems on a single cluster node.
+
+TODO:
+* Should there be an option to disable the sysrq events in case they could
+  trigger a panic?
+* Add an option to write the log to a file.
+
+* Add a better description.
+* Add examples for all the options.
+* Add better descriptions of the options and ask Steve to review and tweak
+  them.
+
+ author    : Shane Bradley
+ contact   : sbradley redhat com
+ version   : 0.9
+ copyright : GPLv2
+"""
+import sys
+import os
+import os.path
+import logging
+from optparse import OptionParser, Option
+import time
+import platform
+import shutil
+import subprocess
+import tarfile
+
+VERSION_NUMBER = "0.9-1"
+# #####################################################################
+# Global vars:
+# #####################################################################
+# Name of the logger
+MAIN_LOGGER_NAME = "%s" %(os.path.basename(sys.argv[0]))
+# Format of the logger
+MAIN_LOGGER_FORMAT = "%(levelname)s %(message)s"
+# Path to debug root
+PATH_TO_DEBUG_DIR="/sys/kernel/debug"
+# Path to the pid file that will be used for locking.
+PATH_TO_PID_FILENAME = "/var/run/%s.pid" %(os.path.basename(sys.argv[0]))
+
+
+# #####################################################################
+# Class to define what a clusternode is.
+# #####################################################################
+class ClusterNode:
+    def __init__(self, clusternodeName, clusterName, listOfGFS2Names):
+        self.__clusternodeName = clusternodeName
+        self.__clusterName = clusterName
+
+        # List of the mounted filesystem from the mount -l command.
+        self.__listOfMountedGFS2Filesystems = self.__getMountedGFS2Filesystems()
+
+        # List of mounted GFS2 labels for this cluster from mount -l command.
+        listOfGFS2MountedFilesystemLabels = self.__getMountedFilesystemLabel(self.__listOfMountedGFS2Filesystems)
+        self.__listOfGFS2MountedFilesystemLabels = []
+        if (not len(listOfGFS2Names) > 0):
+            # If no items in listOfGFS2Names then add them all.
+            self.__listOfGFS2MountedFilesystemLabels = listOfGFS2MountedFilesystemLabels
+        else:
+            for label in listOfGFS2MountedFilesystemLabels:
+                for name in listOfGFS2Names:
+                    if ((name == label) or ("%s:%s"%(self.__clusterName, name) == label)):
+                        self.__listOfGFS2MountedFilesystemLabels.append(label)
+
+    def __str__(self):
+        rString = ""
+        rString += "%s:%s" %(self.getClusterName(), self.getClusterNodeName())
+        for fsName in self.getMountedGFS2FilesystemNames():
+            rString += "\n\t%s" %(fsName)
+            for mountedFS in self.__listOfMountedGFS2Filesystems:
+                if (mountedFS.find(fsName) >= 0):
+                    rString += " --> %s" %(mountedFS)
+                    break
+        return rString.rstrip()
+
+    def __getMountedFilesystemLabel(self, listOfMountedFilesystems):
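+        # Example "mount -l" line this expects (assumed format; the bracketed
+        # trailing field is the filesystem label, "<clustername>:<fsname>" for GFS2):
+        #   /dev/mapper/vg-lv on /mnt/data type gfs2 (rw,noatime) [mycluster:myfs]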
+        listOfMountedFilesystemsLabels = []
+        for mountedFilesystem in listOfMountedFilesystems:
+            splitMountedFilesystem = mountedFilesystem.split()
+            fsLabel = splitMountedFilesystem[-1].strip().strip("[").rstrip("]")
+            if (len(fsLabel) > 0):
+                # Verify it starts with name of the cluster.
+                if (fsLabel.startswith("%s:" %(self.getClusterName()))):
+                    listOfMountedFilesystemsLabels.append(fsLabel)
+        return listOfMountedFilesystemsLabels
+
+    def __getMountedGFS2Filesystems(self):
+        listOfMountedFilesystems = []
+        commandList= ["mount", "-l"]
+        stdout = ""
+        try:
+            task = subprocess.Popen(commandList, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            (stdout, stderr) = task.communicate()
+        except OSError:
+            commandOptionString = ""
+            for option in commandList:
+                commandOptionString += "%s " %(option)
+            message = "An error occurred running the command: $ %s" %(commandOptionString)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return listOfMountedFilesystems
+        stdoutSplit = stdout.split("\n")
+        for line in stdoutSplit:
+            splitLine = line.split()
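+            # Fields are assumed to look like:
+            #   <device> on <mountpoint> type <fstype> (<options>) [<label>]
+            # so index 4 is the filesystem type.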
+            if (len(splitLine) >= 5):
+                if (splitLine[4] == "gfs2"):
+                    listOfMountedFilesystems.append(line)
+        return listOfMountedFilesystems
+
+    def getClusterNodeName(self):
+        return self.__clusternodeName
+
+    def getClusterName(self):
+        return self.__clusterName
+
+    def getMountedGFS2FilesystemNames(self, includeClusterName=True):
+        # If true will prepend the cluster name to gfs2 fs name
+        if (includeClusterName):
+            return self.__listOfGFS2MountedFilesystemLabels
+        else:
+            listOfGFS2MountedFilesystemLabels = []
+            for fsLabel in self.__listOfGFS2MountedFilesystemLabels:
+                fsLabelSplit = fsLabel.split(":", 1)
+                if (len(fsLabelSplit) == 2):
+                    listOfGFS2MountedFilesystemLabels.append(fsLabelSplit[1])
+            return listOfGFS2MountedFilesystemLabels
+
+# #####################################################################
+# Helper functions.
+# #####################################################################
+def runCommand(command, listOfCommandOptions, standardOut=subprocess.PIPE, standardError=subprocess.PIPE, debug=False):
+    stdout = ""
+    stderr = ""
+    try:
+        commandList = [command]
+        commandList += listOfCommandOptions
+        task = subprocess.Popen(commandList, stdout=standardOut, stderr=standardError)
+        (stdout, stderr) = task.communicate()
+        return (task.returncode == 0)
+    except OSError:
+        commandOptionString = ""
+        for option in listOfCommandOptions:
+            commandOptionString += "%s " %(option)
+        message = "An error occurred running the command: $ %s %s" %(command, commandOptionString)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+        if (debug):
+            if (len(stdout) > 0):
+                print stdout
+            if (len(stderr) > 0):
+                print stderr
+    return False
+
+def writeToFile(pathToFilename, data, appendToFile=True, createFile=False):
+    [parentDir, filename] = os.path.split(pathToFilename)
+    if (os.path.isfile(pathToFilename) or (os.path.isdir(parentDir) and createFile)):
+        try:
+            filemode = "w"
+            if (appendToFile):
+                filemode = "a"
+            fout = open(pathToFilename, filemode)
+            fout.write(data + "\n")
+            fout.close()
+            return True
+        except UnicodeEncodeError, e:
+            message = "There was a unicode encode error writing to the file: %s." %(pathToFilename)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return False
+        except IOError:
+            message = "There was an error writing to the file: %s." %(pathToFilename)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return False
+    return False
+
+def mkdirs(pathToDSTDir):
+    if (os.path.isdir(pathToDSTDir)):
+        return True
+    elif ((not os.access(pathToDSTDir, os.F_OK)) and (len(pathToDSTDir) > 0)):
+        try:
+            os.makedirs(pathToDSTDir)
+        except (OSError, os.error):
+            message = "Could not create the directory: %s." %(pathToDSTDir)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return False
+        except (IOError, os.error):
+            message = "Could not create the directory with the path: %s." %(pathToDSTDir)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return False
+    return os.path.isdir(pathToDSTDir)
+
+def removePIDFile():
+    message = "Removing the pid file: %s" %(PATH_TO_PID_FILENAME)
+    logging.getLogger(MAIN_LOGGER_NAME).debug(message)
+    if (os.path.exists(PATH_TO_PID_FILENAME)):
+        try:
+            os.remove(PATH_TO_PID_FILENAME)
+        except IOError:
+            message = "There was an error removing the file: %s." %(PATH_TO_PID_FILENAME)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def exitScript(removePidFile=True, errorCode=0):
+    if (removePidFile):
+        removePIDFile()
+    message = "The script will exit."
+    logging.getLogger(MAIN_LOGGER_NAME).info(message)
+    sys.exit(errorCode)
+
+# #####################################################################
+# Helper functions for gathering the lockdumps.
+# #####################################################################
+def getClusterNode(listOfGFS2Names):
+    # Return a ClusterNode object if the clusternode and cluster name are found
+    # in the output, else return None.
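+    # The relevant "cman_tool status" lines are assumed to look like:
+    #   Cluster Name: mycluster
+    #   Node name: node1.example.com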
+    commandList= ["cman_tool", "status"]
+    stdout = ""
+    try:
+        task = subprocess.Popen(commandList, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        (stdout, stderr) = task.communicate()
+    except OSError:
+        commandOptionString = ""
+        for option in commandList:
+            commandOptionString += "%s " %(option)
+        message = "An error occurred running the command: $ %s" %(commandOptionString)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+        return None
+    stdoutSplit = stdout.split("\n")
+    clusterName = ""
+    clusternodeName = ""
+    for line in stdoutSplit:
+        if (line.startswith("Cluster Name:")):
+            clusterName = line.split("Cluster Name:")[1].strip().rstrip()
+        if (line.startswith("Node name: ")):
+            clusternodeName = line.split("Node name:")[1].strip().rstrip()
+    if ((len(clusterName) > 0) and (len(clusternodeName) > 0)):
+        return ClusterNode(clusternodeName, clusterName, listOfGFS2Names)
+    return None
+
+def mountFilesystemDebug(enableMounting=True):
+    if (os.path.ismount(PATH_TO_DEBUG_DIR)):
+        message = "The debug filesystem %s is mounted." %(PATH_TO_DEBUG_DIR)
+        logging.getLogger(MAIN_LOGGER_NAME).info(message)
+        return True
+    else:
+        message = "The debug filesystem %s is not mounted." %(PATH_TO_DEBUG_DIR)
+        logging.getLogger(MAIN_LOGGER_NAME).warning(message)
+        if (enableMounting):
+            if(mountFilesystem("/bin/mount", "none", PATH_TO_DEBUG_DIR, "debugfs")):
+                message = "The debug filesystem was mounted: %s." %(PATH_TO_DEBUG_DIR)
+                logging.getLogger(MAIN_LOGGER_NAME).info(message)
+                return True
+    return False
+
+def mountFilesystem(pathToMountCommand, pathToDevice, pathToMountPoint, filesystemType):
+    if (os.path.ismount(PATH_TO_DEBUG_DIR)):
+        return True
+    listOfCommandOptions = ["-t", filesystemType, pathToDevice, pathToMountPoint]
+    if (not runCommand(pathToMountCommand, listOfCommandOptions)):
+        message = "There was an error mounting the filesystem type %s for the device %s to the mount point %s." %(filesystemType, pathToDevice, pathToMountPoint)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+        message = "The standard error is below: \n\t  %s" %(stderr)
+        logging.getLogger(MAIN_LOGGER_NAME).debug(message)
+    return  os.path.ismount(PATH_TO_DEBUG_DIR)
+
+def gatherGeneralInformation(pathToDSTDir):
+    # Maybe add cluster node name, uname -a, etc
+    systemString = "HOSTNAME: %s\nDATE: %s" %(platform.node(), time.strftime("%Y-%m-%d_%H:%M:%S"))
+    writeToFile(os.path.join(pathToDSTDir, "system.txt"), systemString, createFile=True)
+    # Get "cman_tool node -F id,type,name" data.
+    command = "cman_tool"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool-nodes.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["nodes", "-F", "id,type,name"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "cman_tool services" data.
+    command = "cman_tool"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "cman_tool-services.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["services"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "clustat" data.
+    command = "clustat"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "clustat.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, [], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "mount -l" filesystem data.
+    command = "cat"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "cat-proc_mounts.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["/proc/mounts"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "ps -eo user,pid,%cpu,%mem,vsz,rss,tty,stat,start,time,comm,wchan" data.
+    command = "ps"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "ps.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        #runCommand(command, ["-eo", "user,pid,%cpu,%mem,vsz,rss,tty,stat,start,time,comm,wchan"], standardOut=fout)
+        runCommand(command, ["h", "-AL", "-o", "tid,s,cmd"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "group_tool ls" data.
+    command = "group_tool"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "group_tool-ls.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["ls"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "group_tool dump fence" data.
+    command = "group_tool"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "group_tool-dump_fence.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["dump", "fence"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+    # Get "group_tool dump gfs2" data.
+    command = "group_tool"
+    pathToCommandOutput = os.path.join(pathToDSTDir, "group_tool-dump_gfs2.txt")
+    try:
+        fout = open(pathToCommandOutput, "w")
+        runCommand(command, ["dump", "gfs2"], standardOut=fout)
+        fout.close()
+    except IOError:
+        message = "There was an error the command output for %s to the file %s." %(command, pathToCommandOutput)
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def triggerSysRQEvents():
+    command = "echo"
+    pathToSysrqTriggerFile = "/proc/sysrq-trigger"
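+    # Each trigger is written by running "echo <trigger>" with its stdout
+    # redirected into /proc/sysrq-trigger (the shell equivalent of
+    # "echo m > /proc/sysrq-trigger"); the resulting dumps land in the kernel
+    # log, which gatherLogs() later copies from /var/log/messages.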
+    # m - dump information about memory allocation
+    # t - dump thread state information
+    triggers = ["m", "t"]
+    for trigger in triggers:
+        try:
+            fout = open(pathToSysrqTriggerFile, "w")
+            runCommand(command, [trigger], standardOut=fout)
+            fout.close()
+        except IOError:
+            message = "There was an error the command output for %s to the file %s." %(command, pathToSysrqTriggerFile)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def gatherLogs(pathToDSTDir):
+    if (mkdirs(pathToDSTDir)):
+        # Copy messages logs that contain the sysrq data.
+        pathToLogFile = "/var/log/messages"
+        pathToDSTLogFile = os.path.join(pathToDSTDir, os.path.basename(pathToLogFile))
+        try:
+            shutil.copyfile(pathToLogFile, pathToDSTLogFile)
+        except shutil.Error:
+            message = "There was an error copying the file: %s to %s." %(pathToLogFile, pathToDSTLogFile)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+        pathToLogDir = "/var/log/cluster"
+        pathToDSTLogDir = os.path.join(pathToDSTDir, os.path.basename(pathToLogDir))
+        if (os.path.isdir(pathToLogDir)):
+            try:
+                shutil.copytree(pathToLogDir, pathToDSTLogDir)
+            except shutil.Error:
+                message = "There was an error copying the directory: %s to %s." %(pathToLogDir, pathToDSTLogDir)
+                logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def gatherDLMLockDumps(pathToDSTDir, listOfGFS2Filesystems):
+    lockDumpType = "dlm"
+    pathToSrcDir = os.path.join(PATH_TO_DEBUG_DIR, lockDumpType)
+    pathToOutputDir = os.path.join(pathToDSTDir, lockDumpType)
+    message = "Copying the %s lockdump data from the directory for the %s." %(lockDumpType, pathToSrcDir)
+    logging.getLogger(MAIN_LOGGER_NAME).status(message)
+    for filename in os.listdir(pathToSrcDir):
+        for name in listOfGFS2Filesystems:
+            if (filename.startswith(name)):
+                pathToCurrentFilename = os.path.join(pathToSrcDir, filename)
+                pathToDSTDir = os.path.join(pathToOutputDir, name)
+                mkdirs(pathToDSTDir)
+                pathToDSTFilename = os.path.join(pathToDSTDir, filename)
+                try:
+                    shutil.copy(pathToCurrentFilename, pathToDSTFilename)
+                except shutil.Error:
+                    message = "There was an error copying the file: %s to %s." %(pathToCurrentFilename, pathToDSTFilename)
+                    logging.getLogger(MAIN_LOGGER_NAME).error(message)
+                except OSError:
+                    message = "There was an error copying the file: %s to %s." %(pathToCurrentFilename, pathToDSTFilename)
+                    logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def gatherGFS2LockDumps(pathToDSTDir, listOfGFS2Filesystems):
+    lockDumpType = "gfs2"
+    pathToSrcDir = os.path.join(PATH_TO_DEBUG_DIR, lockDumpType)
+    pathToOutputDir = os.path.join(pathToDSTDir, lockDumpType)
+    for dirName in os.listdir(pathToSrcDir):
+        pathToCurrentDir = os.path.join(pathToSrcDir, dirName)
+        if ((os.path.isdir(pathToCurrentDir)) and (dirName in listOfGFS2Filesystems)):
+            mkdirs(pathToOutputDir)
+            pathToDSTDir = os.path.join(pathToOutputDir, dirName)
+            try:
+                message = "Copying the lockdump data for the %s filesystem: %s" %(lockDumpType, dirName)
+                logging.getLogger(MAIN_LOGGER_NAME).status(message)
+                shutil.copytree(pathToCurrentDir, pathToDSTDir)
+            except shutil.Error:
+                message = "There was an error copying the directory: %s to %s." %(pathToCurrentDir, pathToDSTDir)
+                logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            except OSError:
+                message = "There was an error copying the directory: %s to %s." %(pathToCurrentDir, pathToDSTDir)
+                logging.getLogger(MAIN_LOGGER_NAME).error(message)
+
+def archiveData(pathToSrcDir):
+    # Archive and compress the output directory so that it takes up less space.
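+    # The archive is a bzip2-compressed tar; it can be unpacked with, for
+    # example, "tar -xjf <output-dir>.tar.bz2" (a standard tar invocation,
+    # not something this script runs itself).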
+    if (os.path.exists(pathToSrcDir)):
+        pathToTarFilename = "%s.tar.bz2" %(pathToSrcDir)
+        message = "Creating a compressed archvied file: %s" %(pathToTarFilename)
+        logging.getLogger(MAIN_LOGGER_NAME).info(message)
+        try:
+            tar = tarfile.open(pathToTarFilename, "w:bz2")
+            tar.add(pathToSrcDir, arcname=os.path.basename(pathToSrcDir))
+            tar.close()
+        except tarfile.TarError:
+            message = "There was an error creating the tarfile: %s." %(pathToTarFilename)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            return ""
+        if (os.path.exists(pathToTarFilename)):
+            return pathToTarFilename
+    return ""
+
+# ##############################################################################
+# Get user selected options
+# ##############################################################################
+def __getOptions(version) :
+    cmdParser = OptionParserExtended(version)
+    cmdParser.add_option("-d", "--debug",
+                         action="store_true",
+                         dest="enableDebugLogging",
+                         help="Enables debug logging.",
+                         default=False)
+    cmdParser.add_option("-q", "--quiet",
+                         action="store_true",
+                         dest="disableLoggingToConsole",
+                         help="Disables logging to console.",
+                         default=False)
+    cmdParser.add_option("-i", "--info",
+                         action="store_true",
+                         dest="enablePrintInfo",
+                         help="Prints to console some basic information about the GFS2 filesystems mounted on the cluster node.",
+                         default=False)
+    cmdParser.add_option("-M", "--mount_debug_fs",
+                         action="store_true",
+                         dest="enableMountDebugFS",
+                         help="Enables the mounting of the debug filesystem if it is not mounted. Default is disabled.",
+                         default=False)
+    cmdParser.add_option("-o", "--path_to_output_dir",
+                         action="store",
+                         dest="pathToOutputDir",
+                         help="The path to the output directory where all the collect data will be stored. Default is /tmp/<date>-<hostname>-%s" %(os.path.basename(sys.argv[0])),
+                         type="string",
+                         default="")
+    cmdParser.add_option("-r", "--num_of_runs",
+                         action="store",
+                         dest="numberOfRuns",
+                         help="The number of lockdumps runs to do. Default is 2.",
+                         type="int",
+                         default=2)
+    cmdParser.add_option("-s", "--seconds_sleep",
+                         action="store",
+                         dest="secondsToSleep",
+                         help="The number of seconds sleep between runs. Default is 120 seconds.",
+                         type="int",
+                         default=120)
+    cmdParser.add_option("-t", "--archive",
+                         action="store_true",
+                         dest="enableArchiveOutputDir",
+                         help="Enables archiving and compressing of the output directory with tar and bzip2. Default is disabled.",
+                         default=False)
+    cmdParser.add_option("-n", "--fs_name",
+                         action="extend",
+                         dest="listOfGFS2Names",
+                         help="List of GFS2 filesystems that will have their lockdump data gathered.",
+                         type="string",
+                         default=[])
+    # Get the options and return the result.
+    (cmdLineOpts, cmdLineArgs) = cmdParser.parse_args()
+    return (cmdLineOpts, cmdLineArgs)
+
+# ##############################################################################
+# OptParse classes for commandline options
+# ##############################################################################
+class OptionParserExtended(OptionParser):
+    """
+    This is the class that gets the command line options the end user
+    selects.
+    """
+    def __init__(self, version) :
+        self.__commandName = os.path.basename(sys.argv[0])
+        versionMessage = "%s %s\n" %(self.__commandName, version)
+
+        commandDescription = "%s will capture the GFS2 and DLM lock data required to analyze a GFS2 filesystem.\n" %(self.__commandName)
+
+        OptionParser.__init__(self, option_class=ExtendOption,
+                              version=versionMessage,
+                              description=commandDescription)
+
+    def print_help(self):
+        self.print_version()
+        examplesMessage = "\nPrints information about the available GFS2 filesystems that can have lockdump data captured."
+        examplesMessage += "\n$ %s -i\n" %(self.__commandName)
+        examplesMessage += "\nThis command will mount the debug directory if it is not mounted. It will do 3 runs of\n"
+        examplesMessage += "gathering the lockdump information in 10 second intervals for only the GFS2 filesystems\n"
+        examplesMessage += "with the names myGFS2vol2,myGFS2vol1. Then it will archive and compress the data collected."
+        examplesMessage += "\n$ %s -M -r 3 -s 10 -t -n myGFS2vol2,myGFS2vol1\n" %(self.__commandName)
+        OptionParser.print_help(self)
+        print examplesMessage
+
+
+class ExtendOption (Option):
+    """
+    Allow to specify comma delimited list of entries for arrays
+    and dictionaries.
+    """
+    ACTIONS = Option.ACTIONS + ("extend",)
+    STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
+    TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
+
+    def take_action(self, action, dest, opt, value, values, parser):
+        if (action == "extend") :
+            valueList=[]
+            try:
+                for v in value.split(","):
+                    # Need to add code for dealing with paths if an option for paths is added.
+                    valueList.append(v)
+            except:
+                pass
+            else:
+                values.ensure_value(dest, []).extend(valueList)
+        else:
+            Option.take_action(self, action, dest, opt, value, values, parser)
+
+# ###############################################################################
+# Main Function
+# ###############################################################################
+if __name__ == "__main__":
+    try:
+        # #######################################################################
+        # Get the options from the commandline.
+        # #######################################################################
+        (cmdLineOpts, cmdLineArgs) = __getOptions(VERSION_NUMBER)
+
+        # #######################################################################
+        # Setup the logger and create config directory
+        # #######################################################################
+        # Create the logger
+        logLevel = logging.INFO
+        logger = logging.getLogger(MAIN_LOGGER_NAME)
+        logger.setLevel(logLevel)
+        # Create a new status function and level.
+        logging.STATUS = logging.INFO + 2
+        logging.addLevelName(logging.STATUS, "STATUS")
+        # Create a function for the STATUS_LEVEL since not defined by python. This
+        # means you can call it like the other predefined message
+        # functions. Example: logging.getLogger("loggerName").status(message)
+        setattr(logger, "status", lambda *args: logger.log(logging.STATUS, *args))
+        ch = logging.StreamHandler()
+        ch.setLevel(logLevel)
+        ch.setFormatter(logging.Formatter(MAIN_LOGGER_FORMAT))
+        logger.addHandler(ch)
+
+        # #######################################################################
+        # Set the logging levels.
+        # #######################################################################
+        if ((cmdLineOpts.enableDebugLogging) and (not cmdLineOpts.disableLoggingToConsole)):
+            logging.getLogger(MAIN_LOGGER_NAME).setLevel(logging.DEBUG)
+            ch.setLevel(logging.DEBUG)
+            message = "Debugging has been enabled."
+            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
+        if (cmdLineOpts.disableLoggingToConsole):
+            logging.disable(logging.CRITICAL)
+
+        # #######################################################################
+        # Check to see if pid file exists and error if it does.
+        # #######################################################################
+        if (os.path.exists(PATH_TO_PID_FILENAME)):
+            message = "The PID file %s already exists and this script cannot run till it does not exist." %(PATH_TO_PID_FILENAME)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            message = "Verify that there are no other existing processes running. If there are running processes those need to be stopped first and the file removed."
+            logging.getLogger(MAIN_LOGGER_NAME).info(message)
+            exitScript(removePidFile=False, errorCode=1)
+        else:
+            message = "Creating the pid file: %s" %(PATH_TO_PID_FILENAME)
+            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
+            # Create the pid file so we don't have more than one instance of
+            # this script running.
+            writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
+
+        # Get the clusternode name.
+        clusternode = getClusterNode(cmdLineOpts.listOfGFS2Names)
+        if (clusternode == None):
+            message = "The cluster or cluster node name could not be found from \"cman_tool status\"."
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            exitScript(removePidFile=False, errorCode=1)
+        if (cmdLineOpts.enablePrintInfo):
+            logging.disable(logging.CRITICAL)
+            print "List of all the mounted GFS2 filesystems that can have their lockdump data captured:"
+            print clusternode
+            exitScript()
+        # #######################################################################
+        # Create the output directory now to verify that it can be created
+        # before proceeding. If it already exists (for example, from a previous
+        # run) the script exits so that existing data is not overwritten. More
+        # checks on whether the path is a file or a directory could be added.
+        # #######################################################################
+        message = "The gathering of the lockdumps will be performed on the clusternode \"%s\" which is part of the cluster \"%s\"." %(clusternode.getClusterNodeName(), clusternode.getClusterName())
+        logging.getLogger(MAIN_LOGGER_NAME).info(message)
+        pathToOutputDir = cmdLineOpts.pathToOutputDir
+        if (not len(pathToOutputDir) > 0):
+            pathToOutputDir = "%s" %(os.path.join("/tmp", "%s-%s-%s" %(time.strftime("%Y-%m-%d_%H%M%S"), clusternode.getClusterNodeName(), os.path.basename(sys.argv[0]))))
+        if (os.path.exists(pathToOutputDir)):
+            message = "The directory already exists and could contain previous lockdump data: %s" %(pathToOutputDir)
+            logging.getLogger(MAIN_LOGGER_NAME).error(message)
+            exitScript(errorCode=1)
+        else:
+            message = "This directory that will be used to capture all the data: %s" %(pathToOutputDir)
+            logging.getLogger(MAIN_LOGGER_NAME).info(message)
+            if (not mkdirs(pathToOutputDir)):
+                exitScript(errorCode=1)
+
+        # #######################################################################
+        # Check to see if the debug directory is mounted. If not then
+        # log an error.
+        # #######################################################################
+        result = mountFilesystemDebug(cmdLineOpts.enableMountDebugFS)
+        if (not result):
+            message = "Please mount the debug filesystem before running this script. For example: $ mount none -t debugfs %s" %(PATH_TO_DEBUG_DIR)
+            logging.getLogger(MAIN_LOGGER_NAME).info(message)
+            exitScript(errorCode=1)
+
+        # #######################################################################
+        # Gather data and the lockdumps.
+        # #######################################################################
+        message = "The process of gathering all the required files will begin before capturing the lockdumps."
+        logging.getLogger(MAIN_LOGGER_NAME).info(message)
+        for i in range(0,cmdLineOpts.numberOfRuns):
+            # Add the clusternode name under each run directory so that the
+            # lockdump data gathered from multiple clusternodes can be combined,
+            # with all of a run's data kept in that run's directory.
+            pathToOutputRunDir = os.path.join(pathToOutputDir, "run%d/%s" %(i, clusternode.getClusterNodeName()))
+            if (not mkdirs(pathToOutputRunDir)):
+                exitScript(errorCode=1)
+            # Gather various bits of data from the clusternode.
+            message = "Gathering some general information about the clusternode %s for run %d." %(clusternode.getClusterNodeName(), i)
+            logging.getLogger(MAIN_LOGGER_NAME).status(message)
+            gatherGeneralInformation(pathToOutputRunDir)
+            # Trigger sysrq events to capture memory and thread information
+            message = "Triggering the sysrq events for the clusternode %s for run %d." %(clusternode.getClusterNodeName(), i)
+            logging.getLogger(MAIN_LOGGER_NAME).status(message)
+            triggerSysRQEvents()
+            # Gather the dlm locks.
+            lockDumpType = "dlm"
+            message = "Gathering the %s lock dumps for clusternode %s for run %d." %(lockDumpType, clusternode.getClusterNodeName(), i)
+            logging.getLogger(MAIN_LOGGER_NAME).status(message)
+            gatherDLMLockDumps(pathToOutputRunDir, clusternode.getMountedGFS2FilesystemNames(includeClusterName=False))
+            # Gather the glock locks from gfs2.
+            lockDumpType = "gfs2"
+            message = "Gathering the %s lock dumps for clusternode %s for run %d." %(lockDumpType, clusternode.getClusterNodeName(), i)
+            logging.getLogger(MAIN_LOGGER_NAME).status(message)
+            gatherGFS2LockDumps(pathToOutputRunDir, clusternode.getMountedGFS2FilesystemNames())
+            # Gather log files
+            message = "Gathering the log files for the clusternode %s for run %d." %(clusternode.getClusterNodeName(), i)
+            logging.getLogger(MAIN_LOGGER_NAME).status(message)
+            gatherLogs(os.path.join(pathToOutputRunDir, "logs"))
+            if ((cmdLineOpts.secondsToSleep > 0) and (i < (cmdLineOpts.numberOfRuns - 1))):
+                message = "The script will sleep for %d seconds between each run of capturing the lockdumps." %(cmdLineOpts.secondsToSleep)
+                logging.getLogger(MAIN_LOGGER_NAME).info(message)
+                time.sleep(cmdLineOpts.secondsToSleep)
+        # #######################################################################
+        # Archive the file if enabled and print the location of the output
+        # directory.
+        # #######################################################################
+        # After the gathering is done, print out where the generated files are
+        # located and what to do with them.
+        message = "All the files have been gathered and this directory contains all the captured data: %s" %(pathToOutputDir)
+        logging.getLogger(MAIN_LOGGER_NAME).info(message)
+
+        # #######################################################################
+        # Archive the directory that contains all the data and archive it.
+        # #######################################################################
+        if (cmdLineOpts.enableArchiveOutputDir):
+            message = "The lockdump data will now be archived. This could some time depending on the size of the data collected."
+            logging.getLogger(MAIN_LOGGER_NAME).info(message)
+            pathToTarFilename = archiveData(pathToOutputDir)
+            if (os.path.exists(pathToTarFilename)):
+                message = "The compressed archvied file was created: %s" %(pathToTarFilename)
+                logging.getLogger(MAIN_LOGGER_NAME).info(message)
+
+        # #######################################################################
+    except KeyboardInterrupt:
+        print ""
+        message =  "This script will exit since control-c was executed by end user."
+        logging.getLogger(MAIN_LOGGER_NAME).error(message)
+        exitScript(errorCode=1)
+    # #######################################################################
+    # Exit the application with zero exit code since we cleanly exited.
+    # #######################################################################
+    exitScript()
-- 
1.7.1

