[Crash-utility] Problem on getting kernel backtrace with a `virsh dump' dumped kvm dumpfile

hutao at cn.fujitsu.com hutao at cn.fujitsu.com
Fri Aug 27 06:43:36 UTC 2010


Hi,

I encountered a problem getting a backtrace from a kvm dumpfile created by
`virsh dump': the bt command does not produce a proper kernel backtrace.

guest kernel: 2.6.32
crash: 5.0.6 patched with qemu_ram_version_4.patch (attached)

Steps to get the dumpfile:

  1. virsh start vm
  2. connect to vm, say by vnc
  3. On the guest, build and run the following code:

int main(void)
{
	while (1);

	return 0;
}

  4. On the host, run `virsh dump vm /mnt/data/kernel-2.6.32.dump3-userspace-endless-loop'

Then run crash:

  crash /mnt/data/kernel/linux-2.6.32/System.map /mnt/data/kernel/linux-2.6.32/vmlinux /mnt/data/kernel-2.6.32.dump3-userspace-endless-loop

got the result:


crash 5.0.6
Copyright (C) 2002-2010  Red Hat, Inc.
Copyright (C) 2004, 2005, 2006  IBM Corporation
Copyright (C) 1999-2006  Hewlett-Packard Co
Copyright (C) 2005, 2006  Fujitsu Limited
Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
Copyright (C) 2005  NEC Corporation
Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions.  Enter "help copying" to see the conditions.
This program has absolutely no warranty.  Enter "help warranty" for details.
 
GNU gdb (GDB) 7.0                               
Copyright (C) 2009 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...

  SYSTEM MAP: /mnt/data/kernel/linux-2.6.32/System.map                 
DEBUG KERNEL: /mnt/data/kernel/linux-2.6.32/vmlinux (2.6.32)
    DUMPFILE: /mnt/data/kernel-2.6.32.dump3-userspace-endless-loop
        CPUS: 1
        DATE: Fri Aug 27 05:18:12 2010
      UPTIME: 00:00:51
LOAD AVERAGE: 0.44, 0.11, 0.03
       TASKS: 67
    NODENAME: localhost.localdomain
     RELEASE: 2.6.32
     VERSION: #2 SMP PREEMPT Wed Aug 25 15:26:48 CST 2010
     MACHINE: x86_64  (2925 Mhz)
      MEMORY: 511.6 MB
       PANIC: "Oops: 0003 [#1] PREEMPT SMP " (check log for details)
         PID: 0
     COMMAND: "swapper"
        TASK: ffffffff8158df70  [THREAD_INFO: ffffffff8154e000]
         CPU: 0
       STATE: TASK_RUNNING 
     WARNING: panic task not found

crash> bt
PID: 0      TASK: ffffffff8158df70  CPU: 0   COMMAND: "swapper"
 #0 [ffffffff8154fe28] schedule at ffffffff8138baa3
bt: invalid kernel virtual address: 41  type: "call byte"
bt: invalid kernel virtual address: 44e6835ad  type: "call byte"
bt: load_memfile_offset: read: Success
bt: read error: kernel virtual address: fffffffffffffffc  type: "call byte"
bt: invalid kernel virtual address: e7ab  type: "call byte"
bt: invalid kernel virtual address: e273  type: "call byte"
bt: invalid kernel virtual address: 13a7b  type: "call byte"
bt: invalid kernel virtual address: 935cb  type: "call byte"
bt: load_memfile_offset: read: Success
bt: read error: kernel virtual address: fffffffffffffffb  type: "call byte"
bt: invalid kernel virtual address: 935cb  type: "call byte"
 #1 [ffffffff8154fef0] cpu_idle at ffffffff8100ad1e
crash> 


Note the output of the `bt' command above. Without the endless-loop program
running in the guest, `bt' on the resulting dumpfile gives:


crash> bt
PID: 0      TASK: ffffffff8158df70  CPU: 0   COMMAND: "swapper"
 #0 [ffffffff8154fe28] schedule at ffffffff8138baa3
 #1 [ffffffff8154fe48] apic_timer_interrupt at ffffffff8100c65e
 #2 [ffffffff8154fed0] need_resched at ffffffff810125a8
 #3 [ffffffff8154fee0] default_idle at ffffffff81012e03
 #4 [ffffffff8154fef0] cpu_idle at ffffffff8100acd6
crash> 


Any suggestions on how to solve the problem?


Regards,
Hu Tao
-------------- next part --------------
--- crash-5.0.0/qemu-load.c.orig
+++ crash-5.0.0/qemu-load.c
@@ -169,6 +169,7 @@ get_qemu128 (FILE *fp, union qemu_uint12
 #define RAM_SAVE_FLAG_MEM_SIZE	0x04
 #define RAM_SAVE_FLAG_PAGE	0x08
 #define RAM_SAVE_FLAG_EOS	0x10
+#define RAM_SAVE_FLAG_CONTINUE	0x20
 #define RAM_SAVE_ADDR_MASK	(~4095LL)
 
 #define RAM_OFFSET_COMPRESSED	(~(off_t)255)
@@ -192,24 +193,63 @@ ram_alloc (struct qemu_device_ram *dram,
 	dram->last_ram_offset = size;
 }
 
+static int
+get_string (FILE *fp, char *name)
+{
+	size_t items;
+	int sz = (uint8_t) getc (fp);
+	if (sz == EOF)
+		return -1;
+	items = fread (name, sz, 1, fp);
+	name[sz] = 0;
+	return sz;
+}
+
+static void
+ram_read_blocks (FILE *fp, uint64_t size)
+{
+	char name[257];
+	/* The RAM block table is a list of block names followed by
+	   their sizes.  Read it until the sizes sum up to SIZE bytes.  */
+	while (size) {
+		get_string (fp, name);
+		size -= get_be64 (fp);
+	}
+}
+
 static uint32_t
 ram_load (struct qemu_device *d, FILE *fp, enum qemu_save_section sec)
 {
+	char name[257];
 	struct qemu_device_ram *dram = (struct qemu_device_ram *)d;
 	uint64_t header;
 
-	do {
+	for (;;) {
 		uint64_t addr;
 		off_t entry;
 
 		header = get_be64 (fp);
+		if (feof (fp) || ferror (fp))
+			return 0;
+		if (header & RAM_SAVE_FLAG_EOS)
+			break;
+
 		assert (!(header & RAM_SAVE_FLAG_FULL));
 
 		addr = header & RAM_SAVE_ADDR_MASK;
-		if (header & RAM_SAVE_FLAG_MEM_SIZE)
+
+		if (header & RAM_SAVE_FLAG_MEM_SIZE) {
 			ram_alloc (dram, addr);
+			if (d->version_id >= 4)
+				ram_read_blocks(fp, addr);
+			continue;
+		}
+
+		if (d->version_id >= 4 && !(header & RAM_SAVE_FLAG_CONTINUE)
+		    && !(header & RAM_SAVE_FLAG_EOS))
+			get_string(fp, name);
 
-		else if (header & RAM_SAVE_FLAG_COMPRESS) {
+		if (header & RAM_SAVE_FLAG_COMPRESS) {
 //			dram->offsets[addr / 4096] =
 			entry = RAM_OFFSET_COMPRESSED | getc(fp);
 			store_mapfile_offset(addr, &entry);
@@ -220,8 +260,7 @@ ram_load (struct qemu_device *d, FILE *f
 			store_mapfile_offset(addr, &entry);
 			fseek (fp, 4096, SEEK_CUR);
 		}
-
-	} while (!(header & RAM_SAVE_FLAG_EOS) && !feof (fp) && !ferror (fp));
+	}
 
 	dram->fp = fp;
 	return QEMU_FEATURE_RAM;
@@ -264,7 +303,7 @@ ram_init_load (struct qemu_device_list *
 		ram_free
 	};
 
-	assert (version_id == 3);
+	assert (version_id == 3 || version_id == 4);
 	kvm->mapinfo.ram_version_id = version_id;
 	return device_alloc (dl, sizeof (struct qemu_device_ram),
 			     &ram, section_id, instance_id, version_id);
@@ -780,19 +819,13 @@ device_get (const struct qemu_device_loa
 	char name[257];
 	uint32_t section_id, instance_id, version_id;
 //	bool live;
-	size_t items;
-	int sz;
 
 	section_id = get_be32 (fp);
 	if (sec != QEMU_VM_SECTION_START &&
 	    sec != QEMU_VM_SECTION_FULL)
 		return device_find (dl, section_id);
 
-	sz = getc (fp);
-	if (sz == EOF)
-		return NULL;
-	items = fread (name, sz, 1, fp);
-	name[sz] = 0;
+	get_string(fp, name);
 
 	instance_id = get_be32 (fp);
 	version_id = get_be32 (fp);


More information about the Crash-utility mailing list