[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [Crash-utility] Running idle threads show wrong CPU numbers



On Wed, 2010-02-10 at 10:08 -0500, Dave Anderson wrote:
> ----- "Michael Holzheu" <holzheu linux vnet ibm com> wrote:
> 
> > Hi again,
> 
> > > When I change get_smp_cpus() to return "get_highest_cpu_online() + 1" I
> > > see five swapper idle tasks when using "ps". The problem I now have is
> > > that I have to provide a backtrace for the offline cpus. But the offline
> > > CPUs do not have any stack on s390. Is there a way to tell crash that
> > > there is no backtrace available? Probably I overlooked something...
> > 
> > Ok, I think I got it now. In case of an offline CPU, I will use
> > "task_struct_thread_ksp", as I already do for non-active tasks.
> > 
> > When I do that I get for the swapper tasks with the offline CPUs:
> > 
> > PID: 0      TASK: 18d38340          CPU: 2   COMMAND: "swapper"
> >  #0 [18d3feb8] ret_from_fork at 117e12
> > 
> > PID: 0      TASK: 18d40440          CPU: 3   COMMAND: "swapper"
> >  #0 [18d47eb8] ret_from_fork at 117e12
> 
> I'm not sure why you should do anything.  The cpu is offline and for all
> practical purposes it doesn't exist, so why bother?

Because you can do a "bt" on the swapper task with the offline CPU.
Then s390x_get_stack_frame() is called where I figure out the stack
pointer and instruction address. In that function I check if the task is
currently running on a CPU and in that case I get the information from
the associated s390 lowcore, where the registers are stored in case of a
dump. If the task is not running I get the information from the thread
struct.

> The patch I have queued just uses get_highest_cpu_online()+1 and
> does nothing else.  But I only tested it on a live system, and
> any backtrace attempt on the offlined swapper task just shows
> (active).  What happens when you do a "bt -a" with a dumpfile?

It shows all swapper tasks (online and offline), but I get errors for
the backtrace for the offline CPUs.

The attached patch would solve the problem (and eliminate most of the
probably redundant s390(x)_has_cpu() function).


With this patch "ps" shows:

   PID    PPID  CPU       TASK        ST  %MEM     VSZ    RSS  COMM
>     0      0   0       800ef0       RU   0.0       0      0  [swapper]
>     0      0   1      18d30240      RU   0.0       0      0  [swapper]
>     0      0   2      18d38340      RU   0.0       0      0  [swapper]
>     0      0   3      18d40440      RU   0.0       0      0  [swapper]
>     0      0   4      18d48540      RU   0.0       0      0  [swapper]
      1      0   1      18d18040      IN   0.2    2244   1020  init
...

And "bt -a" shows:

PID: 0      TASK: 800ef0            CPU: 0   COMMAND: "swapper"
 LOWCORE INFO:
  -psw      : 0x0706000180000000 0x0000000000115564
  -function : vtime_stop_cpu at 115564
  -prefix   : 0x18d28000
  -cpu timer: 0x7fff00c1 0x00c584ef
...

PID: 0      TASK: 18d30240          CPU: 1   COMMAND: "swapper"
 LOWCORE INFO:
  -psw      : 0x0706000180000000 0x0000000000115564
  -function : vtime_stop_cpu at 115564
...

PID: 0      TASK: 18d38340          CPU: 2   COMMAND: "swapper"
 #0 [18d3feb8] ret_from_fork at 117e12
...

PID: 0      TASK: 18d40440          CPU: 3   COMMAND: "swapper"
 #0 [18d47eb8] ret_from_fork at 117e12
...

PID: 0      TASK: 18d48540          CPU: 4   COMMAND: "swapper"
 LOWCORE INFO:
  -psw      : 0x0706000180000000 0x0000000000115564
  -function : vtime_stop_cpu at 115564
  -prefix   : 0x1416a000

Michael
---
 s390.c  |   57 ++++++++++++++-------------------------------------------
 s390x.c |   57 ++++++++++++++-------------------------------------------
 2 files changed, 28 insertions(+), 86 deletions(-)

--- a/s390.c
+++ b/s390.c
@@ -542,46 +542,17 @@ s390_cpu_of_task(unsigned long task)
 }
 
 /*
- * returns true, if task currently is executed by a cpu
+ * returns true, if task of bt currently is executed by a cpu
  */ 
 static int 
-s390_has_cpu(unsigned long task)
+s390_has_cpu(struct bt_info *bt)
 {
-	if(VALID_MEMBER(task_struct_cpus_runnable)){
-                /* Linux 2.4 */
-                unsigned long cpus_runnable;
-                readmem(task+OFFSET(task_struct_cpus_runnable),KVADDR,
-                        &cpus_runnable,sizeof(cpus_runnable),
-                        "cpus_runnable", FAULT_ON_ERROR);
-                if(cpus_runnable != ~0U)
-                        return TRUE;
-                else
-                        return FALSE;
-        } else {
-		/* Linux 2.6 */
-		unsigned long runqueue_addr, runqueue_offset;
-		unsigned long cpu_offset, per_cpu_offset_addr, running_task;
-		char *runqueue;
-		int cpu;
-
-		cpu = s390_cpu_of_task(task);
-		runqueue = GETBUF(SIZE(runqueue));
-
-		runqueue_offset=symbol_value("per_cpu__runqueues");
-		per_cpu_offset_addr=symbol_value("__per_cpu_offset");
-		readmem(per_cpu_offset_addr + cpu * sizeof(long),KVADDR,
-			&cpu_offset, sizeof(long),"per_cpu_offset",
-			FAULT_ON_ERROR);
-		runqueue_addr=runqueue_offset + cpu_offset;
-		readmem(runqueue_addr,KVADDR,runqueue,SIZE(runqueue),
-			"runqueue", FAULT_ON_ERROR);
-		running_task = ULONG(runqueue + OFFSET(runqueue_curr));
-		FREEBUF(runqueue);
-		if(running_task == task)
-			return TRUE;
-		else
-			return FALSE;
-	}
+	int cpu = bt->tc->processor;
+
+	if (is_task_active(bt->task) && (kt->cpu_flags[cpu] & ONLINE))
+		return TRUE;
+	else
+		return FALSE;
 }
 
 /*
@@ -635,7 +606,7 @@ s390_back_trace_cmd(struct bt_info *bt)
 	ksp = bt->stkptr;
 
 	/* print lowcore and get async stack when task has cpu */
-	if(s390_has_cpu(bt->task)){
+	if(s390_has_cpu(bt)){
 		char lowcore[LOWCORE_SIZE];
 		unsigned long psw_flags;
 		int cpu = s390_cpu_of_task(bt->task);
@@ -687,7 +658,7 @@ s390_back_trace_cmd(struct bt_info *bt)
 			stack = bt->stackbuf;
 			stack_base = stack_start;
 		} else if((backchain > async_start) && (backchain < async_end)
-			  && s390_has_cpu(bt->task)){
+			  && s390_has_cpu(bt)){
 			stack = async_stack;
 			stack_base = async_start;
 		} else {
@@ -903,12 +874,12 @@ s390_get_stack_frame(struct bt_info *bt,
 	int r14_offset;
 	char lowcore[LOWCORE_SIZE];
 
-	if(s390_has_cpu(bt->task))
+	if(s390_has_cpu(bt))
 		s390_get_lowcore(s390_cpu_of_task(bt->task),lowcore);
 
 	/* get the stack pointer */
 	if(esp){
-		if(s390_has_cpu(bt->task)){
+		if(s390_has_cpu(bt)){
 			ksp = ULONG(lowcore + MEMBER_OFFSET("_lowcore",
 				"gpregs_save_area") + (15 * S390_WORD_SIZE));
 		} else {
@@ -926,7 +897,7 @@ s390_get_stack_frame(struct bt_info *bt,
 	if(!eip)
 		return;
 
-	if(s390_has_cpu(bt->task) && esp){
+	if(s390_has_cpu(bt) && esp){
 		*eip = ULONG(lowcore + OFFSET(s390_lowcore_psw_save_area) +
 			S390_WORD_SIZE) & S390_ADDR_MASK;
 	} else {
@@ -1015,7 +986,7 @@ s390_dis_filter(ulong vaddr, char *inbuf
 int
 s390_get_smp_cpus(void)
 {
-	return get_cpus_online();
+	return (get_highest_cpu_online() + 1);
 }
 
 /*
--- a/s390x.c
+++ b/s390x.c
@@ -554,46 +554,17 @@ s390x_cpu_of_task(unsigned long task)
 }
 
 /*
- * returns true, if task currently is executed by a cpu
+ * returns true, if task of bt currently is executed by a cpu
  */ 
 static int 
-s390x_has_cpu(unsigned long task)
+s390x_has_cpu(struct bt_info *bt)
 {
-	if(VALID_MEMBER(task_struct_cpus_runnable)){
-		/* Linux 2.4 */
-		unsigned long cpus_runnable;
-		readmem(task+OFFSET(task_struct_cpus_runnable),KVADDR,
-			&cpus_runnable,sizeof(cpus_runnable),
-			"cpus_runnable", FAULT_ON_ERROR);
-		if(cpus_runnable != ~0ULL)
-			return TRUE;
-		else
-			return FALSE;
-	} else {
-		/* Linux 2.6 */
-		unsigned long runqueue_addr, runqueue_offset;
-		unsigned long cpu_offset, per_cpu_offset_addr, running_task;
-		char *runqueue;
-		int cpu;
-
-		cpu = s390x_cpu_of_task(task);
-		runqueue = GETBUF(SIZE(runqueue));
-
-		runqueue_offset=symbol_value("per_cpu__runqueues");
-		per_cpu_offset_addr=symbol_value("__per_cpu_offset");
-		readmem(per_cpu_offset_addr + cpu * sizeof(long),KVADDR,
-			&cpu_offset, sizeof(long),"per_cpu_offset",
-			FAULT_ON_ERROR);
-		runqueue_addr=runqueue_offset + cpu_offset;
-		readmem(runqueue_addr,KVADDR,runqueue,SIZE(runqueue),
-			"runqueue", FAULT_ON_ERROR);
-		running_task = ULONG(runqueue + OFFSET(runqueue_curr));
-		FREEBUF(runqueue);
-		if(running_task == task)
-			return TRUE; 
-		else
-			return FALSE;
-	}
+	int cpu = bt->tc->processor;
+
+	if (is_task_active(bt->task) && (kt->cpu_flags[cpu] & ONLINE))
+		return TRUE;
+	else
+		return FALSE;
 }
 
 /*
@@ -647,7 +618,7 @@ s390x_back_trace_cmd(struct bt_info *bt)
 	ksp = bt->stkptr;
 
 	/* print lowcore and get async stack when task has cpu */
-	if(s390x_has_cpu(bt->task)){
+	if(s390x_has_cpu(bt)){
 		char lowcore[LOWCORE_SIZE];
 		unsigned long psw_flags;
 		int cpu = s390x_cpu_of_task(bt->task);
@@ -700,7 +671,7 @@ s390x_back_trace_cmd(struct bt_info *bt)
 			stack = bt->stackbuf;
 			stack_base = stack_start;
 		} else if((backchain > async_start) && (backchain < async_end)
-			  && s390x_has_cpu(bt->task)){
+			  && s390x_has_cpu(bt)){
 			stack = async_stack;
 			stack_base = async_start;
 		} else {
@@ -936,12 +907,12 @@ s390x_get_stack_frame(struct bt_info *bt
 	int r14_offset;
 	char lowcore[LOWCORE_SIZE];
 
-	if(s390x_has_cpu(bt->task))
+	if(s390x_has_cpu(bt))
 		s390x_get_lowcore(s390x_cpu_of_task(bt->task),lowcore);
 
 	/* get the stack pointer */
 	if(esp){
-		if(s390x_has_cpu(bt->task)){
+		if(s390x_has_cpu(bt)){
 			ksp = ULONG(lowcore + MEMBER_OFFSET("_lowcore",
 				"gpregs_save_area") + (15 * S390X_WORD_SIZE));
 		} else {
@@ -959,7 +930,7 @@ s390x_get_stack_frame(struct bt_info *bt
 	if(!eip)
 		return;
 
-	if(s390x_has_cpu(bt->task) && esp){
+	if(s390x_has_cpu(bt) && esp){
 		*eip = ULONG(lowcore + OFFSET(s390_lowcore_psw_save_area) +
 			S390X_WORD_SIZE);
 	} else {
@@ -1048,7 +1019,7 @@ s390x_dis_filter(ulong vaddr, char *inbu
 int
 s390x_get_smp_cpus(void)
 {
-	return get_cpus_online();
+	return (get_highest_cpu_online() + 1);
 }
 
 /*

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]