rpms/kernel/devel kernel-2.6.spec, 1.1971.2.5, 1.1971.2.6 linux-2.6-xen-compile-fixes.patch, 1.1.12.2, 1.1.12.3 linux-2.6-xen-cpusteal-hv.patch, 1.2.2.1, 1.2.2.2 linux-2.6-xen-cpusteal-kernel.patch, 1.2.2.1, 1.2.2.2

fedora-cvs-commits at redhat.com
Tue Feb 28 21:45:33 UTC 2006


Author: quintela

Update of /cvs/dist/rpms/kernel/devel
In directory cvs.devel.redhat.com:/tmp/cvs-serv12970

Modified Files:
      Tag: private-xen-rebased-1971-branch
	kernel-2.6.spec linux-2.6-xen-compile-fixes.patch 
	linux-2.6-xen-cpusteal-hv.patch 
	linux-2.6-xen-cpusteal-kernel.patch 
Log Message:
fix x86 xen compilation && rebase on 1.1993


Index: kernel-2.6.spec
===================================================================
RCS file: /cvs/dist/rpms/kernel/devel/kernel-2.6.spec,v
retrieving revision 1.1971.2.5
retrieving revision 1.1971.2.6
diff -u -r1.1971.2.5 -r1.1971.2.6
--- kernel-2.6.spec	28 Feb 2006 20:16:09 -0000	1.1971.2.5
+++ kernel-2.6.spec	28 Feb 2006 21:45:27 -0000	1.1971.2.6
@@ -698,7 +698,7 @@
 %if %{buildxen}
 cd xen
 %patch20000 -p1
-%patch20010 -p1
+%patch20010 -p2
 cd ..
 %endif
 cd linux-%{kversion}.%{_target_cpu}
@@ -785,7 +785,7 @@
 %patch706 -p1
 %patch709 -p1
 #%patch710 -p1
-%patch711 -p1
+%patch711 -p2
 %endif
 
 #
@@ -1599,6 +1599,7 @@
 %changelog
 * Tue Feb 28 2006 Juan Quintela <quintela at redhat.com> - 2.6.%{sublevel}-%(R="$Revision$"; RR="${R##: }"; echo ${RR%%?})_FC5%{rhbsys}
 - rebase with rawhide 1.1993.
+- rebase with rawhide 1.1993
 
 * Tue Feb 28 2006 David Woodhouse <dwmw2 at redhat.com>
 - Fix gettimeofday() in the 64-bit PowerPC vDSO

linux-2.6-xen-compile-fixes.patch:
 linux-2.6.15.i686/arch/i386/mach-xen/Makefile                |    2 -
 linux-2.6.15.ia64/fs/Kconfig                                 |    1 
 linux-2.6.15.new/arch/i386/mm/init-xen.c                     |    1 
 linux-2.6.15.xen/arch/i386/kernel/Makefile                   |    3 --
 linux-2.6.15.xen/arch/i386/kernel/smp-xen.c                  |    1 
 linux-2.6.15.xen/arch/i386/mm/init-xen.c                     |    1 
 linux-2.6.15.xen/arch/x86_64/kernel/process-xen.c            |    6 ----
 linux-2.6.15.xen/arch/x86_64/mm/fault-xen.c                  |    2 -
 linux-2.6.15.xen/include/asm-i386/mach-xen/asm/desc.h        |   14 +++++++++++
 linux-2.6.15.xen/include/asm-i386/mach-xen/asm/mmu.h         |    7 +++++
 linux-2.6.15.xen/include/asm-i386/mach-xen/asm/pgalloc.h     |    1 
 linux-2.6.15.xen/include/asm-i386/mach-xen/asm/processor.h   |    7 ++++-
 linux-2.6.15.xen/include/asm-i386/mach-xen/asm/swiotlb.h     |    1 
 linux-2.6.15.xen/include/asm-x86_64/mach-xen/asm/pgalloc.h   |    7 +++++
 linux-2.6.15.xen/include/asm-x86_64/mach-xen/asm/pgtable.h   |    2 -
 linux-2.6.15.xen/include/asm-x86_64/mach-xen/asm/processor.h |    2 +
 16 files changed, 44 insertions(+), 14 deletions(-)

Index: linux-2.6-xen-compile-fixes.patch
===================================================================
RCS file: /cvs/dist/rpms/kernel/devel/linux-2.6-xen-compile-fixes.patch,v
retrieving revision 1.1.12.2
retrieving revision 1.1.12.3
diff -u -r1.1.12.2 -r1.1.12.3
--- linux-2.6-xen-compile-fixes.patch	24 Feb 2006 19:42:19 -0000	1.1.12.2
+++ linux-2.6-xen-compile-fixes.patch	28 Feb 2006 21:45:29 -0000	1.1.12.3
@@ -215,3 +215,12 @@
  
  config HUGETLB_PAGE
  	def_bool HUGETLBFS
+diff -urNp --exclude-from=/home/quintela/config/misc/dontdiff linux-2.6.15.orig/arch/i386/mach-xen/Makefile linux-2.6.15.i686/arch/i386/mach-xen/Makefile
+--- linux-2.6.15.orig/arch/i386/mach-xen/Makefile	2006-02-28 18:47:02.000000000 +0100
++++ linux-2.6.15.i686/arch/i386/mach-xen/Makefile	2006-02-28 19:38:41.000000000 +0100
+@@ -3,5 +3,3 @@
+ #
+ 
+ obj-y				:= setup.o topology.o
+-  
+-topology-y			:= ../mach-default/topology.o

linux-2.6-xen-cpusteal-hv.patch:
 common/dom0_ops.c      |    9 ++
 common/domain.c        |   13 ++++
 common/keyhandler.c    |    6 -
 common/sched_bvt.c     |   13 +---
 common/sched_sedf.c    |    8 --
 common/schedule.c      |  152 +++++++++++++++++++++----------------------------
 include/public/vcpu.h  |   34 ++++++++++
 include/xen/sched-if.h |    6 -
 include/xen/sched.h    |   12 +--
 9 files changed, 134 insertions(+), 119 deletions(-)

Index: linux-2.6-xen-cpusteal-hv.patch
===================================================================
RCS file: /cvs/dist/rpms/kernel/devel/linux-2.6-xen-cpusteal-hv.patch,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -u -r1.2.2.1 -r1.2.2.2
--- linux-2.6-xen-cpusteal-hv.patch	28 Feb 2006 20:16:10 -0000	1.2.2.1
+++ linux-2.6-xen-cpusteal-hv.patch	28 Feb 2006 21:45:29 -0000	1.2.2.2
@@ -1,50 +1,511 @@
-Allow guest domains to get information from the hypervisor on how much
-cpu time their virtual cpus have used.  This is needed to estimate the
-cpu steal time.
+From patchbot-unstable at lists.xensource.com Sat Feb 25 17:49:47 2006
+Date: Sat, 25 Feb 2006 22:48:07 +0000
+From: Xen patchbot -unstable <patchbot-unstable at lists.xensource.com>
+Reply-To: xen-devel at lists.xensource.com
+To: xen-changelog at lists.xensource.com
+Subject: [Xen-changelog] New VCPUOP_get_runstate_info hypercall. Returns information about the current
 
-Signed-off-by: Rik van Riel <riel at redhat.com>
+# HG changeset patch
+# User kaf24 at firebug.cl.cam.ac.uk
+# Node ID 2303fb4682e7cd4feb330fd2aec69672facb4ec6
+# Parent  a9f3abcc41499b7be971412d66c08d0e9740ff66
+New VCPUOP_get_runstate_info hypercall. Returns information about the current
+run state of a VCPU (running, runnable, blocked, etc.) and the total time
+spent in each state since the VCPU was created.
 
---- xen/include/public/vcpu.h.steal	2006-02-07 18:01:41.000000000 -0500
-+++ xen/include/public/vcpu.h	2006-02-17 13:51:45.000000000 -0500
-@@ -51,6 +51,14 @@
+Signed-off-by: Keir Fraser <keir at xensource.com>
+
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/dom0_ops.c
+--- a/xen/common/dom0_ops.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/dom0_ops.c	Sat Feb 25 16:58:37 2006
+@@ -46,6 +46,7 @@
+     struct vcpu   *v;
+     u64 cpu_time = 0;
+     int flags = DOMFLAGS_BLOCKED;
++    struct vcpu_runstate_info runstate;
+     
+     info->domain = d->domain_id;
+     info->nr_online_vcpus = 0;
+@@ -55,7 +56,8 @@
+      * - domain is marked as running if any of its vcpus is running
+      */
+     for_each_vcpu ( d, v ) {
+-        cpu_time += v->cpu_time;
++        vcpu_runstate_get(v, &runstate);
++        cpu_time += runstate.time[RUNSTATE_running];
+         info->max_vcpu_id = v->vcpu_id;
+         if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
+         {
+@@ -497,6 +499,7 @@
+     { 
+         struct domain *d;
+         struct vcpu   *v;
++        struct vcpu_runstate_info runstate;
+ 
+         ret = -ESRCH;
+         if ( (d = find_domain_by_id(op->u.getvcpuinfo.domain)) == NULL )
+@@ -510,10 +513,12 @@
+         if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
+             goto getvcpuinfo_out;
+ 
++        vcpu_runstate_get(v, &runstate);
++
+         op->u.getvcpuinfo.online   = !test_bit(_VCPUF_down, &v->vcpu_flags);
+         op->u.getvcpuinfo.blocked  = test_bit(_VCPUF_blocked, &v->vcpu_flags);
+         op->u.getvcpuinfo.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
+-        op->u.getvcpuinfo.cpu_time = v->cpu_time;
++        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
+         op->u.getvcpuinfo.cpu      = v->processor;
+         op->u.getvcpuinfo.cpumap   = 0;
+         memcpy(&op->u.getvcpuinfo.cpumap,
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/domain.c
+--- a/xen/common/domain.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/domain.c	Sat Feb 25 16:58:37 2006
+@@ -451,6 +451,19 @@
+     case VCPUOP_is_up:
+         rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
+         break;
++
++    case VCPUOP_get_runstate_info:
++    {
++        struct vcpu_runstate_info runstate;
++        vcpu_runstate_get(v, &runstate);
++        if ( copy_to_user(arg, &runstate, sizeof(runstate)) )
++            rc = -EFAULT;
++        break;
++    }
++
++    default:
++        rc = -ENOSYS;
++        break;
+     }
+ 
+     return rc;
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/keyhandler.c
+--- a/xen/common/keyhandler.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/keyhandler.c	Sat Feb 25 16:58:37 2006
+@@ -169,8 +169,6 @@
+ }
+ 
+ extern void dump_runq(unsigned char key);
+-extern void print_sched_histo(unsigned char key);
+-extern void reset_sched_histo(unsigned char key);
+ #ifndef NDEBUG
+ extern void audit_domains_key(unsigned char key);
+ #endif
+@@ -206,10 +204,6 @@
+         'd', dump_registers, "dump registers"); 
+     register_keyhandler(
+         'h', show_handlers, "show this message");
+-    register_keyhandler(
+-        'l', print_sched_histo, "print sched latency histogram");
+-    register_keyhandler(
+-        'L', reset_sched_histo, "reset sched latency histogram");
+     register_keyhandler(
+         'q', dump_domains, "dump domain (and guest debug) info");
+     register_keyhandler(
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_bvt.c
+--- a/xen/common/sched_bvt.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/sched_bvt.c	Sat Feb 25 16:58:37 2006
+@@ -132,13 +132,13 @@
+     vcpu_schedule_unlock_irq(v);
+ }
+ 
+-static inline u32 calc_avt(struct vcpu *d, s_time_t now)
++static inline u32 calc_avt(struct vcpu *v, s_time_t now)
+ {
+     u32 ranfor, mcus;
+-    struct bvt_dom_info *inf = BVT_INFO(d->domain);
+-    struct bvt_vcpu_info *einf = EBVT_INFO(d);
+-    
+-    ranfor = (u32)(now - d->lastschd);
++    struct bvt_dom_info *inf = BVT_INFO(v->domain);
++    struct bvt_vcpu_info *einf = EBVT_INFO(v);
++    
++    ranfor = (u32)(now - v->runstate.state_entry_time);
+     mcus = (ranfor + MCU - 1)/MCU;
+ 
+     return einf->avt + mcus * inf->mcu_advance;
+@@ -262,7 +262,7 @@
+     curr_evt = calc_evt(curr, calc_avt(curr, now));
+     /* Calculate the time the current domain would run assuming
+        the second smallest evt is of the newly woken domain */
+-    r_time = curr->lastschd +
++    r_time = curr->runstate.state_entry_time +
+         ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
+         ctx_allow;
+ 
+@@ -558,7 +558,6 @@
+         printk("%3d: %u has=%c ", loop++, v->domain->domain_id,
+                test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F');
+         bvt_dump_runq_el(v);
+-        printk("c=0x%X%08X\n", (u32)(v->cpu_time>>32), (u32)v->cpu_time);
+         printk("         l: %p n: %p  p: %p\n",
+                &vcpu_inf->run_list, vcpu_inf->run_list.next,
+                vcpu_inf->run_list.prev);
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/sched_sedf.c
+--- a/xen/common/sched_sedf.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/sched_sedf.c	Sat Feb 25 16:58:37 2006
+@@ -1408,18 +1408,14 @@
+ {
+     printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
+            test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
+-    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
++    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
+            " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
+            EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
+-           EDOM_INFO(d)->weight, d->cpu_time,
++           EDOM_INFO(d)->weight,
+            EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+            (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
+            EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+     
+-    if ( d->cpu_time != 0 )
+-        printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
+-               / d->cpu_time);
+-
+ #ifdef SEDF_STATS
+     if ( EDOM_INFO(d)->block_time_tot != 0 )
+         printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/common/schedule.c
+--- a/xen/common/schedule.c	Sat Feb 25 11:27:53 2006
++++ b/xen/common/schedule.c	Sat Feb 25 16:58:37 2006
+@@ -36,14 +36,6 @@
+ static char opt_sched[10] = "sedf";
+ string_param("sched", opt_sched);
+ 
+-/*#define WAKE_HISTO*/
+-/*#define BLOCKTIME_HISTO*/
+-#if defined(WAKE_HISTO)
+-#define BUCKETS 31
+-#elif defined(BLOCKTIME_HISTO)
+-#define BUCKETS 200
+-#endif
+-
+ #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
+ 
+ /* Various timer handlers. */
+@@ -73,6 +65,36 @@
+ /* Per-CPU periodic timer sends an event to the currently-executing domain. */
+ static struct timer t_timer[NR_CPUS]; 
+ 
++static inline void vcpu_runstate_change(
++    struct vcpu *v, int new_state, s_time_t new_entry_time)
++{
++    ASSERT(v->runstate.state != new_state);
++    ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock));
++
++    v->runstate.time[v->runstate.state] +=
++        new_entry_time - v->runstate.state_entry_time;
++    v->runstate.state_entry_time = new_entry_time;
++    v->runstate.state = new_state;
++}
++
++void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
++{
++    if ( likely(v == current) )
++    {
++        /* Fast lock-free path. */
++        memcpy(runstate, &v->runstate, sizeof(*runstate));
++        ASSERT(runstate->state = RUNSTATE_running);
++        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
++    }
++    else
++    {
++        vcpu_schedule_lock_irq(v);
++        memcpy(runstate, &v->runstate, sizeof(*runstate));
++        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
++        vcpu_schedule_unlock_irq(v);
++    }
++}
++
+ struct domain *alloc_domain(void)
+ {
+     struct domain *d;
+@@ -119,6 +141,9 @@
+     v->cpu_affinity = is_idle_domain(d) ?
+         cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;
+ 
++    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
++    v->runstate.state_entry_time = NOW();
++
+     if ( (vcpu_id != 0) && !is_idle_domain(d) )
+         set_bit(_VCPUF_down, &v->vcpu_flags);
+ 
+@@ -165,8 +190,15 @@
+     unsigned long flags;
+ 
+     vcpu_schedule_lock_irqsave(v, flags);
++
+     if ( likely(!vcpu_runnable(v)) )
++    {
++        if ( v->runstate.state == RUNSTATE_runnable )
++            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
++
+         SCHED_OP(sleep, v);
++    }
++
+     vcpu_schedule_unlock_irqrestore(v, flags);
+ 
+     TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
+@@ -187,11 +219,19 @@
+     unsigned long flags;
+ 
+     vcpu_schedule_lock_irqsave(v, flags);
++
+     if ( likely(vcpu_runnable(v)) )
+     {
++        if ( v->runstate.state >= RUNSTATE_blocked )
++            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
+         SCHED_OP(wake, v);
+-        v->wokenup = NOW();
+-    }
++    }
++    else if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) )
++    {
++        if ( v->runstate.state == RUNSTATE_blocked )
++            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
++    }
++
+     vcpu_schedule_unlock_irqrestore(v, flags);
+ 
+     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
+@@ -376,8 +416,6 @@
+ 
+     stop_timer(&schedule_data[cpu].s_timer);
+     
+-    prev->cpu_time += now - prev->lastschd;
+-
+     /* get policy-specific decision on scheduling... */
+     next_slice = ops.do_schedule(now);
+ 
+@@ -386,8 +424,6 @@
+ 
+     schedule_data[cpu].curr = next;
+     
+-    next->lastschd = now;
+-
+     set_timer(&schedule_data[cpu].s_timer, now + r_time);
+ 
+     if ( unlikely(prev == next) )
+@@ -397,38 +433,23 @@
+     }
+ 
+     TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
+-             prev->domain->domain_id, now - prev->lastschd);
++             prev->domain->domain_id,
++             now - prev->runstate.state_entry_time);
+     TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
+-             next->domain->domain_id, now - next->wokenup, r_time);
+-
+-    /*
+-     * Logic of wokenup field in domain struct:
+-     * Used to calculate "waiting time", which is the time that a domain
+-     * spends being "runnable", but not actually running. wokenup is set
+-     * set whenever a domain wakes from sleeping. However, if wokenup is not
+-     * also set here then a preempted runnable domain will get a screwed up
+-     * "waiting time" value next time it is scheduled.
+-     */
+-    prev->wokenup = now;
+-
+-#if defined(WAKE_HISTO)
+-    if ( !is_idle_vcpu(next) && next->wokenup )
+-    {
+-        ulong diff = (ulong)(now - next->wokenup);
+-        diff /= (ulong)MILLISECS(1);
+-        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
+-        else                    schedule_data[cpu].hist[BUCKETS-1]++;
+-    }
+-    next->wokenup = (s_time_t)0;
+-#elif defined(BLOCKTIME_HISTO)
+-    prev->lastdeschd = now;
+-    if ( !is_idle_vcpu(next) )
+-    {
+-        ulong diff = (ulong)((now - next->lastdeschd) / MILLISECS(10));
+-        if (diff <= BUCKETS-2)  schedule_data[cpu].hist[diff]++;
+-        else                    schedule_data[cpu].hist[BUCKETS-1]++;
+-    }
+-#endif
++             next->domain->domain_id,
++             (next->runstate.state == RUNSTATE_runnable) ?
++             (now - next->runstate.state_entry_time) : 0,
++             r_time);
++
++    ASSERT(prev->runstate.state == RUNSTATE_running);
++    vcpu_runstate_change(
++        prev,
++        (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked :
++         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
++        now);
++
++    ASSERT(next->runstate.state != RUNSTATE_running);
++    vcpu_runstate_change(next, RUNSTATE_running, now);
+ 
+     ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags));
+     set_bit(_VCPUF_running, &next->vcpu_flags);
+@@ -567,47 +588,6 @@
+ 
+     local_irq_restore(flags);
+ }
+-
+-#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
+-
+-void print_sched_histo(unsigned char key)
+-{
+-    int i, j, k;
+-    for_each_online_cpu ( k )
+-    {
+-        j = 0;
+-        printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
+-        for ( i = 0; i < BUCKETS; i++ )
+-        {
+-            if ( schedule_data[k].hist[i] != 0 )
+-            {
+-                if ( i < BUCKETS-1 )
+-                    printk("%2d:[%7u]    ", i, schedule_data[k].hist[i]);
+-                else
+-                    printk(" >:[%7u]    ", schedule_data[k].hist[i]);
+-                if ( !(++j % 5) )
+-                    printk("\n");
+-            }
+-        }
+-        printk("\n");
+-    }
+-      
+-}
+-
+-void reset_sched_histo(unsigned char key)
+-{
+-    int i, j;
+-    for ( j = 0; j < NR_CPUS; j++ )
+-        for ( i=0; i < BUCKETS; i++ ) 
+-            schedule_data[j].hist[i] = 0;
+-}
+-
+-#else
+-
+-void print_sched_histo(unsigned char key) { }
+-void reset_sched_histo(unsigned char key) { }
+-
+-#endif
+ 
+ /*
+  * Local variables:
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/public/vcpu.h
+--- a/xen/include/public/vcpu.h	Sat Feb 25 11:27:53 2006
++++ b/xen/include/public/vcpu.h	Sat Feb 25 16:58:37 2006
+@@ -51,6 +51,40 @@
  /* Returns 1 if the given VCPU is up. */
  #define VCPUOP_is_up                3
  
 +/*
-+ * Get information on how much CPU time this VCPU has used, etc...
-+ *
-+ * @extra_arg == pointer to an empty dom0_getvcpuinfo_t, the "OUT" variables
-+ *               of which filled in with scheduler info.
++ * Return information about the state and running time of a VCPU.
++ * @extra_arg == pointer to xen_vcpu_info structure.
++ */
++#define VCPUOP_get_runstate_info    4
++typedef struct vcpu_runstate_info {
++    /* VCPU's current state (RUNSTATE_*). */
++    int      state;
++    /* When was current state entered (system time, ns)? */
++    uint64_t state_entry_time;
++    /*
++     * Time spent in each RUNSTATE_* (ns). The sum of these times is
++     * guaranteed not to drift from system time.
++     */
++    uint64_t time[4];
++} vcpu_runstate_info_t;
++
++/* VCPU is currently running on a physical CPU. */
++#define RUNSTATE_running  0
++
++/* VCPU is runnable, but not currently scheduled on any physical CPU. */
++#define RUNSTATE_runnable 1
++
++/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
++#define RUNSTATE_blocked  2
++
++/*
++ * VCPU is not runnable, but it is not blocked.
++ * This is a 'catch all' state for things like hotplug and pauses by the
++ * system administrator (or for critical sections in the hypervisor).
++ * RUNSTATE_blocked dominates this state (it is the preferred state).
 + */
-+#define VCPUOP_cpu_info             4
++#define RUNSTATE_offline  3
 +
  #endif /* __XEN_PUBLIC_VCPU_H__ */
  
  /*
---- xen/common/domain.c.steal	2006-02-07 18:01:40.000000000 -0500
-+++ xen/common/domain.c	2006-02-17 13:52:44.000000000 -0500
-@@ -451,8 +451,24 @@
-     case VCPUOP_is_up:
-         rc = !test_bit(_VCPUF_down, &v->vcpu_flags);
-         break;
-+
-+    case VCPUOP_cpu_info:
-+	{
-+	    struct dom0_getvcpuinfo vi = { 0, };
-+	    vi.online = !test_bit(_VCPUF_down, &v->vcpu_flags);
-+	    vi.blocked = test_bit(_VCPUF_blocked, &v->vcpu_flags);
-+	    vi.running  = test_bit(_VCPUF_running, &v->vcpu_flags);
-+	    vi.cpu_time = v->cpu_time;
-+	    vi.cpu = v->processor;
-+	    rc = 0;
-+
-+	    if ( copy_to_user(arg, &vi, sizeof(dom0_getvcpuinfo_t)) )
-+		rc = -EFAULT;
-+	    break;
-+	}
-     }
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched-if.h
+--- a/xen/include/xen/sched-if.h	Sat Feb 25 11:27:53 2006
++++ b/xen/include/xen/sched-if.h	Sat Feb 25 16:58:37 2006
+@@ -8,9 +8,6 @@
+ #ifndef __XEN_SCHED_IF_H__
+ #define __XEN_SCHED_IF_H__
+ 
+-#define BUCKETS  10
+-/*300*/
+-
+ struct schedule_data {
+     spinlock_t          schedule_lock;  /* spinlock protecting curr        */
+     struct vcpu        *curr;           /* current task                    */
+@@ -18,9 +15,6 @@
+     void               *sched_priv;
+     struct timer        s_timer;        /* scheduling timer                */
+     unsigned long       tick;           /* current periodic 'tick'         */
+-#ifdef BUCKETS
+-    u32                 hist[BUCKETS];  /* for scheduler latency histogram */
+-#endif
+ } __cacheline_aligned;
+ 
+ extern struct schedule_data schedule_data[];
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/xen/sched.h
+--- a/xen/include/xen/sched.h	Sat Feb 25 11:27:53 2006
++++ b/xen/include/xen/sched.h	Sat Feb 25 16:58:37 2006
+@@ -8,6 +8,7 @@
+ #include <xen/smp.h>
+ #include <public/xen.h>
+ #include <public/dom0_ops.h>
++#include <public/vcpu.h>
+ #include <xen/time.h>
+ #include <xen/timer.h>
+ #include <xen/grant_table.h>
+@@ -63,14 +64,12 @@
+ 
+     struct vcpu     *next_in_list;
  
+-    struct timer  timer;         /* one-shot timer for timeout values */
++    struct timer     timer;         /* one-shot timer for timeout values */
+     unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */
+ 
+-    s_time_t         lastschd;      /* time this domain was last scheduled */
+-    s_time_t         lastdeschd;    /* time this domain was last descheduled */
+-    s_time_t         cpu_time;      /* total CPU time received till now */
+-    s_time_t         wokenup;       /* time domain got woken up */
+     void            *sched_priv;    /* scheduler-specific data */
 +
-     return rc;
- }
++    struct vcpu_runstate_info runstate;
+ 
+     unsigned long    vcpu_flags;
+ 
+@@ -397,7 +396,6 @@
+ #define _DOMF_debugging        4
+ #define DOMF_debugging         (1UL<<_DOMF_debugging)
  
+-
+ static inline int vcpu_runnable(struct vcpu *v)
+ {
+     return ( (atomic_read(&v->pausecnt) == 0) &&
+@@ -415,6 +413,8 @@
+ 
+ int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
+ 
++void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
++
+ static inline void vcpu_unblock(struct vcpu *v)
+ {
+     if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
+
+_______________________________________________
+Xen-changelog mailing list
+Xen-changelog at lists.xensource.com
+http://lists.xensource.com/xen-changelog
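
For reference only (this is not part of the committed patch): a guest kernel could exercise the new VCPUOP_get_runstate_info hypercall roughly as sketched below. The HYPERVISOR_vcpu_op() call is used exactly as in the cpusteal kernel patch further down; the helper name, the printk, and the hypercall header include are illustrative assumptions, not code from this commit.

/*
 * Minimal sketch, assuming the Xen headers from this tree: ask the
 * hypervisor how long the current VCPU has spent in each run state.
 * Meant to be called with preemption disabled (e.g. from interrupt context).
 */
#include <linux/kernel.h>
#include <linux/smp.h>
#include <xen/interface/vcpu.h>
#include <asm/hypervisor.h>	/* for HYPERVISOR_vcpu_op(); exact path is an assumption */

static void report_vcpu_runstate(void)
{
	int cpu = smp_processor_id();
	struct vcpu_runstate_info runstate;

	/* Returns non-zero (e.g. -ENOSYS) on hypervisors without this op. */
	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate) != 0)
		return;

	printk(KERN_DEBUG "vcpu%d: state=%d running=%lluns runnable=%lluns"
	       " blocked=%lluns offline=%lluns\n", cpu, runstate.state,
	       (unsigned long long)runstate.time[RUNSTATE_running],
	       (unsigned long long)runstate.time[RUNSTATE_runnable],
	       (unsigned long long)runstate.time[RUNSTATE_blocked],
	       (unsigned long long)runstate.time[RUNSTATE_offline]);
}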

linux-2.6-xen-cpusteal-kernel.patch:
 b/linux/include/xen/interface/vcpu.h             |   34 ++++++++
 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c |   95 ++++++++++++++++++++---
 2 files changed, 117 insertions(+), 12 deletions(-)

Index: linux-2.6-xen-cpusteal-kernel.patch
===================================================================
RCS file: /cvs/dist/rpms/kernel/devel/linux-2.6-xen-cpusteal-kernel.patch,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -u -r1.2.2.1 -r1.2.2.2
--- linux-2.6-xen-cpusteal-kernel.patch	28 Feb 2006 20:16:10 -0000	1.2.2.1
+++ linux-2.6-xen-cpusteal-kernel.patch	28 Feb 2006 21:45:29 -0000	1.2.2.2
@@ -1,24 +1,42 @@
-Estimate and account for the CPU steal time.  That is, the time during
-which we wanted to run, but the hypervisor scheduled in another
-process instead.  This can be used to help administrators in capacity
-planning, by clearly being able to distinguish whether the system is
-too slow or this guest is simply not getting enough CPU time.
+From patchbot-unstable at lists.xensource.com Sat Feb 25 17:49:47 2006
+Date: Sat, 25 Feb 2006 22:48:07 +0000
+From: Xen patchbot -unstable <patchbot-unstable at lists.xensource.com>
+Reply-To: xen-devel at lists.xensource.com
+To: xen-changelog at lists.xensource.com
+Subject: [Xen-changelog] Update Linux time IRQ handler to understand the new stolen/blocked cycle counts
 
-This is the i386 version, I'll send the implementation for x86_64 soon.
+# HG changeset patch
+# User kaf24 at firebug.cl.cam.ac.uk
+# Node ID c375c210945282c2c5fd6cb86f51422e211ed8a2
+# Parent  2303fb4682e7cd4feb330fd2aec69672facb4ec6
+Update Linux time IRQ handler to understand the new stolen/blocked cycle counts
+exported by Xen. This is based heavily on a patch from Rik van Riel, but
+extended to distinguish between idle/blocked cycles and stolen cycles.
+
+There is still stuff todo:
+ 1. Xen should export the time values in shared memory, to save a hypercall
+    on every time interrupt (even though that is only every 10ms, worst case).
+ 2. As in s390, Xen's cputime_t should be measured at finer granularity than
+    jiffies. Nanoseconds would be a better unit.
+ 3. Break out the internals of the account_steal_time() interface so that we don't
+    need to abuse it so wretchedly.
 
 Signed-off-by: Rik van Riel <riel at redhat.com>
+Signed-off-by: Keir Fraser <keir at xensource.com>
 
---- linux-2.6.15.i686/arch/i386/kernel/time-xen.c.steal	2006-02-17 16:44:40.000000000 -0500
-+++ linux-2.6.15.i686/arch/i386/kernel/time-xen.c	2006-02-20 18:31:26.000000000 -0500
-@@ -48,6 +48,7 @@
+diff -r 2303fb4682e7 -r c375c2109452 linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
+--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 16:58:37 2006
++++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Sat Feb 25 19:07:28 2006
+@@ -48,6 +48,8 @@
  #include <linux/mca.h>
  #include <linux/sysctl.h>
  #include <linux/percpu.h>
 +#include <linux/kernel_stat.h>
++#include <linux/posix-timers.h>
  
  #include <asm/io.h>
  #include <asm/smp.h>
-@@ -77,6 +78,7 @@
+@@ -70,6 +72,7 @@
  #include <asm/arch_hooks.h>
  
  #include <xen/evtchn.h>
@@ -26,63 +44,192 @@
  
  #if defined (__i386__)
  #include <asm/i8259.h>
-@@ -624,7 +626,43 @@ irqreturn_t timer_interrupt(int irq, voi
-          * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
-          * if there is risk of deadlock if we do (since update_process_times
-          * may do scheduler rebalancing work and thus acquire runqueue locks).
-+	 *
-+	 * If we have not run for a while, chances are this vcpu got scheduled
-+	 * away.  Try to estimate how much time was stolen.
-          */
-+	if (delta_cpu > (s64)(2 * NS_PER_TICK)) {
-+		dom0_getvcpuinfo_t vcpu = { 0, };
-+		cputime64_t total;	/* In jiffies, not nanoseconds. */
-+		s64 cputicks, deltaticks, steal;
-+
-+		if (HYPERVISOR_vcpu_op(VCPUOP_cpu_info, cpu, &vcpu) == 0) {
-+			total = kstat_cpu(cpu).cpustat.user +
-+				kstat_cpu(cpu).cpustat.nice +
-+				kstat_cpu(cpu).cpustat.system +
-+				kstat_cpu(cpu).cpustat.softirq +
-+				kstat_cpu(cpu).cpustat.irq;
-+
-+			/* do_div modifies the variable in place. YUCK */
-+			deltaticks = delta_cpu;
-+			cputicks = vcpu.cpu_time;
-+			do_div(cputicks, NS_PER_TICK);
-+			do_div(deltaticks, NS_PER_TICK);
-+
-+			steal = total + deltaticks - cputicks;
-+			if (unlikely(steal < 0))
-+				steal = 0;
-+			else if (unlikely(steal > deltaticks))
-+				steal = deltaticks;
-+
-+			if (steal > 0) {
-+				delta_cpu -= steal * NS_PER_TICK;
-+				per_cpu(processed_system_time, cpu) +=
-+							steal * NS_PER_TICK;
-+				account_steal_time(current, (cputime_t)steal);
-+			}
+@@ -122,6 +125,10 @@
+ /* Keep track of last time we did processing/updating of jiffies and xtime. */
+ static u64 processed_system_time;   /* System time (ns) at last processing. */
+ static DEFINE_PER_CPU(u64, processed_system_time);
++
++/* How much CPU time was spent blocked and how much was 'stolen'? */
++static DEFINE_PER_CPU(u64, processed_stolen_time);
++static DEFINE_PER_CPU(u64, processed_blocked_time);
+ 
+ /* Must be signed, as it's compared with s64 quantities which can be -ve. */
+ #define NS_PER_TICK (1000000000LL/HZ)
+@@ -567,9 +574,10 @@
+ 
+ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+-	s64 delta, delta_cpu;
++	s64 delta, delta_cpu, stolen, blocked;
+ 	int i, cpu = smp_processor_id();
+ 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
++	struct vcpu_runstate_info runstate;
+ 
+ 	write_seqlock(&xtime_lock);
+ 
+@@ -611,19 +619,79 @@
+ 
+ 	write_sequnlock(&xtime_lock);
+ 
+-	/*
+-         * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
+-         * if there is risk of deadlock if we do (since update_process_times
+-         * may do scheduler rebalancing work and thus acquire runqueue locks).
+-         */
+-	while (delta_cpu >= NS_PER_TICK) {
+-		delta_cpu -= NS_PER_TICK;
+-		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+-		update_process_times(user_mode(regs));
+-		profile_tick(CPU_PROFILING, regs);
+-	}
++	/* Obtain stolen/blocked cycles, if the hypervisor supports it. */
++	if (HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
++			       cpu, &runstate) == 0) {
++		/*
++		 * Account stolen ticks.
++		 * HACK: Passing NULL to account_steal_time()
++		 * ensures that the ticks are accounted as stolen.
++		 */
++		stolen = runstate.time[RUNSTATE_runnable] +
++			runstate.time[RUNSTATE_offline] -
++			per_cpu(processed_stolen_time, cpu);
++		if (unlikely(stolen < 0)) /* clock jitter */
++			stolen = 0;
++		delta_cpu -= stolen;
++		if (unlikely(delta_cpu < 0)) {
++			stolen += delta_cpu;
++			delta_cpu = 0;
++		}
++		do_div(stolen, NS_PER_TICK);
++		per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
++		account_steal_time(NULL, (cputime_t)stolen);
++
++		/*
++		 * Account blocked ticks.
++		 * HACK: Passing idle_task to account_steal_time()
++		 * ensures that the ticks are accounted as idle/wait.
++		 */
++		blocked = runstate.time[RUNSTATE_blocked] -
++			per_cpu(processed_blocked_time, cpu);
++		if (unlikely(blocked < 0)) /* clock jitter */
++			blocked = 0;
++		delta_cpu -= blocked;
++		if (unlikely(delta_cpu < 0)) {
++			blocked += delta_cpu;
++			delta_cpu = 0;
 +		}
++		do_div(blocked, NS_PER_TICK);
++		per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
++		account_steal_time(idle_task(cpu), (cputime_t)blocked);
++
++		per_cpu(processed_system_time, cpu) +=
++			(stolen + blocked) * NS_PER_TICK;
++	}
++
++	if (delta_cpu > 0) {
++		do_div(delta_cpu, NS_PER_TICK);
++		if (user_mode(regs))
++			account_user_time(current, (cputime_t)delta_cpu);
++		else
++			account_system_time(current, HARDIRQ_OFFSET,
++					    (cputime_t)delta_cpu);
++		per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
 +	}
 +
- 	while (delta_cpu >= NS_PER_TICK) {
- 		delta_cpu -= NS_PER_TICK;
- 		per_cpu(processed_system_time, cpu) += NS_PER_TICK;
---- linux-2.6.15.i686/include/xen/interface/vcpu.h.steal	2006-02-17 16:14:17.000000000 -0500
-+++ linux-2.6.15.i686/include/xen/interface/vcpu.h	2006-02-17 16:14:52.000000000 -0500
-@@ -51,6 +51,14 @@
++	run_local_timers();
++	if (rcu_pending(cpu))
++		rcu_check_callbacks(cpu, user_mode(regs));
++	scheduler_tick();
++	run_posix_cpu_timers(current);
+ 
+ 	return IRQ_HANDLED;
++}
++
++static void init_missing_ticks_accounting(int cpu)
++{
++	struct vcpu_runstate_info runstate = { 0 };
++
++	HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, cpu, &runstate);
++
++	per_cpu(processed_blocked_time, cpu) = runstate.time[RUNSTATE_blocked];
++	per_cpu(processed_stolen_time, cpu) =
++		runstate.time[RUNSTATE_runnable] +
++		runstate.time[RUNSTATE_offline];
+ }
+ 
+ /* not static: needed by APM */
+@@ -814,6 +882,7 @@
+ 
+ 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
+ 	per_cpu(processed_system_time, 0) = processed_system_time;
++	init_missing_ticks_accounting(0);
+ 
+ 	update_wallclock();
+ 
+@@ -891,6 +960,7 @@
+ 
+ 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
+ 	per_cpu(processed_system_time, 0) = processed_system_time;
++	init_missing_ticks_accounting(0);
+ 
+ 	update_wallclock();
+ }
+@@ -909,6 +979,7 @@
+ 		/* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
+ 		per_cpu(processed_system_time, cpu) = 
+ 			per_cpu(shadow_time, 0).system_timestamp;
++		init_missing_ticks_accounting(cpu);
+ 	} while (read_seqretry(&xtime_lock, seq));
+ 
+ 	sprintf(timer_name[cpu], "timer%d", cpu);
+
+_______________________________________________
+Xen-changelog mailing list
+Xen-changelog at lists.xensource.com
+http://lists.xensource.com/xen-changelog
+
+
+
+diff -r a9f3abcc4149 -r 2303fb4682e7 xen/include/public/vcpu.h
+--- a/linux/include/xen/interface/vcpu.h	Sat Feb 25 11:27:53 2006
++++ b/linux/include/xen/interface/vcpu.h	Sat Feb 25 16:58:37 2006
+@@ -51,6 +51,40 @@
  /* Returns 1 if the given VCPU is up. */
  #define VCPUOP_is_up                3
  
 +/*
-+ * Get information on how much CPU time this VCPU has used, etc...
-+ *
-+ * @extra_arg == pointer to an empty dom0_getvcpuinfo_t, the "OUT" variables
-+ *               of which filled in with scheduler info.
++ * Return information about the state and running time of a VCPU.
++ * @extra_arg == pointer to xen_vcpu_info structure.
++ */
++#define VCPUOP_get_runstate_info    4
++typedef struct vcpu_runstate_info {
++    /* VCPU's current state (RUNSTATE_*). */
++    int      state;
++    /* When was current state entered (system time, ns)? */
++    uint64_t state_entry_time;
++    /*
++     * Time spent in each RUNSTATE_* (ns). The sum of these times is
++     * guaranteed not to drift from system time.
++     */
++    uint64_t time[4];
++} vcpu_runstate_info_t;
++
++/* VCPU is currently running on a physical CPU. */
++#define RUNSTATE_running  0
++
++/* VCPU is runnable, but not currently scheduled on any physical CPU. */
++#define RUNSTATE_runnable 1
++
++/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
++#define RUNSTATE_blocked  2
++
++/*
++ * VCPU is not runnable, but it is not blocked.
++ * This is a 'catch all' state for things like hotplug and pauses by the
++ * system administrator (or for critical sections in the hypervisor).
++ * RUNSTATE_blocked dominates this state (it is the preferred state).
 + */
-+#define VCPUOP_cpu_info             4
++#define RUNSTATE_offline  3
 +
  #endif /* __XEN_PUBLIC_VCPU_H__ */
  




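Also for reference (not part of the committed patches): the stolen-time bookkeeping that timer_interrupt() performs above reduces to the arithmetic below. This is a standalone user-space sketch with hypothetical names, so the clamping against the local time delta and the NS_PER_TICK conversion can be read in isolation; the real patch does the same steps with do_div() and then feeds the tick count to account_steal_time(NULL, ...).

/*
 * Standalone sketch of the per-sample steal accounting (hypothetical names).
 * Given how long the VCPU was runnable or offline since the last sample,
 * clamp against the locally observed time delta and convert to whole ticks.
 */
#include <stdint.h>
#include <stdio.h>

#define NS_PER_TICK (1000000000LL / 250)	/* assumes HZ=250 for the example */

static int64_t stolen_ticks(int64_t *delta_cpu_ns, uint64_t runnable_ns,
			    uint64_t offline_ns, uint64_t *processed_stolen_ns)
{
	int64_t stolen = (int64_t)(runnable_ns + offline_ns - *processed_stolen_ns);

	if (stolen < 0)			/* clock jitter */
		stolen = 0;
	*delta_cpu_ns -= stolen;
	if (*delta_cpu_ns < 0) {	/* never steal more than we observed locally */
		stolen += *delta_cpu_ns;
		*delta_cpu_ns = 0;
	}
	stolen /= NS_PER_TICK;		/* whole ticks only; remainder carries over */
	*processed_stolen_ns += stolen * NS_PER_TICK;
	return stolen;			/* the patch passes this to account_steal_time(NULL, ...) */
}

int main(void)
{
	int64_t delta_cpu = 40000000;	/* 40 ms of local time since the last interrupt */
	uint64_t processed = 0;
	int64_t ticks = stolen_ticks(&delta_cpu, 12000000, 0, &processed);

	/* With HZ=250, 12 ms of runnable time is 3 whole stolen ticks. */
	printf("stolen ticks: %lld, remaining delta: %lld ns\n",
	       (long long)ticks, (long long)delta_cpu);
	return 0;
}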