rpms/kernel/F-9 linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch, NONE, 1.1 linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch, NONE, 1.1 linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch, NONE, 1.1 kernel.spec, 1.750, 1.751

Chuck Ebbert cebbert at fedoraproject.org
Sun Aug 31 01:29:14 UTC 2008


Author: cebbert

Update of /cvs/pkgs/rpms/kernel/F-9
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv10606

Modified Files:
	kernel.spec 
Added Files:
	linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch 
	linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch 
	linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch 
Log Message:
Fix cpuidle misbehavior. (#459214)

linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch:

--- NEW FILE linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch ---
From: venkatesh.pallipadi at intel.com <venkatesh.pallipadi at intel.com>
Date: Thu, 31 Jul 2008 02:21:42 +0000 (-0700)
Subject: cpuidle: Do not use poll_idle unless user asks for it
X-Git-Tag: v2.6.27-rc4~59^2^2~2
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=a2bd92023357e47f22a34d4cb1635453546662bc

cpuidle: Do not use poll_idle unless user asks for it

poll_idle was added to CPUIDLE just as a low-latency idle handler, to be
used in cases when the user desires CPUs not to enter any idle state at all. It
was supposed to be a run-time idle=poll option for the user. However, it was in
fact being used in the default case of the normal menu and ladder governors,
with no special user setting (Reported by Linus Torvalds).

The change below ensures that poll_idle will not be used unless the user
explicitly asks the pm_qos infrastructure for a zero latency requirement.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi at intel.com>
Signed-off-by: Andi Kleen <ak at linux.intel.com>
---

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index ba7b9a6..27ab3bf 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -67,10 +67,17 @@ static int ladder_select_state(struct cpuidle_device *dev)
 	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
 	struct ladder_device_state *last_state;
 	int last_residency, last_idx = ldev->last_state_idx;
+	int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
 
 	if (unlikely(!ldev))
 		return 0;
 
+	/* Special case when user has set very strict latency requirement */
+	if (unlikely(latency_req == 0)) {
+		ladder_do_selection(ldev, last_idx, 0);
+		return 0;
+	}
+
 	last_state = &ldev->states[last_idx];
 
 	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
@@ -81,8 +88,7 @@ static int ladder_select_state(struct cpuidle_device *dev)
 	/* consider promotion */
 	if (last_idx < dev->state_count - 1 &&
 	    last_residency > last_state->threshold.promotion_time &&
-	    dev->states[last_idx + 1].exit_latency <=
-			pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
+	    dev->states[last_idx + 1].exit_latency <= latency_req) {
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -92,7 +98,7 @@ static int ladder_select_state(struct cpuidle_device *dev)
 	}
 
 	/* consider demotion */
-	if (last_idx > 0 &&
+	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
 	    last_residency < last_state->threshold.demotion_time) {
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
@@ -117,7 +123,7 @@ static int ladder_enable_device(struct cpuidle_device *dev)
 	struct ladder_device_state *lstate;
 	struct cpuidle_state *state;
 
-	ldev->last_state_idx = 0;
+	ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START;
 
 	for (i = 0; i < dev->state_count; i++) {
 		state = &dev->states[i];
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 78d77c5..b8f3e21 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -34,21 +34,28 @@ static DEFINE_PER_CPU(struct menu_device, menu_devices);
 static int menu_select(struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 
+	/* Special case when user has set very strict latency requirement */
+	if (unlikely(latency_req == 0)) {
+		data->last_state_idx = 0;
+		return 0;
+	}
+
 	/* determine the expected residency time */
 	data->expected_us =
 		(u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
 
 	/* find the deepest idle state that satisfies our constraints */
-	for (i = 1; i < dev->state_count; i++) {
+	for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) {
 		struct cpuidle_state *s = &dev->states[i];
 
 		if (s->target_residency > data->expected_us)
 			break;
 		if (s->target_residency > data->predicted_us)
 			break;
-		if (s->exit_latency > pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY))
+		if (s->exit_latency > latency_req)
 			break;
 	}
 

linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch:

--- NEW FILE linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch ---
From: venkatesh.pallipadi at intel.com <venkatesh.pallipadi at intel.com>
Date: Thu, 31 Jul 2008 02:21:43 +0000 (-0700)
Subject: cpuidle: Menu governor fix wrong usage of measured_us
X-Git-Tag: v2.6.27-rc4~59^2^2~1
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=320eee776357db52d6fcfb11cff985b1976a4595

cpuidle: Menu governor fix wrong usage of measured_us

There is a bug in menu governor where we have
		if (data->elapsed_us < data->elapsed_us + measured_us)

with measured_us already having elapsed_us added in tickless case here
	unsigned int measured_us =
		cpuidle_get_last_residency(dev) + data->elapsed_us;

Also, it should be last_residency, not measured_us, that needs to be used to
compare and distinguish between expected & non-expected events.

Refactor menu_reflect() to fix these two problems.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi at intel.com>
Signed-off-by: Wei Gang <gang.wei at intel.com>
Signed-off-by: Andi Kleen <ak at linux.intel.com>
---

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b8f3e21..8d7cf3f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -74,9 +74,9 @@ static void menu_reflect(struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int last_idx = data->last_state_idx;
-	unsigned int measured_us =
-		cpuidle_get_last_residency(dev) + data->elapsed_us;
+	unsigned int last_idle_us = cpuidle_get_last_residency(dev);
 	struct cpuidle_state *target = &dev->states[last_idx];
+	unsigned int measured_us;
 
 	/*
 	 * Ugh, this idle state doesn't support residency measurements, so we
@@ -84,20 +84,27 @@ static void menu_reflect(struct cpuidle_device *dev)
 	 * for one full standard timer tick.  However, be aware that this
 	 * could potentially result in a suboptimal state transition.
 	 */
-	if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
-		measured_us = USEC_PER_SEC / HZ;
+	if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
+		last_idle_us = USEC_PER_SEC / HZ;
 
-	/* Predict time remaining until next break event */
-	if (measured_us + BREAK_FUZZ < data->expected_us - target->exit_latency) {
-		data->predicted_us = max(measured_us, data->last_measured_us);
+	/*
+	 * measured_us and elapsed_us are the cumulative idle time, since the
+	 * last time we were woken out of idle by an interrupt.
+	 */
+	if (data->elapsed_us <= data->elapsed_us + last_idle_us)
+		measured_us = data->elapsed_us + last_idle_us;
+	else
+		measured_us = -1;
+
+	/* Predict time until next break event */
+	data->predicted_us = max(measured_us, data->last_measured_us);
+
+	if (last_idle_us + BREAK_FUZZ <
+	    data->expected_us - target->exit_latency) {
 		data->last_measured_us = measured_us;
 		data->elapsed_us = 0;
 	} else {
-		if (data->elapsed_us < data->elapsed_us + measured_us)
-			data->elapsed_us = measured_us;
-		else
-			data->elapsed_us = -1;
-		data->predicted_us = max(measured_us, data->last_measured_us);
+		data->elapsed_us = measured_us;
 	}
 }
 

linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch:

--- NEW FILE linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch ---
From: venkatesh.pallipadi at intel.com <venkatesh.pallipadi at intel.com>
Date: Thu, 31 Jul 2008 02:21:44 +0000 (-0700)
Subject: cpuidle: Make ladder governor honor latency requirements fully
X-Git-Tag: v2.6.27-rc4~59^2^2
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=06d9e908b2248f983b186aaf569c58e1430db85d

cpuidle: Make ladder governor honor latency requirements fully

ladder governor only honored latency requirement when promoting C-states.
Instead, it should check for latency requirement on each idle call,
and demote to appropriate C-state when there is a latency requirement change.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi at intel.com>
Signed-off-by: Andi Kleen <ak at linux.intel.com>
---

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 27ab3bf..a4bec3f 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -99,6 +99,18 @@ static int ladder_select_state(struct cpuidle_device *dev)
 
 	/* consider demotion */
 	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
+	    dev->states[last_idx].exit_latency > latency_req) {
+		int i;
+
+		for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
+			if (dev->states[i].exit_latency <= latency_req)
+				break;
+		}
+		ladder_do_selection(ldev, last_idx, i);
+		return i;
+	}
+
+	if (last_idx > CPUIDLE_DRIVER_STATE_START &&
 	    last_residency < last_state->threshold.demotion_time) {
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-9/kernel.spec,v
retrieving revision 1.750
retrieving revision 1.751
diff -u -r1.750 -r1.751
--- kernel.spec	31 Aug 2008 01:22:56 -0000	1.750
+++ kernel.spec	31 Aug 2008 01:28:44 -0000	1.751
@@ -639,6 +639,9 @@
 Patch700: linux-2.6-nfs-client-mounts-hang.patch
 
 Patch800: linux-2.6-acpi-processor-use-signed-int.patch
+Patch810: linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch
+Patch820: linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch
+Patch830: linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch
 
 Patch1101: linux-2.6-default-mmf_dump_elf_headers.patch
 Patch1400: linux-2.6-smarter-relatime.patch
@@ -1076,6 +1079,10 @@
 # ACPI
 # obvious bug in processor driver
 ApplyPatch linux-2.6-acpi-processor-use-signed-int.patch
+# fix cpuidle misbehavior
+ApplyPatch linux-2.6-cpuidle-1-do-not-use-poll_idle-unless-user-asks-for-it.patch
+ApplyPatch linux-2.6-cpuidle-2-menu-governor-fix-wrong-usage-of-measured_us.patch
+ApplyPatch linux-2.6-cpuidle-3-make-ladder-governor-honor-latency-requirements.patch
 
 # Various low-impact patches to aid debugging.
 ApplyPatch linux-2.6-debug-sizeof-structs.patch
@@ -1802,6 +1809,9 @@
 %kernel_variant_files -a /%{image_install_path}/xen*-%{KVERREL}.xen -e /etc/ld.so.conf.d/kernelcap-%{KVERREL}.xen.conf %{with_xen} xen
 
 %changelog
+* Sat Aug 30 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.26.3-24
+- Fix cpuidle misbehavior. (#459214)
+
 * Sat Aug 30 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.26.3-23
 - Add two bio patches scheduled for -stable.
 




More information about the fedora-extras-commits mailing list