rpms/kernel/F-12 linux-2.6-ksm-updates-from-32.patch, NONE, 1.1 kernel.spec, 1.1913, 1.1914 linux-2.6-ksm-kvm.patch, 1.3, 1.4

Justin M. Forbes jforbes at fedoraproject.org
Wed Nov 11 17:13:02 UTC 2009


Author: jforbes

Update of /cvs/pkgs/rpms/kernel/F-12
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv30108

Modified Files:
	kernel.spec linux-2.6-ksm-kvm.patch 
Added Files:
	linux-2.6-ksm-updates-from-32.patch 
Log Message:
Add KSM fixes from 2.6.32 and fix KSM for i686 users (#532215)

linux-2.6-ksm-updates-from-32.patch:
 b/mm/ksm.c            |   15 +++++++++++----
 include/linux/ksm.h   |   20 --------------------
 include/linux/sched.h |    2 +-
 mm/ksm.c              |   46 ++++++++++++++++++++++++++--------------------
 mm/mmap.c             |    2 --
 mm/mremap.c           |    9 +++++----
 mm/oom_kill.c         |    2 +-
 mm/swapfile.c         |    5 ++---
 8 files changed, 46 insertions(+), 55 deletions(-)

--- NEW FILE linux-2.6-ksm-updates-from-32.patch ---
Date: 	Sat, 5 Sep 2009 22:22:23 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
To: Andrew Morton <akpm at linux-foundation.org>
cc: Izik Eidus <ieidus at redhat.com>, Andrea Arcangeli <aarcange at redhat.com>,
        linux-kernel at vger.kernel.org, linux-mm at kvack.org
Subject: [PATCH 1/3] ksm: clean up obsolete references

A few cleanups, given the munlock fix: the comment on ksm_test_exit()
no longer applies, and it can be made private to ksm.c; there's no
more reference to mmu_gather or tlb.h, and mmap.c doesn't need ksm.h.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 include/linux/ksm.h |   20 --------------------
 mm/ksm.c            |   14 +++++++++++++-
 mm/mmap.c           |    1 -
 3 files changed, 13 insertions(+), 22 deletions(-)

--- mmotm/include/linux/ksm.h	2009-09-05 14:40:16.000000000 +0100
+++ linux/include/linux/ksm.h	2009-09-05 16:41:55.000000000 +0100
@@ -12,8 +12,6 @@
 #include <linux/sched.h>
 #include <linux/vmstat.h>
 
-struct mmu_gather;
-
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -27,19 +25,6 @@ static inline int ksm_fork(struct mm_str
 	return 0;
 }
 
-/*
- * For KSM to handle OOM without deadlock when it's breaking COW in a
- * likely victim of the OOM killer, exit_mmap() has to serialize with
- * ksm_exit() after freeing mm's pages but before freeing its page tables.
- * That leaves a window in which KSM might refault pages which have just
- * been finally unmapped: guard against that with ksm_test_exit(), and
- * use it after getting mmap_sem in ksm.c, to check if mm is exiting.
- */
-static inline bool ksm_test_exit(struct mm_struct *mm)
-{
-	return atomic_read(&mm->mm_users) == 0;
-}
-
 static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
@@ -78,11 +63,6 @@ static inline int ksm_fork(struct mm_str
 {
 	return 0;
 }
-
-static inline bool ksm_test_exit(struct mm_struct *mm)
-{
-	return 0;
-}
 
 static inline void ksm_exit(struct mm_struct *mm)
 {
--- mmotm/mm/ksm.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/ksm.c	2009-09-05 16:41:55.000000000 +0100
@@ -32,7 +32,6 @@
 #include <linux/mmu_notifier.h>
 #include <linux/ksm.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 /*
@@ -285,6 +284,19 @@ static inline int in_stable_tree(struct
 }
 
 /*
+ * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
+ * page tables after it has passed through ksm_exit() - which, if necessary,
+ * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
+ * a special flag: they can just back out as soon as mm_users goes to zero.
+ * ksm_test_exit() is used throughout to make this test for exit: in some
+ * places for correctness, in some places just to avoid unnecessary work.
+ */
+static inline bool ksm_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
+/*
  * We use break_ksm to break COW on a ksm page: it's a stripped down
  *
  *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
--- mmotm/mm/mmap.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/mmap.c	2009-09-05 16:41:55.000000000 +0100
@@ -27,7 +27,6 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
-#include <linux/ksm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/perf_counter.h>
 #include <linux/hugetlb.h>
--

Date: 	Sat, 5 Sep 2009 22:25:21 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
To: Andrew Morton <akpm at linux-foundation.org>
cc: Izik Eidus <ieidus at redhat.com>, Andrea Arcangeli <aarcange at redhat.com>,
        linux-kernel at vger.kernel.org, linux-mm at kvack.org
Subject: [PATCH 2/3] ksm: unmerge is an origin of OOMs

Just as the swapoff system call allocates many pages of RAM to various
processes, perhaps triggering OOM, so "echo 2 >/sys/kernel/mm/ksm/run"
(unmerge) is liable to allocate many pages of RAM to various processes,
perhaps triggering OOM; and each is normally run from a modest admin
process (swapoff or shell), easily repeated until it succeeds.

So treat unmerge_and_remove_all_rmap_items() in the same way that we
treat try_to_unuse(): generalize PF_SWAPOFF to PF_OOM_ORIGIN, and
bracket both with that, to ask the OOM killer to kill them first,
to prevent them from spawning more and more OOM kills.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 include/linux/sched.h |    2 +-
 mm/ksm.c              |    2 ++
 mm/oom_kill.c         |    2 +-
 mm/swapfile.c         |    4 ++--
 4 files changed, 6 insertions(+), 4 deletions(-)

--- mmotm/include/linux/sched.h	2009-09-05 14:40:16.000000000 +0100
+++ linux/include/linux/sched.h	2009-09-05 16:41:55.000000000 +0100
@@ -1755,7 +1755,7 @@ extern cputime_t task_gtime(struct task_
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
-#define PF_SWAPOFF	0x00080000	/* I am in swapoff */
+#define PF_OOM_ORIGIN	0x00080000	/* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
--- mmotm/mm/ksm.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/ksm.c	2009-09-05 16:41:55.000000000 +0100
@@ -1564,7 +1564,9 @@ static ssize_t run_store(struct kobject
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
+			current->flags |= PF_OOM_ORIGIN;
 			err = unmerge_and_remove_all_rmap_items();
+			current->flags &= ~PF_OOM_ORIGIN;
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
--- mmotm/mm/oom_kill.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/oom_kill.c	2009-09-05 16:41:55.000000000 +0100
@@ -103,7 +103,7 @@ unsigned long badness(struct task_struct
 	/*
 	 * swapoff can easily use up all memory, so kill those first.
 	 */
-	if (p->flags & PF_SWAPOFF)
+	if (p->flags & PF_OOM_ORIGIN)
 		return ULONG_MAX;
 
 	/*
--- mmotm/mm/swapfile.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/swapfile.c	2009-09-05 16:41:55.000000000 +0100
@@ -1573,9 +1573,9 @@ SYSCALL_DEFINE1(swapoff, const char __us
 	p->flags &= ~SWP_WRITEOK;
 	spin_unlock(&swap_lock);
 
-	current->flags |= PF_SWAPOFF;
+	current->flags |= PF_OOM_ORIGIN;
 	err = try_to_unuse(type);
-	current->flags &= ~PF_SWAPOFF;
+	current->flags &= ~PF_OOM_ORIGIN;
 
 	if (err) {
 		/* re-insert swap space back into swap_list */
--

Date: 	Sat, 5 Sep 2009 22:26:48 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
To: Andrew Morton <akpm at linux-foundation.org>
cc: Izik Eidus <ieidus at redhat.com>, Andrea Arcangeli <aarcange at redhat.com>,
        linux-kernel at vger.kernel.org, linux-mm at kvack.org
Subject: [PATCH 3/3] ksm: mremap use err from ksm_madvise

mremap move's use of ksm_madvise() was assuming -ENOMEM on failure,
because ksm_madvise used to say -EAGAIN for that; but ksm_madvise now
says -ENOMEM (letting madvise convert that to -EAGAIN), and can also
say -ERESTARTSYS when signalled: so pass the error from ksm_madvise.

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/mremap.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

--- mmotm/mm/mremap.c	2009-09-05 14:40:16.000000000 +0100
+++ linux/mm/mremap.c	2009-09-05 16:41:55.000000000 +0100
@@ -175,6 +175,7 @@ static unsigned long move_vma(struct vm_
 	unsigned long excess = 0;
 	unsigned long hiwater_vm;
 	int split = 0;
+	int err;
 
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
@@ -190,9 +191,10 @@ static unsigned long move_vma(struct vm_
 	 * pages recently unmapped.  But leave vma->vm_flags as it was,
 	 * so KSM can come around to merge on vma and new_vma afterwards.
 	 */
-	if (ksm_madvise(vma, old_addr, old_addr + old_len,
-						MADV_UNMERGEABLE, &vm_flags))
-		return -ENOMEM;
+	err = ksm_madvise(vma, old_addr, old_addr + old_len,
+						MADV_UNMERGEABLE, &vm_flags);
+	if (err)
+		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
 	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
--

Date: 	Mon, 21 Sep 2009 13:43:15 +0100 (BST)
From: Hugh Dickins <hugh.dickins at tiscali.co.uk>
To: Andrew Morton <akpm at linux-foundation.org>
cc: Izik Eidus <ieidus at redhat.com>, Andrea Arcangeli <aarcange at redhat.com>,
        linux-kernel at vger.kernel.org, linux-mm at kvack.org
Subject: [PATCH] ksm: fix rare page leak

In the rare case when stable_tree_insert() finds a match when the prior
stable_tree_search() did not, it forgot to free the page reference (the
omission looks intentional, but I think that's because something else
used to be done there).

Fix that by one put_page() for all three cases, call it tree_page
rather than page2[0], clarify the comment on this exceptional case,
and remove the comment in stable_tree_search() which contradicts it!

Signed-off-by: Hugh Dickins <hugh.dickins at tiscali.co.uk>
---

 mm/ksm.c |   29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

--- mmotm/mm/ksm.c	2009-09-14 16:34:37.000000000 +0100
+++ linux/mm/ksm.c	2009-09-21 13:12:07.000000000 +0100
@@ -904,10 +904,6 @@ static struct rmap_item *stable_tree_sea
 		if (!tree_rmap_item)
 			return NULL;
 
-		/*
-		 * We can trust the value of the memcmp as we know the pages
-		 * are write protected.
-		 */
 		ret = memcmp_pages(page, page2[0]);
 
 		if (ret < 0) {
@@ -939,18 +935,18 @@ static struct rmap_item *stable_tree_ins
 {
 	struct rb_node **new = &root_stable_tree.rb_node;
 	struct rb_node *parent = NULL;
-	struct page *page2[1];
 
 	while (*new) {
 		struct rmap_item *tree_rmap_item, *next_rmap_item;
+		struct page *tree_page;
 		int ret;
 
 		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
 		while (tree_rmap_item) {
 			BUG_ON(!in_stable_tree(tree_rmap_item));
 			cond_resched();
-			page2[0] = get_ksm_page(tree_rmap_item);
-			if (page2[0])
+			tree_page = get_ksm_page(tree_rmap_item);
+			if (tree_page)
 				break;
 			next_rmap_item = tree_rmap_item->next;
 			remove_rmap_item_from_tree(tree_rmap_item);
@@ -959,22 +955,19 @@ static struct rmap_item *stable_tree_ins
 		if (!tree_rmap_item)
 			return NULL;
 
-		ret = memcmp_pages(page, page2[0]);
+		ret = memcmp_pages(page, tree_page);
+		put_page(tree_page);
 
 		parent = *new;
-		if (ret < 0) {
-			put_page(page2[0]);
+		if (ret < 0)
 			new = &parent->rb_left;
-		} else if (ret > 0) {
-			put_page(page2[0]);
+		else if (ret > 0)
 			new = &parent->rb_right;
-		} else {
+		else {
 			/*
-			 * It is not a bug when we come here (the fact that
-			 * we didn't find the page inside the stable tree):
-			 * because when we searched for the page inside the
-			 * stable tree it was still not write-protected,
-			 * so therefore it could have changed later.
+			 * It is not a bug that stable_tree_search() didn't
+			 * find this node: because at that time our page was
+			 * not yet write-protected, so may have changed since.
 			 */
 			return NULL;
 		}
--

From: Izik Eidus <ieidus at redhat.com>
To: akpm at linux-foundation.org
Cc: linux-kernel at vger.kernel.org, linux-mm at kvack.org,
        hugh.dickins at tiscali.co.uk, aarcange at redhat.com,
        Izik Eidus <ieidus at redhat.com>
Subject: [PATCH] ksm: change default values to better fit into mainline kernel
Date: 	Wed, 23 Sep 2009 23:05:47 +0300

Now that KSM is in mainline, it is better to change the default values
to suit most users.

This patch changes the KSM defaults to:
ksm_thread_pages_to_scan = 100 (instead of 200)
ksm_thread_sleep_millisecs = 20 (unchanged)
ksm_run = KSM_RUN_STOP (instead of KSM_RUN_MERGE - meaning KSM is
                        disabled by default)
ksm_max_kernel_pages = nr_free_buffer_pages() / 4 (instead of 2000)

The important aspects of this patch are that it disables KSM by default
and sets the number of unswappable kernel pages KSM may allocate to a
reasonable value.

Signed-off-by: Izik Eidus <ieidus at redhat.com>
---
 mm/ksm.c |   14 +++++++++++---
 1 files changed, 11 insertions(+), 3 deletions(-)
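
With ksm_run now defaulting to KSM_RUN_STOP, merging has to be switched
on explicitly before ksmd does any work.  A minimal userspace sketch of
that step, illustrative only and equivalent to
"echo 1 > /sys/kernel/mm/ksm/run" (the sysfs file referred to in the
unmerge patch earlier in this series):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/mm/ksm/run", "w");

	if (!f) {
		perror("/sys/kernel/mm/ksm/run");
		return 1;
	}
	/* 0 = KSM_RUN_STOP, 1 = KSM_RUN_MERGE, 2 = KSM_RUN_UNMERGE */
	fputs("1\n", f);
	fclose(f);
	return 0;
}

The other tunables changed here (pages_to_scan, sleep_millisecs,
max_kernel_pages) are exposed as files in the same /sys/kernel/mm/ksm/
directory.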

diff --git a/mm/ksm.c b/mm/ksm.c
index 37cc373..f7edac3 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/mmu_notifier.h>
+#include <linux/swap.h>
 #include <linux/ksm.h>
 
 #include <asm/tlbflush.h>
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared;
 static unsigned long ksm_rmap_items;
 
 /* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages = 2000;
+static unsigned long ksm_max_kernel_pages;
 
 /* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan = 200;
+static unsigned int ksm_thread_pages_to_scan = 100;
 
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20;
 #define KSM_RUN_STOP	0
 #define KSM_RUN_MERGE	1
 #define KSM_RUN_UNMERGE	2
-static unsigned int ksm_run = KSM_RUN_MERGE;
+static unsigned int ksm_run = KSM_RUN_STOP;
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL)
 
+static void __init ksm_init_max_kernel_pages(void)
+{
+	ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
+}
+
 static int __init ksm_slab_init(void)
 {
 	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void)
 	struct task_struct *ksm_thread;
 	int err;
 
+	ksm_init_max_kernel_pages();
+
 	err = ksm_slab_init();
 	if (err)
 		goto out;
-- 
1.5.6.5


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-12/kernel.spec,v
retrieving revision 1.1913
retrieving revision 1.1914
diff -u -p -r1.1913 -r1.1914
--- kernel.spec	8 Nov 2009 01:44:40 -0000	1.1913
+++ kernel.spec	11 Nov 2009 17:13:01 -0000	1.1914
@@ -696,6 +696,7 @@ Patch1550: linux-2.6-ksm.patch
 Patch1551: linux-2.6-ksm-kvm.patch
 Patch1552: linux-2.6-ksm-updates.patch
 Patch1553: linux-2.6-ksm-fix-munlock.patch
+Patch1554: linux-2.6-ksm-updates-from-32.patch
 Patch1579: linux-2.6-virtio_blk-revert-QUEUE_FLAG_VIRT-addition.patch
 Patch1583: linux-2.6-xen-fix-is_disconnected_device-exists_disconnected_device.patch
 Patch1584: linux-2.6-xen-improvement-to-wait_for_devices.patch
@@ -1406,6 +1407,7 @@ ApplyPatch hid-ignore-all-recent-imon-de
 ApplyPatch linux-2.6-ksm.patch
 ApplyPatch linux-2.6-ksm-updates.patch
 ApplyPatch linux-2.6-ksm-fix-munlock.patch
+ApplyPatch linux-2.6-ksm-updates-from-32.patch
 # Optimize KVM for KSM support
 ApplyPatch linux-2.6-ksm-kvm.patch
 
@@ -2163,6 +2165,10 @@ fi
 # and build.
 
 %changelog
+* Wed Nov 11 2009 Justin M. Forbes <jforbes at redhat.com> 2.6.31.5-128
+- Fix KSM for i686 users. (#532215)
+- Add KSM fixes from 2.6.32
+
 * Sun Nov 08 2009 David Woodhouse <David.Woodhouse at intel.com> 2.6.31.5-127
 - Apply fix for fallback when HP/Acer BIOS bug detected (#524808)
 - Re-enable DMAR.

linux-2.6-ksm-kvm.patch:
 arch/x86/include/asm/kvm_host.h |    1 
 arch/x86/kvm/mmu.c              |   91 ++++++++++++++++++++++++++++++++--------
 arch/x86/kvm/paging_tmpl.h      |   15 +++++-
 virt/kvm/kvm_main.c             |   14 ++++++
 4 files changed, 101 insertions(+), 20 deletions(-)

Index: linux-2.6-ksm-kvm.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-12/linux-2.6-ksm-kvm.patch,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -p -r1.3 -r1.4
--- linux-2.6-ksm-kvm.patch	26 Aug 2009 19:25:56 -0000	1.3
+++ linux-2.6-ksm-kvm.patch	11 Nov 2009 17:13:01 -0000	1.4
@@ -99,7 +99,7 @@ Signed-off-by: Justin M. Forbes <jforbes
 +			spte = rmap_next(kvm, rmapp, NULL);
 +		} else {
 +			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
-+			new_spte |= new_pfn << PAGE_SHIFT;
++			new_spte |= (u64)new_pfn << PAGE_SHIFT;
 +
 +			if (!pte_write(ptep_val(ptep))) {
 +				new_spte &= ~PT_WRITABLE_MASK;
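
The one-character change above is the i686 fix referenced in the
changelog (#532215): on i686 the pfn is held in a 32-bit unsigned long,
so shifting it left by PAGE_SHIFT without first widening it to u64
drops the upper bits, corrupting the resulting spte for any page frame
at or above 1 << 20 (physical memory beyond 4GB, reachable with PAE).
A standalone userspace sketch of the effect, using hypothetical values
rather than kernel code:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t new_pfn = 0x150000;	/* a page frame above the 4GB boundary */

	/* shift computed in 32-bit arithmetic, as the old line did on i686 */
	uint64_t truncated = new_pfn << PAGE_SHIFT;

	/* widened to 64 bits before shifting, as the patched line does */
	uint64_t fixed = (uint64_t)new_pfn << PAGE_SHIFT;

	printf("truncated: 0x%llx\n", (unsigned long long)truncated);
	printf("fixed:     0x%llx\n", (unsigned long long)fixed);
	return 0;
}

Running it prints 0x50000000 for the truncated value and 0x150000000
for the widened one.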



