[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

RE: [fedora-virt] f10 x86_64 xen VM guests fail to boot on f8 host (guest setting NX bit in L1 PTE?)



On Wed, 2009-01-21 at 12:22 +0900, Jon Swanson wrote:
> Thank you again Mark, Ian, and Phil.
> 
> As Phil pointed out, my working host has the NX feature, the broken host
> does not.
> 
> I also could not create a bugzilla account, but was able to use a
> co-worker's credentials to create this ticket:
> https://bugzilla.redhat.com/show_bug.cgi?id=480880
> 
> Ian, I didn't have any luck with the noexec=off.

It turns out that nonx_setup() and check_efer() both run quite a while
after all the set_page_prot() calls in xen_setup_kernel(), and those
calls include _PAGE_NX via PAGE_KERNEL_RO.

On 32-bit, __supported_pte_mask starts off without NX in it, and NX gets
added later if the system supports it. This is safe, but it means that the
pages frobbed by the early Xen setup won't have NX set when they could
(unless they all get frobbed again later?).

On 64-bit, __supported_pte_mask contains NX at start of day, and it is
taken away later on if the system turns out not to support it.

Native seems to mainly use _KERNPG_TABLE, which does not include NX. Can
you try this patch? (There are lots of printks because I don't have any
non-NX hardware to test properly.)

diff -r ec792b22009f arch/x86/mm/init_64.c
--- a/arch/x86/mm/init_64.c	Fri Jan 23 15:27:45 2009 +0000
+++ b/arch/x86/mm/init_64.c	Fri Jan 23 15:58:03 2009 +0000
@@ -103,12 +103,15 @@
  */
 static int __init nonx_setup(char *str)
 {
+	printk(KERN_CRIT "noexec_setup %s\n", str);
 	if (!str)
 		return -EINVAL;
 	if (!strncmp(str, "on", 2)) {
+		printk(KERN_CRIT "noexec_setup: enabling NX\n");
 		__supported_pte_mask |= _PAGE_NX;
 		do_not_nx = 0;
 	} else if (!strncmp(str, "off", 3)) {
+			printk(KERN_CRIT "noexec_setup: disabling NX\n");
 		do_not_nx = 1;
 		__supported_pte_mask &= ~_PAGE_NX;
 	}
@@ -121,8 +124,13 @@
 	unsigned long efer;
 
 	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || do_not_nx)
+	if (!(efer & EFER_NX) || do_not_nx) {
+		printk(KERN_CRIT "check_efer: disabling NX\n");
 		__supported_pte_mask &= ~_PAGE_NX;
+	} else
+		printk(KERN_CRIT "check_efer: leaving NX alone. supported_pte_mask %s the NX bit\n",
+		       __supported_pte_mask & _PAGE_NX ? "includes" : "excludes");
+
 }
 
 int force_personality32;
diff -r ec792b22009f arch/x86/xen/enlighten.c
--- a/arch/x86/xen/enlighten.c	Fri Jan 23 15:27:45 2009 +0000
+++ b/arch/x86/xen/enlighten.c	Fri Jan 23 15:58:03 2009 +0000
@@ -54,6 +54,9 @@
 #include "xen-ops.h"
 #include "mmu.h"
 #include "multicalls.h"
+
+#define _KERNPG_TABLE_RO __pgprot(_KERNPG_TABLE & ~_PAGE_RW)
+//#define _KERNPG_TABLE_RO (_KERNPG_TABLE)
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
@@ -1476,6 +1479,15 @@
 {
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);
+	static int once = 5;
+
+	if (once > 0 && pte_val(pte) & _PAGE_NX) {
+		once--;
+		printk(KERN_CRIT "set_page_prot to %#lx (incl NX) supported_pte_mask %#lx %s the NX bit\n",
+		       pgprot_val(prot), __supported_pte_mask, __supported_pte_mask & _PAGE_NX ? "includes" : "excludes");
+		printk(KERN_CRIT "pte is %#lx\n", pte_val(pte));
+		WARN_ON(1);
+	}
 
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
@@ -1522,9 +1534,9 @@
 	}
 
 	for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
-		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+		set_page_prot(&level1_ident_pgt[pteidx], _KERNPG_TABLE_RO);
 
-	set_page_prot(pmd, PAGE_KERNEL_RO);
+	set_page_prot(pmd, _KERNPG_TABLE_RO);
 }
 
 static __init void xen_ident_map_ISA(void)
@@ -1601,12 +1613,12 @@
 	xen_map_identity_early(level2_ident_pgt, max_pfn);
 
 	/* Make pagetable pieces RO */
-	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+	set_page_prot(init_level4_pgt, _KERNPG_TABLE_RO);
+	set_page_prot(level3_ident_pgt, _KERNPG_TABLE_RO);
+	set_page_prot(level3_kernel_pgt, _KERNPG_TABLE_RO);
+	set_page_prot(level3_user_vsyscall, _KERNPG_TABLE_RO);
+	set_page_prot(level2_kernel_pgt, _KERNPG_TABLE_RO);
+	set_page_prot(level2_fixmap_pgt, _KERNPG_TABLE_RO);
 
 	/* Pin down new L4 */
 	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
@@ -1670,9 +1682,9 @@
 	set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
 			__pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
 
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
-	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+	set_page_prot(level2_kernel_pgt, _KERNPG_TABLE_RO);
+	set_page_prot(swapper_pg_dir, _KERNPG_TABLE_RO);
+	set_page_prot(empty_zero_page, _KERNPG_TABLE_RO);
 
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]