[Crash-utility] Re: [PATCH] s390: Make page table functions more generic

Dave Anderson anderson at redhat.com
Thu Nov 15 18:50:46 UTC 2007


Michael Holzheu wrote:
> Hi Dave,
> 
> For s390(x) kernels the page table allocation method will be changed.
> Instead of 3 levels, it will now be possible to allocate 4 levels.
> 
> The current implementation of the page table walk functions in crash
> makes assumptions about how the page tables are allocated by the kernel.
> E.g. the number of levels (three) is hard-coded.
> 
> This patch removes those assumptions: the page table walk is now done
> purely according to the s390 architecture, without assumptions about
> the implementation in the kernel.
> 
> So both old and new kernels are supported.
> 

Hi Michael,

I have complete faith in you...  ;-)

And this certainly simplifies things considerably, which
is always good.

But -- can you give me a warm-and-fuzzy feeling by confirming
that you tested this on a RHEL kernel?  And that "make warn"
compiles cleanly?

Thanks,
   Dave

> ---
> 
>  s390.c  |  144 +++++++++++++++++++++++-------------------------
>  s390x.c |  191 ++++++++++++++++++++++++++--------------------------------------
>  2 files changed, 151 insertions(+), 184 deletions(-)
> 
> diff -Naur crash-4.0-4.8/s390.c crash-4.0-4.8-page-table-walk/s390.c
> --- crash-4.0-4.8/s390.c	2007-10-30 16:51:54.000000000 +0100
> +++ crash-4.0-4.8-page-table-walk/s390.c	2007-11-15 15:44:07.000000000 +0100
> @@ -21,17 +21,6 @@
>  #define S390_WORD_SIZE    4
>  #define S390_ADDR_MASK    0x7fffffff
>  
> -#define S390_PAGE_SHIFT   12
> -#define S390_PAGE_SIZE    (1UL << S390_PAGE_SHIFT)
> -#define S390_PAGE_MASK     (~(S390_PAGE_SIZE-1))
> -
> -#define S390_PGDIR_SHIFT  20
> -#define S390_PGDIR_SIZE   (1UL << S390_PGDIR_SHIFT)
> -#define S390_PGDIR_MASK   (~(S390_PGDIR_SIZE-1))
> -
> -#define S390_PTRS_PER_PGD       2048
> -#define S390_PTRS_PER_PTE       256
> -
>  #define S390_PMD_BASE_MASK      (~((1UL<<6)-1))
>  #define S390_PT_BASE_MASK       S390_PMD_BASE_MASK
>  #define S390_PAGE_BASE_MASK     (~((1UL<<12)-1))
> @@ -44,26 +33,10 @@
>  #define S390_PAGE_INVALID       0x400    /* HW invalid */
>  #define S390_PAGE_INVALID_MASK  0x601ULL /* for linux 2.6 */
>  #define S390_PAGE_INVALID_NONE  0x401ULL /* for linux 2.6 */
> -#define S390_PAGE_TABLE_LEN     0xf      /* only full page-tables */
> -#define S390_PAGE_TABLE_INV     0x20     /* invalid page-table */
>  
>  #define S390_PTE_INVALID_MASK   0x80000900
>  #define S390_PTE_INVALID(x) ((x) & S390_PTE_INVALID_MASK)
>  
> -#define S390_PMD_INVALID_MASK   0x80000000
> -#define S390_PMD_INVALID(x) ((x) & S390_PMD_INVALID_MASK)
> -
> -/* pgd/pmd/pte query macros */
> -#define s390_pmd_none(x) ((x) & S390_PAGE_TABLE_INV)
> -#define s390_pmd_bad(x) (((x) & (~S390_PMD_BASE_MASK & \
> -                                 ~S390_PAGE_TABLE_INV)) != \
> -                                 S390_PAGE_TABLE_LEN)
> -
> -#define s390_pte_none(x) (((x) & (S390_PAGE_INVALID | S390_RO_S390 | \
> -                                  S390_PAGE_PRESENT)) == \
> -                                  S390_PAGE_INVALID)
> -
> -
>  #define ASYNC_STACK_SIZE  STACKSIZE() // can be 4096 or 8192
>  #define KERNEL_STACK_SIZE STACKSIZE() // can be 4096 or 8192
>  
> @@ -73,8 +46,6 @@
>   * declarations of static functions
>   */
>  static void s390_print_lowcore(char*, struct bt_info*,int);
> -static unsigned long s390_pgd_offset(unsigned long, unsigned long);
> -static unsigned long s390_pte_offset(unsigned long, unsigned long);
>  static int s390_kvtop(struct task_context *, ulong, physaddr_t *, int);
>  static int s390_uvtop(struct task_context *, ulong, physaddr_t *, int);
>  static int s390_vtop(unsigned long, ulong, physaddr_t*, int);
> @@ -292,60 +263,87 @@
>  /*
>   * page table traversal functions
>   */
> -static unsigned long 
> -s390_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
> -{
> -	unsigned long pgd_off, pmd_base;
>  
> -	pgd_off = ((vaddr >> S390_PGDIR_SHIFT) & (S390_PTRS_PER_PGD - 1))
> -		* S390_WORD_SIZE;
> -	readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base,sizeof(long),
> -		"pgd_base",FAULT_ON_ERROR);
> -	return pmd_base;
> -}
> -
> -unsigned long s390_pte_offset(unsigned long pte_base, unsigned long vaddr)
> +/* Segment table traversal function */
> +static ulong _kl_sg_table_deref_s390(ulong vaddr, ulong table, int len)
>  {
> -	unsigned pte_off, pte_val;
> +	ulong offset, entry;
> +
> +	offset = ((vaddr >> 20) & 0x7ffUL) * 4;
> +	if (offset >= (len + 1)*64)
> +		/* Offset is over the table limit. */
> +		return 0;
> +	readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> +		FAULT_ON_ERROR);
>  
> -	pte_off = ((vaddr >> S390_PAGE_SHIFT) & (S390_PTRS_PER_PTE - 1))
> -		* S390_WORD_SIZE;
> -	readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
> -		"pte_val",FAULT_ON_ERROR);
> -	return pte_val;
> +	/*
> +	 * Check if the segment table entry could be read and doesn't have
> +	 * any of the reserved bits set.
> +	 */
> +	if (entry & 0x80000000UL)
> +		return 0;
> +	/* Check if the segment table entry has the invalid bit set. */
> +	if (entry & 0x40UL)
> +		return 0;
> +	/* Segment table entry is valid and well formed. */
> +	return entry;
> +}
> +
> +/* Page table traversal function */
> +static ulong _kl_pg_table_deref_s390(ulong vaddr, ulong table, int len)
> +{
> +	ulong offset, entry;
> +
> +	offset = ((vaddr >> 12) & 0xffUL) * 4;
> +	if (offset >= (len + 1)*64)
> +		/* Offset is over the table limit. */
> +		return 0;
> +	readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> +		FAULT_ON_ERROR);
> +	/*
> +	 * Check if the page table entry could be read and doesn't have
> +	 * any of the reserved bits set.
> +	 */
> +	if (entry & 0x80000900UL)
> +		return 0;
> +	/* Check if the page table entry has the invalid bit set. */
> +	if (entry & 0x400UL)
> +		return 0;
> +	/* Page table entry is valid and well formed. */
> +	return entry;
>  }
>  
> -/*
> - * Generic vtop function for user and kernel addresses
> - */
> +/* lookup virtual address in page tables */
>  static int
> -s390_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
> +s390_vtop(unsigned long table, ulong vaddr, physaddr_t *phys_addr, int verbose)
>  {
> -	unsigned pte_base, pte_val;
> +	ulong entry, paddr;
> +	int len;
>  
> -	/* get the pgd entry */
> -	pte_base = s390_pgd_offset(pgd_base,kvaddr);
> -	if(S390_PMD_INVALID(pte_base) ||
> -	   s390_pmd_bad(pte_base) ||
> -	   s390_pmd_none(pte_base)) {
> -		*paddr = 0;
> -		return FALSE;
> -	}
> -	/* get the pte */
> -	pte_base = pte_base & S390_PT_BASE_MASK;
> -	pte_val = s390_pte_offset(pte_base,kvaddr);
> -	if(S390_PTE_INVALID(pte_val) ||
> -	   s390_pte_none(pte_val)){
> -		*paddr = 0;
> +	/*
> +	 * Get the segment table entry.
> +	 * We assume that the segment table length field in the asce
> +	 * is set to the maximum value of 127 (which translates to
> +	 * a segment table with 2048 entries) and that the addressing
> +	 * mode is 31 bit.
> +	 */
> +	entry = _kl_sg_table_deref_s390(vaddr, table, 127);
> +	if (!entry)
>  		return FALSE;
> -	}
> -	if(!s390_pte_present(pte_val)){
> -		/* swapped out */
> -		*paddr = pte_val;
> +	table = entry & 0x7ffffc00UL;
> +	len = entry & 0xfUL;
> +
> +	/* Get the page table entry */
> +	entry = _kl_pg_table_deref_s390(vaddr, table, len);
> +	if (!entry)
>  		return FALSE;
> -	}
> -	*paddr = (pte_val & S390_PAGE_BASE_MASK) |
> -		  (kvaddr & (~(S390_PAGE_MASK)));
> +
> +	/* Isolate the page origin from the page table entry. */
> +	paddr = entry & 0x7ffff000UL;
> +
> +	/* Add the page offset and return the final value. */
> +	*phys_addr = paddr + (vaddr & 0xfffUL);
> +
>  	return TRUE;
>  }
>  
> diff -Naur crash-4.0-4.8/s390x.c crash-4.0-4.8-page-table-walk/s390x.c
> --- crash-4.0-4.8/s390x.c	2007-10-30 16:51:54.000000000 +0100
> +++ crash-4.0-4.8-page-table-walk/s390x.c	2007-11-15 15:44:33.000000000 +0100
> @@ -20,24 +20,6 @@
>  
>  #define S390X_WORD_SIZE   8
>  
> -#define S390X_PAGE_SHIFT  12
> -#define S390X_PAGE_SIZE   (1ULL << S390X_PAGE_SHIFT)
> -#define S390X_PAGE_MASK   (~(S390X_PAGE_SIZE-1))
> -
> -#define S390X_PGDIR_SHIFT 31
> -#define S390X_PGDIR_SIZE  (1ULL << S390X_PGDIR_SHIFT)
> -#define S390X_PGDIR_MASK  (~(S390X_PGDIR_SIZE-1))
> -
> -#define S390X_PMD_SHIFT   20
> -#define S390X_PMD_SIZE    (1ULL << S390X_PMD_SHIFT)
> -#define S390X_PMD_MASK    (~(S390X_PMD_SIZE-1))
> -
> -#define S390X_PTRS_PER_PGD       2048
> -#define S390X_PTRS_PER_PMD       2048
> -#define S390X_PTRS_PER_PTE       256
> -
> -#define S390X_PMD_BASE_MASK      (~((1ULL<<12)-1))
> -#define S390X_PT_BASE_MASK       (~((1ULL<<11)-1))
>  #define S390X_PAGE_BASE_MASK     (~((1ULL<<12)-1))
>  
>  /* Flags used in entries of page dirs and page tables.
> @@ -48,37 +30,11 @@
>  #define S390X_PAGE_INVALID      0x400ULL /* HW invalid */
>  #define S390X_PAGE_INVALID_MASK 0x601ULL /* for linux 2.6 */
>  #define S390X_PAGE_INVALID_NONE 0x401ULL /* for linux 2.6 */
> -#define S390X_PMD_ENTRY_INV     0x20ULL  /* invalid segment table entry      */
> -#define S390X_PGD_ENTRY_INV     0x20ULL  /* invalid region table entry       */
> -#define S390X_PMD_ENTRY         0x00
> -#define S390X_PGD_ENTRY_FIRST   0x05     /* first part of pmd is valid */
> -#define S390X_PGD_ENTRY_SECOND  0xc7     /* second part of pmd is valid */
> -#define S390X_PGD_ENTRY_FULL    0x07     /* complete pmd is valid */
>  
>  /* bits 52, 55 must contain zeroes in a pte */
>  #define S390X_PTE_INVALID_MASK  0x900ULL
>  #define S390X_PTE_INVALID(x) ((x) & S390X_PTE_INVALID_MASK)
>  
> -/* pgd/pmd/pte query macros */
> -#define s390x_pgd_none(x) ((x) & S390X_PGD_ENTRY_INV)
> -#define s390x_pgd_bad(x) !( (((x) & S390X_PGD_ENTRY_FIRST) == \
> -                                    S390X_PGD_ENTRY_FIRST) || \
> -                                    (((x) & S390X_PGD_ENTRY_SECOND) == \
> -                                    S390X_PGD_ENTRY_SECOND) || \
> -                                    (((x) & S390X_PGD_ENTRY_FULL) == \
> -                                    S390X_PGD_ENTRY_FULL))
> -
> -#define s390x_pmd_none(x) ((x) & S390X_PMD_ENTRY_INV)
> -#define s390x_pmd_bad(x) (((x) & (~S390X_PT_BASE_MASK & \
> -                                  ~S390X_PMD_ENTRY_INV)) != \
> -                                  S390X_PMD_ENTRY)
> -
> -#define s390x_pte_none(x) (((x) & (S390X_PAGE_INVALID | \
> -                                   S390X_PAGE_RO | \
> -                                   S390X_PAGE_PRESENT)) == \
> -                                   S390X_PAGE_INVALID)
> -
> -
>  #define ASYNC_STACK_SIZE  STACKSIZE() // can be 8192 or 16384
>  #define KERNEL_STACK_SIZE STACKSIZE() // can be 8192 or 16384
>  
> @@ -88,9 +44,6 @@
>   * declarations of static functions
>   */
>  static void s390x_print_lowcore(char*, struct bt_info*,int);
> -static unsigned long s390x_pgd_offset(unsigned long, unsigned long);
> -static unsigned long s390x_pmd_offset(unsigned long, unsigned long);
> -static unsigned long s390x_pte_offset(unsigned long, unsigned long);
>  static int s390x_kvtop(struct task_context *, ulong, physaddr_t *, int);
>  static int s390x_uvtop(struct task_context *, ulong, physaddr_t *, int);
>  static int s390x_vtop(unsigned long, ulong, physaddr_t*, int);
> @@ -304,81 +257,97 @@
>  	}
>  }
>  
> -/* 
> +/*
>   * page table traversal functions 
>   */
> -unsigned long s390x_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
> -{
> -	unsigned long pgd_off, pmd_base;
> -
> -	pgd_off = ((vaddr >> S390X_PGDIR_SHIFT) &
> -		   (S390X_PTRS_PER_PGD - 1)) * 8;
> -	readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base, sizeof(long),
> -		"pmd_base",FAULT_ON_ERROR);
> -
> -	return pmd_base;
> -}
>  
> -unsigned long s390x_pmd_offset(unsigned long pmd_base, unsigned long vaddr)
> -{
> -	unsigned long pmd_off, pte_base;
> -
> -	pmd_off = ((vaddr >> S390X_PMD_SHIFT) & (S390X_PTRS_PER_PMD - 1))
> -		* 8;
> -	readmem(pmd_base + pmd_off, PHYSADDR, &pte_base, sizeof(long),
> -		"pte_base",FAULT_ON_ERROR);
> -	return pte_base;
> -}
> -
> -unsigned long s390x_pte_offset(unsigned long pte_base, unsigned long vaddr)
> -{
> -	unsigned long pte_off, pte_val;
> -
> -	pte_off = ((vaddr >> S390X_PAGE_SHIFT) & (S390X_PTRS_PER_PTE - 1))
> -		* 8;
> -	readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
> -		"pte_val",FAULT_ON_ERROR);
> -	return pte_val;
> +/* Region or segment table traversal function */
> +static ulong _kl_rsg_table_deref_s390x(ulong vaddr, ulong table,
> +					 int len, int level)
> +{
> +	ulong offset, entry;
> +
> +	offset = ((vaddr >> (11*level + 20)) & 0x7ffULL) * 8;
> +	if (offset >= (len + 1)*4096)
> +		/* Offset is over the table limit. */
> +		return 0;
> +	readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> +		FAULT_ON_ERROR);
> +	/*
> +	 * Check that the region or segment table entry could be read and
> +	 * that its type bits (mask 0xc) match the expected table level.
> +	 */
> +	if ((entry & 0xcULL) != (level << 2))
> +		return 0;
> +	/* Check if the region table entry has the invalid bit set. */
> +	if (entry & 0x40ULL)
> +		return 0;
> +	/* Region table entry is valid and well formed. */
> +	return entry;
>  }
>  
> -/*
> - * Generic vtop function for user and kernel addresses
> - */
> -static int
> -s390x_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
> +/* Page table traversal function */
> +static ulong _kl_pg_table_deref_s390x(ulong vaddr, ulong table)
>  {
> -	unsigned long pmd_base, pte_base, pte_val;
> +	ulong offset, entry;
>  
> -	/* get the pgd entry */
> -	pmd_base = s390x_pgd_offset(pgd_base,kvaddr);
> -	if(s390x_pgd_bad(pmd_base) ||
> -	   s390x_pgd_none(pmd_base)){
> -		*paddr = 0;
> +	offset = ((vaddr >> 12) & 0xffULL) * 8;
> +	readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> +		FAULT_ON_ERROR);
> +	/*
> +	 * Check if the page table entry could be read and doesn't have
> +	 * any of the reserved bits set.
> +	 */
> +	if (entry & 0x900ULL)
> +		return 0;
> +	/* Check if the page table entry has the invalid bit set. */
> +	if (entry & 0x400ULL)
> +		return 0;
> +	/* Page table entry is valid and well formed. */
> +	return entry;
> +}
> +
> +/* lookup virtual address in page tables */
> +int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
> +{
> +	ulong entry, paddr;
> +	int level, len;
> +
> +	/*
> +	 * Walk the region and segment tables.
> +	 * We assume that the table length field in the asce is set to the
> +	 * maximum value of 3 (which translates to a region first, region
> +	 * second, region third or segment table with 2048 entries) and that
> +	 * the addressing mode is 64 bit.
> +	 */
> +	len = 3;
> +	/* Read the first entry to find the number of page table levels. */
> +	readmem(table, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
> +	level = (entry & 0xcULL) >> 2;
> +	if ((vaddr >> (31 + 11*level)) != 0ULL) {
> +		/* Address too big for the number of page table levels. */
>  		return FALSE;
>  	}
> -	/* get the pmd */
> -	pmd_base = pmd_base & S390X_PMD_BASE_MASK;
> -	pte_base = s390x_pmd_offset(pmd_base,kvaddr);
> -	if(s390x_pmd_bad(pte_base) ||
> -	   s390x_pmd_none(pte_base)) {
> -		*paddr = 0;
> -		return FALSE;
> +	while (level >= 0) {
> +		entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level);
> +		if (!entry)
> +			return 0;
> +		table = entry & ~0xfffULL;
> +		len = entry & 0x3ULL;
> +		level--;
>  	}
> -	/* get the pte */
> -	pte_base = pte_base & S390X_PT_BASE_MASK;
> -	pte_val = s390x_pte_offset(pte_base,kvaddr);
> -	if (S390X_PTE_INVALID(pte_val) ||
> -	    s390x_pte_none(pte_val)){
> -		*paddr = 0;
> -		return FALSE;
> -	}
> -	if(!s390x_pte_present(pte_val)){
> -		/* swapped out */ 
> -		*paddr = pte_val;
> +
> +	/* Get the page table entry */
> +	entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL);
> +	if (!entry)
>  		return FALSE;
> -	}
> -	*paddr = (pte_val & S390X_PAGE_BASE_MASK) |
> -		(kvaddr & (~(S390X_PAGE_MASK)));
> +
> +	/* Isolate the page origin from the page table entry. */
> +	paddr = entry & ~0xfffULL;
> +
> +	/* Add the page offset and return the final value. */
> +	*phys_addr = paddr + (vaddr & 0xfffULL);
> +
>  	return TRUE;
>  }
>  
> 
> 
> 





More information about the Crash-utility mailing list