[lvm-devel] [PATCH 1/6] Pool locking code
Zdenek Kabelac
zkabelac at redhat.com
Tue Mar 22 21:21:55 UTC 2011
Adding functions to lock and unlock a pool, to modify specific
uint64_t values inside a locked pool, and to revert all modified
values back.

The current API is the result of several iterations I've made.
To avoid more rewrites in case this API still requires changes,
only pool-fast is currently fully updated for this API.

Two ways to debug the code:

mprotect - quite fast in all cases, but requires more memory;
           it currently uses posix_memalign() - this would be
           later modified to use dm_malloc() and align internally.

crc - checksum of the locked pool, implemented via a quick, simple
      hash. Even this gets quite slow when the pool grows bigger -
      thus it is not very efficient to check with every restore,
      so the check is made only when a reused VG is released.

This probably needs some discussion on where to move further...
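
A minimal sketch of the intended call sequence (error paths
shortened to return_0; dm_pool_create(), dm_pool_alloc() and
dm_pool_destroy() are the existing pool API, the sizes and values
are arbitrary):

	struct dm_pool *mem = dm_pool_create("example", 1024);
	uint64_t *v = dm_pool_alloc(mem, sizeof(uint64_t));

	*v = 42;

	/* Lock the pool; reserve exception store for up to 16 values */
	if (!dm_pool_lock(mem, 16))
		return_0;

	/* Direct writes to pool memory would now segfault (mprotect)
	 * or be caught by the crc check - modifications must go
	 * through the accessor */
	if (!dm_pool_set_uint64(mem, v, 100))
		return_0;

	/* Revert all tracked values; 1 requests the crc check */
	if (!dm_pool_restore(mem, 1))
		return_0;

	if (!dm_pool_unlock(mem))
		return_0;

	dm_pool_destroy(mem);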
Signed-off-by: Zdenek Kabelac <zkabelac at redhat.com>
---
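
A note on the crc: despite the name it is a simple additive hash
rather than a true CRC - the add/shift/xor steps resemble Bob
Jenkins' one-at-a-time hash, applied per machine word and without
the final avalanche rounds. A standalone sketch of the mixing loop
used by _pool_crc() (not part of the patch):

	#include <stdio.h>

	/* Same mixing steps as _pool_crc(), over a word range */
	static long hash_words(const long *ptr, const long *end)
	{
		long h = 0;

		while (ptr < end) {
			h += *ptr++;
			h += (h << 10);
			h ^= (h >> 6);
		}

		return h;
	}

	int main(void)
	{
		long data[4] = { 1, 2, 3, 4 };

		printf("hash: %lx\n", hash_words(data, data + 4));
		return 0;
	}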
libdm/libdevmapper.h | 11 ++
libdm/mm/pool-debug.c | 9 ++
libdm/mm/pool-fast.c | 74 ++++++++++++++-
libdm/mm/pool.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 337 insertions(+), 3 deletions(-)
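
For orientation, the exception store that dm_pool_set_uint64() and
dm_pool_restore() operate on below is a flat, zero-terminated array
of (address, original value) pairs; a sketch of the layout (field
names taken from this patch):

	/*
	 * p->volatile_mem after two tracked modifications:
	 *
	 *   [0] (uint64_t) addr0    [1] original value at addr0
	 *   [2] (uint64_t) addr1    [3] original value at addr1
	 *   [4] 0                   <- terminator
	 *
	 * dm_pool_lock(p, count) allocates 2 * (count + 1) uint64_t
	 * slots - room for count pairs plus the terminating zero.
	 */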
diff --git a/libdm/libdevmapper.h b/libdm/libdevmapper.h
index 88bddb1..a0bed97 100644
--- a/libdm/libdevmapper.h
+++ b/libdm/libdevmapper.h
@@ -594,6 +594,17 @@ void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment);
void dm_pool_empty(struct dm_pool *p);
void dm_pool_free(struct dm_pool *p, void *ptr);
+/* prevent modification of pool */
+int dm_pool_locked(struct dm_pool *p);
+int dm_pool_lock(struct dm_pool *p, unsigned count)
+ __attribute__((__warn_unused_result__));
+int dm_pool_unlock(struct dm_pool *p)
+ __attribute__((__warn_unused_result__));
+int dm_pool_set_uint64(struct dm_pool *p, uint64_t *addr, uint64_t new_value)
+ __attribute__((__warn_unused_result__));
+int dm_pool_restore(struct dm_pool *p, int check_crc)
+ __attribute__((__warn_unused_result__));
+
/*
* Object building routines:
*
diff --git a/libdm/mm/pool-debug.c b/libdm/mm/pool-debug.c
index fd931e4..206dd84 100644
--- a/libdm/mm/pool-debug.c
+++ b/libdm/mm/pool-debug.c
@@ -33,6 +33,7 @@ struct dm_pool {
struct dm_list list;
const char *name;
void *orig_pool; /* to pair it with first allocation call */
+ unsigned locked;
int begun;
struct block *object;
@@ -81,6 +82,8 @@ static void _free_blocks(struct dm_pool *p, struct block *b)
{
struct block *n;
+ assert(!p->locked);
+
while (b) {
p->stats.bytes -= b->size;
p->stats.blocks_allocated--;
@@ -94,6 +97,8 @@ static void _free_blocks(struct dm_pool *p, struct block *b)
static void _pool_stats(struct dm_pool *p, const char *action)
{
+ assert(!p->locked);
+
#ifdef DEBUG_POOL
log_debug("%s mempool %s: %u/%u bytes, %u/%u blocks, "
"%u allocations)", action, p->name, p->stats.bytes,
@@ -119,6 +124,8 @@ void *dm_pool_alloc(struct dm_pool *p, size_t s)
static void _append_block(struct dm_pool *p, struct block *b)
{
+ assert(!p->locked);
+
if (p->tail) {
p->tail->next = b;
p->tail = b;
@@ -226,6 +233,8 @@ int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta)
struct block *new;
size_t new_size;
+ assert(!p->locked);
+
if (!delta)
delta = strlen(extra);
diff --git a/libdm/mm/pool-fast.c b/libdm/mm/pool-fast.c
index 29d61a4..5d2643f 100644
--- a/libdm/mm/pool-fast.c
+++ b/libdm/mm/pool-fast.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -18,6 +18,8 @@
#endif
#include "dmlib.h"
+#include <sys/mman.h>
+#include <malloc.h>
struct chunk {
char *begin, *end;
@@ -32,6 +34,10 @@ struct dm_pool {
size_t chunk_size;
size_t object_len;
unsigned object_alignment;
+ int locked;
+ long crc;
+ uint64_t *volatile_mem;
+ unsigned volatile_count;
};
static void _align_chunk(struct chunk *c, unsigned alignment);
@@ -74,6 +80,7 @@ void dm_pool_destroy(struct dm_pool *p)
}
dm_list_del(&p->list);
+ dm_free(p->volatile_mem);
dm_free(p);
}
@@ -254,17 +261,37 @@ static struct chunk *_new_chunk(struct dm_pool *p, size_t s)
{
struct chunk *c;
+#ifdef DEBUG_USE_MPROTECT
+ if (!pagesize) {
+ pagesize = getpagesize(); // lvm_pagesize();
+ pagesize_mask = pagesize - 1;
+ }
+#endif
if (p->spare_chunk &&
((p->spare_chunk->end - p->spare_chunk->begin) >= s)) {
/* reuse old chunk */
c = p->spare_chunk;
p->spare_chunk = 0;
} else {
- if (!(c = dm_malloc(s))) {
+#ifdef DEBUG_USE_MPROTECT
+ /*
+ * Allocate a page-aligned size so that malloc still works.
+ * Otherwise a page fault would be triggered by writes to
+ * malloc's internal bookkeeping pointers, unrelated to the pool.
+ */
+#define aligned_malloc(s) (posix_memalign((void**)&c, pagesize, \
+ ALIGN_ON_PAGE(s)) == 0)
+#else
+#define aligned_malloc(s) (c = dm_malloc(s))
+#endif
+ /* mem for internals */
+ if (!aligned_malloc(s)) {
+#undef aligned_malloc
log_error("Out of memory. Requested %" PRIsize_t
" bytes.", s);
return NULL;
}
+ //log_error("RESET %p %p %d a:%d", c, c + s, (int)s, (int)ALIGN_ON_PAGE(s));
c->begin = (char *) (c + 1);
c->end = (char *) c + s;
@@ -283,3 +310,46 @@ static void _free_chunk(struct chunk *c)
{
dm_free(c);
}
+
+/*
+ * Calculate a hash over the pool memory chunks, including internal pointers.
+ */
+static long _pool_crc(const struct dm_pool *p)
+{
+ long crc_hash = 0;
+#ifndef DEBUG_USE_MPROTECT
+ const struct chunk *c;
+ const long *ptr, *end;
+
+ for (c = p->chunk; c; c = c->prev) {
+ end = (const long *) (c->begin < c->end ? (long) c->begin & ~7: (long) c->end);
+ ptr = (const long *) c;
+#ifdef VALGRIND_POOL
+ VALGRIND_MAKE_MEM_DEFINED(ptr, (end - ptr) * sizeof(*end));
+#endif
+ while (ptr < end) {
+ crc_hash += *ptr++;
+ crc_hash += (crc_hash << 10);
+ crc_hash ^= (crc_hash >> 6);
+ }
+ }
+#endif
+
+ return crc_hash;
+}
+
+static int _pool_protect(struct dm_pool *p, int prot)
+{
+#ifdef DEBUG_USE_MPROTECT
+ struct chunk *c;
+
+ for (c = p->chunk; c; c = c->prev) {
+ //log_error("protecting %d %p 0x%x", prot, c, (int) ((char*)c->end - (char*)c) - 1);
+ if (mprotect(c, (size_t) ((c->end - (char *) c) - 1), prot) != 0) {
+ log_sys_error("mprotect", "");
+ return 0;
+ }
+ }
+#endif
+ return 1;
+}
diff --git a/libdm/mm/pool.c b/libdm/mm/pool.c
index 608826b..9f036a8 100644
--- a/libdm/mm/pool.c
+++ b/libdm/mm/pool.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -15,10 +15,33 @@
#include "dmlib.h"
+/*
+ * Use the mprotect system call to ensure all locked pages are not writable.
+ * Write access to a locked pool then generates a segmentation fault.
+ *
+ * 1. The implementation uses posix_memalign() to get page-aligned
+ * memory blocks (it could also be implemented through malloc).
+ *
+ * 2. Only pool-fast is properly handled for now.
+ *
+ * 3. As all pools can now be locked, this aligned malloc is used for
+ * every pool - which is not memory efficient.
+ *
+ * 4. The checksum is quite slow compared to mprotect if it is performed
+ * with every restore operation.
+ */
+#define DEBUG_USE_MPROTECT 1
+
/* FIXME: thread unsafe */
static DM_LIST_INIT(_dm_pools);
void dm_pools_check_leaks(void);
+#ifdef DEBUG_USE_MPROTECT
+static size_t pagesize = 0;
+static size_t pagesize_mask = 0;
+#define ALIGN_ON_PAGE(size) (((size) + (pagesize_mask)) & ~(pagesize_mask))
+#endif
+
#ifdef DEBUG_POOL
#include "pool-debug.c"
#else
@@ -75,3 +98,224 @@ void dm_pools_check_leaks(void)
#endif
}
}
+
+/**
+ * Query the lock status of a pool.
+ *
+ * \param p
+ * Pool to be queried.
+ *
+ * \return
+ * 1 (success) when the pool is locked, 0 otherwise.
+ */
+int dm_pool_locked(struct dm_pool *p)
+{
+ return p->locked;
+}
+
+/**
+ * Lock memory pool.
+ *
+ * \param p
+ * Pool to be locked.
+ *
+ * \param count
+ * Specifies the number of 64-bit pairs to be placed in the
+ * volatile exception store. When count is 0, the pool is
+ * locked without reallocating the exception store and without
+ * recomputing the crc. This is useful for temporarily
+ * unlocking and relocking a pool.
+ *
+ * \return
+ * 1 (success) when the pool was properly locked, 0 otherwise.
+ */
+int dm_pool_lock(struct dm_pool *p, unsigned count)
+{
+ if (p->locked) {
+ log_error(INTERNAL_ERROR "Pool is already locked.");
+ return 0;
+ }
+
+ if (count) {
+ if (count != p->volatile_count) {
+ dm_free(p->volatile_mem);
+
+ if (!(p->volatile_mem = dm_malloc(2 * sizeof(uint64_t) * (count + 1)))) {
+ log_error("Failed to allocate exception store for pool.");
+ return 0;
+ }
+
+ p->volatile_count = count;
+ }
+
+ p->volatile_mem[0] = 0; /* Reset volatile buffer */
+ p->crc = _pool_crc(p); /* Get crc for pool */
+ } else if (!p->volatile_count) {
+ log_error(INTERNAL_ERROR "Pool locking without exception store specified.");
+ return 0;
+ }
+
+ if (!_pool_protect(p, PROT_READ)) {
+ _pool_protect(p, PROT_READ | PROT_WRITE);
+ return_0;
+ }
+
+ p->locked = 1;
+
+ log_debug("Pool %p locked.", p);
+
+ return 1;
+}
+
+/**
+ * Unlock memory pool.
+ *
+ * \param p
+ * Pool to be unlocked.
+ *
+ * \return
+ * 1 (success) when the pool was properly unlocked, 0 otherwise.
+ */
+int dm_pool_unlock(struct dm_pool *p)
+{
+ if (!p->locked) {
+ log_error(INTERNAL_ERROR "Pool is already unlocked.");
+ return 0;
+ }
+
+ p->locked = 0;
+
+ if (!_pool_protect(p, PROT_READ | PROT_WRITE))
+ return_0;
+
+ log_debug("Pool %p unlocked.", p);
+
+ return 1;
+}
+
+static int _mprotect_addr(uint64_t *addr, int prot)
+{
+#ifdef DEBUG_USE_MPROTECT
+ int bounds = (((long) addr & (pagesize_mask)) < (pagesize - 8))
+ ? pagesize_mask : pagesize_mask + pagesize;
+
+ if (mprotect((void *) ((long) addr & ~pagesize_mask), bounds, prot)
+ != 0) {
+ log_sys_error("mprotect", (prot == PROT_READ) ? "READ" : "WRITE");
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+/**
+ * Modify a uint64_t value in the pool.
+ *
+ * \param p
+ * Pool which contains the modified address.
+ *
+ * \param addr
+ * Address of the modified value. The original content is saved in the exception store.
+ *
+ * \param d
+ * New value to be set.
+ *
+ * \return
+ * 1 (success) if the value was updated, 0 otherwise.
+ *
+ * TODO:
+ * A more advanced algorithm would be needed if the exception
+ * area grew to a large number of pairs.
+ * Currently it appears to be enough to handle just 64-bit pairs.
+ */
+int dm_pool_set_uint64(struct dm_pool *p, uint64_t *addr, uint64_t d)
+{
+ unsigned i;
+
+ if (!p->locked) {
+ *addr = d;
+ return 1;
+ }
+
+ for (i = 0; p->volatile_mem[i] != (uint64_t) addr; i += 2)
+ if (!p->volatile_mem[i]) {
+ /* Add new address and original value to exception store */
+ if ((i + 2) >= (p->volatile_count * 2)) {
+ log_error(INTERNAL_ERROR
+ "Exception store with %u size is full.",
+ (unsigned) p->volatile_count);
+ return 0;
+ }
+
+ log_debug("Saving protected %u: %p %" PRIu64
+ " -> %" PRIu64 ".", (unsigned) i / 2,
+ addr, *addr, d);
+ p->volatile_mem[i] = (uint64_t) addr;
+ p->volatile_mem[i + 1] = *addr;
+ p->volatile_mem[i + 2] = 0;
+ break;
+ }
+
+ if (!_mprotect_addr(addr, PROT_READ | PROT_WRITE))
+ return_0;
+
+ log_debug("Changing protected: %p %" PRIu64 " -> %" PRIu64 ".",
+ addr, *addr, d);
+ *addr = d;
+
+ if (!_mprotect_addr(addr, PROT_READ))
+ return_0;
+
+ return 1;
+}
+
+/**
+ * Restore the original content of the pool and check whether
+ * it matches the crc calculated when the pool was locked.
+ *
+ * \param p
+ * Locked pool to be restored.
+ *
+ * \param check_crc
+ * Boolean specifying whether to check the crc after restore.
+ * Checksumming a large pool is slow.
+ *
+ * \return
+ * 1 (success) if the restore was successful, 0 otherwise.
+ */
+int dm_pool_restore(struct dm_pool *p, int check_crc)
+{
+ unsigned i;
+
+ if (!p->locked) {
+ log_error(INTERNAL_ERROR "Restore of unlocked pool.");
+ return 0;
+ }
+
+ /* Unlock all pages for modified addresses */
+ for (i = 0; p->volatile_mem[i]; i += 2)
+ if (!_mprotect_addr((uint64_t *)(long)p->volatile_mem[i],
+ PROT_READ | PROT_WRITE))
+ return_0;
+
+ /* Restore saved values from exception store */
+ for (i = 0; p->volatile_mem[i]; i += 2) {
+ log_debug("Restoring %u 0x%" PRIx64 " = %" PRIu64 ".", i / 2,
+ p->volatile_mem[i], p->volatile_mem[i + 1]);
+ ((uint64_t *)(long)p->volatile_mem[i])[0] = p->volatile_mem[i + 1];
+ }
+
+ /* Lock again modified pages */
+ for (i = 0; p->volatile_mem[i]; i += 2)
+ if (!_mprotect_addr((uint64_t *)p->volatile_mem[i], PROT_READ))
+ return_0;
+
+ p->volatile_mem[0] = 0;
+
+ if (check_crc && p->crc != _pool_crc(p)) {
+ log_error(INTERNAL_ERROR "Pool content was modified.");
+ return 0;
+ }
+
+ return 1;
+}
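
As a standalone illustration of the page arithmetic above (mirrors
ALIGN_ON_PAGE() and the bounds test in _mprotect_addr(); a
compilable sketch, not part of the patch):

	#include <stdio.h>
	#include <stdint.h>
	#include <unistd.h>

	int main(void)
	{
		size_t pagesize = (size_t) sysconf(_SC_PAGESIZE);
		size_t mask = pagesize - 1;
		uintptr_t addr;

		/* ALIGN_ON_PAGE(): round an allocation size up to a page
		 * multiple, e.g. 5000 -> 8192 with 4KiB pages */
		printf("ALIGN_ON_PAGE(5000) = %zu\n", (5000 + mask) & ~mask);

		/* uint64_t entirely within one page: one page reprotected */
		addr = 0x10000 + pagesize - 16;
		printf("offset %zu -> %s\n", (size_t) (addr & mask),
		       ((addr & mask) < pagesize - 8) ? "1 page" : "2 pages");

		/* uint64_t straddling a page boundary: two pages reprotected */
		addr = 0x10000 + pagesize - 4;
		printf("offset %zu -> %s\n", (size_t) (addr & mask),
		       ((addr & mask) < pagesize - 8) ? "1 page" : "2 pages");

		return 0;
	}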
--
1.7.4.1