[Libguestfs] [PATCH] Report last-modified time of hive root and nodes

Richard W.M. Jones rjones at redhat.com
Wed Aug 10 09:44:00 UTC 2011


On Tue, Aug 09, 2011 at 09:54:40PM -0700, Alex Nelson wrote:
> The infrastructure for modified-time reporting has been essentially
> unused.  These changes report the registry time by treating the
> time fields as Windows filetime fields stored in little-Endian
> (which means they can be treated as a single 64-bit little-Endian
> integer).  Some of the code changes necessary include:
> 
>  * Exposing the hive_h structure in the hivex header file (via
>    generator.ml)

You can't make the hive_h internal structure visible in the header
file.  I don't understand why you'd want callers to be delving into
the internals of the handle anyway.  What's the reason for this
change?

>  * Adding an additional argument to the node_start function, which
>    should cause no complications since the change is specific to the
>    C API.

This is a change to the C ABI, which is not allowed.  Instead:

Add a new node_start-style callback field to the hivex_visitor struct.
(Obviously call it something different, like node_start2, and it has to be
added at the end of the current struct so the existing fields keep their offsets.)

In the code you can differentiate between whether the caller supplied
the old function or the new function:

  if (vtor->node_start2) {
     // call new vtor->node_start2 with extra argument
  } else if (vtor->node_start) {
     // call old vtor->node_start
  }

The last_modified changes in themselves seem quite reasonable to me,
but this patch cannot be applied as-is.

Rich.

> Signed-off-by: Alex Nelson <ajnelson at cs.ucsc.edu>
> ---
>  generator/generator.ml |   44 ++++++++++++++++-
>  lib/hivex.c            |  124 +++++++++++++++++++++++++++++++-----------------
>  xml/hivexml.c          |   11 ++++-
>  3 files changed, 132 insertions(+), 47 deletions(-)
> 
> diff --git a/generator/generator.ml b/generator/generator.ml
> index 31478cd..f1aa799 100755
> --- a/generator/generator.ml
> +++ b/generator/generator.ml
> @@ -695,6 +695,46 @@ extern \"C\" {
>  /* NOTE: This API is documented in the man page hivex(3). */
>  
>  /* Hive handle. */
> +struct hive_h {
> +  char *filename;
> +  int fd;
> +  size_t size;
> +  int msglvl;
> +  int writable;
> +
> +  /* Registry file, memory mapped if read-only, or malloc'd if writing. */
> +  union {
> +    char *addr;
> +    struct ntreg_header *hdr;
> +  };
> +
> +  /* Use a bitmap to store which file offsets are valid (point to a
> +   * used block).  We only need to store 1 bit per 32 bits of the file
> +   * (because blocks are 4-byte aligned).  We found that the average
> +   * block size in a registry file is ~50 bytes.  So roughly 1 in 12
> +   * bits in the bitmap will be set, making it likely a more efficient
> +   * structure than a hash table.
> +   */
> +  char *bitmap;
> +#define BITMAP_SET(bitmap,off) (bitmap[(off)>>5] |= 1 << (((off)>>2)&7))
> +#define BITMAP_CLR(bitmap,off) (bitmap[(off)>>5] &= ~ (1 << (((off)>>2)&7)))
> +#define BITMAP_TST(bitmap,off) (bitmap[(off)>>5] & (1 << (((off)>>2)&7)))
> +#define IS_VALID_BLOCK(h,off)               \
> +  (((off) & 3) == 0 &&                      \
> +   (off) >= 0x1000 &&                       \
> +   (off) < (h)->size &&                     \
> +   BITMAP_TST((h)->bitmap,(off)))
> +
> +  /* Fields from the header, extracted from little-endianness hell. */
> +  size_t rootoffs;              /* Root key offset (always an nk-block). */
> +  size_t endpages;              /* Offset of end of pages. */
> +  char *last_modified;          /* mtime of base block. */
> +
> +  /* For writing. */
> +  size_t endblocks;             /* Offset to next block allocation (0
> +                                   if not allocated anything yet). */
> +};
> +
>  typedef struct hive_h hive_h;
>  
>  /* Nodes and values. */
> @@ -761,7 +801,7 @@ typedef struct hive_set_value hive_set_value;
>   * languages make it much simpler to iterate over a tree.
>   */
>  struct hivex_visitor {
> -  int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
> +  int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name, const char *last_modified);
>    int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
>    int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
>    int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv);
> @@ -1110,7 +1150,7 @@ terminates immediately.  If you don't need a callback function at
>  all, set the function pointer to NULL.
>  
>   struct hivex_visitor {
> -   int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name);
> +   int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name, const char *last_modified);
>     int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name);
>     int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h,
>           hive_type t, size_t len, const char *key, const char *str);
> diff --git a/lib/hivex.c b/lib/hivex.c
> index fedbb6c..474249f 100644
> --- a/lib/hivex.c
> +++ b/lib/hivex.c
> @@ -33,6 +33,7 @@
>  #include <sys/mman.h>
>  #include <sys/stat.h>
>  #include <assert.h>
> +#include <time.h>
>  
>  #include "c-ctype.h"
>  #include "full-read.h"
> @@ -60,51 +61,12 @@
>  static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len);
>  static size_t utf16_string_len_in_bytes_max (const char *str, size_t len);
>  
> -struct hive_h {
> -  char *filename;
> -  int fd;
> -  size_t size;
> -  int msglvl;
> -  int writable;
> -
> -  /* Registry file, memory mapped if read-only, or malloc'd if writing. */
> -  union {
> -    char *addr;
> -    struct ntreg_header *hdr;
> -  };
> -
> -  /* Use a bitmap to store which file offsets are valid (point to a
> -   * used block).  We only need to store 1 bit per 32 bits of the file
> -   * (because blocks are 4-byte aligned).  We found that the average
> -   * block size in a registry file is ~50 bytes.  So roughly 1 in 12
> -   * bits in the bitmap will be set, making it likely a more efficient
> -   * structure than a hash table.
> -   */
> -  char *bitmap;
> -#define BITMAP_SET(bitmap,off) (bitmap[(off)>>5] |= 1 << (((off)>>2)&7))
> -#define BITMAP_CLR(bitmap,off) (bitmap[(off)>>5] &= ~ (1 << (((off)>>2)&7)))
> -#define BITMAP_TST(bitmap,off) (bitmap[(off)>>5] & (1 << (((off)>>2)&7)))
> -#define IS_VALID_BLOCK(h,off)               \
> -  (((off) & 3) == 0 &&                      \
> -   (off) >= 0x1000 &&                       \
> -   (off) < (h)->size &&                     \
> -   BITMAP_TST((h)->bitmap,(off)))
> -
> -  /* Fields from the header, extracted from little-endianness hell. */
> -  size_t rootoffs;              /* Root key offset (always an nk-block). */
> -  size_t endpages;              /* Offset of end of pages. */
> -
> -  /* For writing. */
> -  size_t endblocks;             /* Offset to next block allocation (0
> -                                   if not allocated anything yet). */
> -};
> -
>  /* NB. All fields are little endian. */
>  struct ntreg_header {
>    char magic[4];                /* "regf" */
>    uint32_t sequence1;
>    uint32_t sequence2;
> -  char last_modified[8];
> +  uint64_t last_modified;
>    uint32_t major_ver;           /* 1 */
>    uint32_t minor_ver;           /* 3 */
>    uint32_t unknown5;            /* 0 */
> @@ -173,7 +135,7 @@ struct ntreg_nk_record {
>    int32_t seg_len;              /* length (always -ve because used) */
>    char id[2];                   /* "nk" */
>    uint16_t flags;
> -  char timestamp[8];
> +  uint64_t timestamp;
>    uint32_t unknown1;
>    uint32_t parent;              /* offset of owner/parent */
>    uint32_t nr_subkeys;          /* number of subkeys */
> @@ -265,7 +227,34 @@ header_checksum (const hive_h *h)
>    return sum;
>  }
>  
> +#define WINDOWS_TICK 10000000LL
> +#define SEC_TO_UNIX_EPOCH 11644473600LL
> +/**
> + * Convert Windows filetime to ISO 8601 format.
> + * Source for filetime->time_t conversion:  http://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux/6161842#6161842
> + * Source for time_t->char* conversion:  Fiwalk version 0.6.14's fiwalk.cpp.
> + * @param windows_ticks Expected to not have any remaining Endian issues.
> + */
> +int
> +filetime_to_8601 (char *buf, int bufsize, uint64_t windows_ticks)
> +{
> +  if (buf == NULL) {
> +    fprintf (stderr, "filetime_to_8601: Received null output buffer, unable to proceed.\n");
> +    return -1;
> +  }
> +  uint64_t nanos = windows_ticks % WINDOWS_TICK;
> +  time_t tt = (windows_ticks / WINDOWS_TICK - SEC_TO_UNIX_EPOCH);
> +  struct tm time_tm;
> +  if (gmtime_r (&tt, &time_tm) == NULL) {
> +    fprintf (stderr, "filetime_to_8601: Error running gmtime_r on timestamp (decimal hundreds of ns: %" PRIu64 ").\n", windows_ticks);
> +    return -1;
> +  }
> +  strftime(buf, bufsize, "%FT%TZ", &time_tm);
> +  return 0;
> +}
> +
>  #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
> +#define TIMESTAMP_BUF_LEN 32
>  
>  hive_h *
>  hivex_open (const char *filename, int flags)
> @@ -359,6 +348,15 @@ hivex_open (const char *filename, int flags)
>      goto error;
>    }
>  
> +  /* Last-modified time. */
> +  h->last_modified = (char *) calloc(1 + TIMESTAMP_BUF_LEN, sizeof(char));
> +  int ft_rc = filetime_to_8601(h->last_modified, TIMESTAMP_BUF_LEN, le64toh ((uint64_t) h->hdr->last_modified));
> +  if (ft_rc) {
> +    fprintf (stderr, "hivex: failed to parse time value\n");
> +    free(h->last_modified);
> +    h->last_modified = NULL;
> +  }
> +
>    if (h->msglvl >= 2) {
>      char *name = windows_utf16_to_utf8 (h->hdr->name, 64);
>  
> @@ -367,6 +365,8 @@ hivex_open (const char *filename, int flags)
>               "  file version             %" PRIu32 ".%" PRIu32 "\n"
>               "  sequence nos             %" PRIu32 " %" PRIu32 "\n"
>               "    (sequences nos should match if hive was synched at shutdown)\n"
> +             "  last modified            %s\n"
> +             "    (decimal, 100 ns)      %" PRIu64 "\n"
>               "  original file name       %s\n"
>               "    (only 32 chars are stored, name is probably truncated)\n"
>               "  root offset              0x%x + 0x1000\n"
> @@ -374,6 +374,8 @@ hivex_open (const char *filename, int flags)
>               "  checksum                 0x%x (calculated 0x%x)\n",
>               major_ver, le32toh (h->hdr->minor_ver),
>               le32toh (h->hdr->sequence1), le32toh (h->hdr->sequence2),
> +             h->last_modified,
> +             le64toh (h->hdr->last_modified),
>               name ? name : "(conversion failed)",
>               le32toh (h->hdr->offset),
>               le32toh (h->hdr->blocks), h->size,
> @@ -541,6 +543,10 @@ hivex_close (hive_h *h)
>    if (h->msglvl >= 1)
>      fprintf (stderr, "hivex_close\n");
>  
> +  if (h->last_modified) {
> +    free (h->last_modified);
> +    h->last_modified = NULL;
> +  }
>    free (h->bitmap);
>    if (!h->writable)
>      munmap (h->addr, h->size);
> @@ -608,6 +614,33 @@ hivex_node_name (hive_h *h, hive_node_h node)
>    return ret;
>  }
>  
> +/* Caller responsible for freeing returned char* if non-null. */
> +char *
> +hivex_node_mtime (hive_h *h, hive_node_h node)
> +{
> +  int ft_rc;
> +  if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
> +    errno = EINVAL;
> +    return NULL;
> +  }
> +
> +  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
> +
> +  char *ret = calloc (32 + 1, sizeof(char));
> +  if (ret == NULL)
> +    return ret;
> +  ft_rc = filetime_to_8601 (ret, 32, le64toh (nk->timestamp));
> +  if (h->msglvl >= 2) {
> +    fprintf(stderr, "hivex_node_mtime: nk->timestamp: %" PRIu64 "\n", nk->timestamp);
> +    fprintf(stderr, "hivex_node_mtime: ret: %s\n", ret);
> +  }
> +  if (ft_rc) {
> +    free(ret);
> +    ret = NULL;
> +  }
> +  return ret;
> +}
> +
>  #if 0
>  /* I think the documentation for the sk and classname fields in the nk
>   * record is wrong, or else the offset field is in the wrong place.
> @@ -1560,6 +1593,7 @@ hivex__visit_node (hive_h *h, hive_node_h node,
>  {
>    int skip_bad = flags & HIVEX_VISIT_SKIP_BAD;
>    char *name = NULL;
> +  char *last_modified = NULL;
>    hive_value_h *values = NULL;
>    hive_node_h *children = NULL;
>    char *key = NULL;
> @@ -1584,8 +1618,11 @@ hivex__visit_node (hive_h *h, hive_node_h node,
>    BITMAP_CLR (unvisited, node);
>  
>    name = hivex_node_name (h, node);
> +  last_modified = hivex_node_mtime (h, node);
> +  if (h->msglvl >= 2)
> +    fprintf(stderr, "hivex__visit_node: last_modified: %s\n", last_modified ? last_modified : "NULL" );
>    if (!name) return skip_bad ? 0 : -1;
> -  if (vtor->node_start && vtor->node_start (h, opaque, node, name) == -1)
> +  if (vtor->node_start && vtor->node_start (h, opaque, node, name, last_modified) == -1)
>      goto error;
>  
>    values = hivex_node_values (h, node);
> @@ -1764,6 +1801,7 @@ hivex__visit_node (hive_h *h, hive_node_h node,
>  
>   error:
>    free (name);
> +  free (last_modified);
>    free (values);
>    free (children);
>    free (key);
> @@ -2264,7 +2302,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
>    nk->sk = htole32 (parent_sk_offset - 0x1000);
>  
>    /* Inherit parent timestamp. */
> -  memcpy (nk->timestamp, parent_nk->timestamp, sizeof (parent_nk->timestamp));
> +  nk->timestamp = parent_nk->timestamp;
>  
>    /* What I found out the hard way (not documented anywhere): the
>     * subkeys in lh-records must be kept sorted.  If you just add a
> diff --git a/xml/hivexml.c b/xml/hivexml.c
> index 90cb22b..b32eaa2 100644
> --- a/xml/hivexml.c
> +++ b/xml/hivexml.c
> @@ -40,7 +40,7 @@
>  #endif
>  
>  /* Callback functions. */
> -static int node_start (hive_h *, void *, hive_node_h, const char *name);
> +static int node_start (hive_h *, void *, hive_node_h, const char *name, const char *last_modified);
>  static int node_end (hive_h *, void *, hive_node_h, const char *name);
>  static int value_string (hive_h *, void *, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *str);
>  static int value_multiple_strings (hive_h *, void *, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, char **argv);
> @@ -123,6 +123,8 @@ main (int argc, char *argv[])
>  
>    XML_CHECK (xmlTextWriterStartDocument, (writer, NULL, "utf-8", NULL));
>    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "hive"));
> +  if (h->last_modified)
> +    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "mtime", BAD_CAST (h->last_modified)));
>  
>    if (hivex_visit (h, &visitor, sizeof visitor, writer, visit_flags) == -1) {
>      perror (argv[optind]);
> @@ -142,11 +144,16 @@ main (int argc, char *argv[])
>  }
>  
>  static int
> -node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
> +node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name, const char *last_modified)
>  {
>    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
>    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name", BAD_CAST name));
> +  if (last_modified) {
> +    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "mtime", BAD_CAST last_modified));
> +  } else {
> +    fprintf(stderr, "node_start:  last_modified came across NULL.\n");
> +  }
>    return 0;
>  }
>  
> -- 
> 1.7.6
> 

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine.  Supports Linux and Windows.
http://et.redhat.com/~rjones/virt-df/




More information about the Libguestfs mailing list