[Libguestfs] [PATCH 7/12] Tools for analyzing and reverse engineering hive files.
Richard W.M. Jones
rjones at redhat.com
Wed Feb 3 18:33:52 UTC 2010
--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
libguestfs lets you edit virtual machines. Supports shell scripting,
bindings from many languages. http://et.redhat.com/~rjones/libguestfs/
See what it can do: http://et.redhat.com/~rjones/libguestfs/recipes.html
-------------- next part --------------
>From d2f6064200473d156fa1da70e5b87267dde6b22e Mon Sep 17 00:00:00 2001
From: Richard Jones <rjones at redhat.com>
Date: Wed, 3 Feb 2010 17:35:53 +0000
Subject: [PATCH 07/12] Tools for analyzing and reverse engineering hive files.
This commit is not of general interest. It contains the tools which
I used to reverse engineer the hive format and to test changes.
Keeping these with the rest of the code is useful in case in future
we encounter a hive file that we fail to modify.
Note that the tools are not compiled by default. You have to compile
each explicitly with:
make -C hivex/tools <toolname>.opt
---
.gitignore | 1 +
configure.ac | 1 +
hivex/Makefile.am | 2 +
hivex/tools/Makefile.am | 54 +++
hivex/tools/clearheaderfields.ml | 112 +++++
hivex/tools/fillemptyhbins.ml | 74 ++++
hivex/tools/truncatefile.ml | 112 +++++
hivex/tools/visualizer.ml | 845 +++++++++++++++++++++++++++++++++++++
hivex/tools/visualizer_NT_time.ml | 30 ++
hivex/tools/visualizer_utils.ml | 160 +++++++
10 files changed, 1391 insertions(+), 0 deletions(-)
create mode 100644 hivex/tools/Makefile.am
create mode 100644 hivex/tools/clearheaderfields.ml
create mode 100644 hivex/tools/fillemptyhbins.ml
create mode 100644 hivex/tools/truncatefile.ml
create mode 100644 hivex/tools/visualizer.ml
create mode 100644 hivex/tools/visualizer_NT_time.ml
create mode 100644 hivex/tools/visualizer_utils.ml
diff --git a/.gitignore b/.gitignore
index 49daccd..899e973 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,6 +84,7 @@ hivex/*.1
hivex/*.3
hivex/hivexsh
hivex/hivexml
+hivex/tools/*.opt
html/guestfish.1.html
html/guestfs.3.html
html/guestmount.1.html
diff --git a/configure.ac b/configure.ac
index fda4773..134cebd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -737,6 +737,7 @@ AC_CONFIG_FILES([Makefile
gnulib/lib/Makefile
gnulib/tests/Makefile
hivex/Makefile
+ hivex/tools/Makefile
fuse/Makefile
ocaml/META perl/Makefile.PL])
AC_OUTPUT
diff --git a/hivex/Makefile.am b/hivex/Makefile.am
index 1adbbd8..5624b16 100644
--- a/hivex/Makefile.am
+++ b/hivex/Makefile.am
@@ -15,6 +15,8 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+SUBDIRS = tools
+
EXTRA_DIST = hivex.pod hivexml.pod hivexget.pod hivexsh.pod LICENSE
lib_LTLIBRARIES = libhivex.la
diff --git a/hivex/tools/Makefile.am b/hivex/tools/Makefile.am
new file mode 100644
index 0000000..b744352
--- /dev/null
+++ b/hivex/tools/Makefile.am
@@ -0,0 +1,54 @@
+# libguestfs
+# Copyright (C) 2009 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# OCaml Windows Registry visualizer. This was used while reverse
+# engineering the hive format, and is not normally compiled. If you
+# do wish to compile it, you'll need ocaml-bitstring-devel and
+# ocaml-extlib-devel. Also you'll need a collection of hive files
+# from Windows machines to experiment with.
+#
+# We use '-w y' (disable unused variable warnings) because these
+# warnings aren't very reliable with heavily preprocessed code like
+# that produced by bitstring.
+
+EXTRA_DIST = \
+ visualizer.ml \
+ visualizer_utils.ml \
+ visualizer_NT_time.ml \
+ clearheaderfields.ml \
+ fillemptyhbins.ml \
+ truncatefile.ml
+
+visualizer.opt: visualizer_utils.ml visualizer_NT_time.ml visualizer.ml
+ ocamlfind ocamlopt -w y \
+ -package bitstring,bitstring.syntax,extlib \
+ -syntax camlp4 -linkpkg $^ -o $@
+
+fillemptyhbins.opt: fillemptyhbins.ml
+ ocamlfind ocamlopt -w y \
+ -package bitstring,bitstring.syntax,extlib \
+ -syntax camlp4 -linkpkg $^ -o $@
+
+clearheaderfields.opt: visualizer_utils.ml clearheaderfields.ml
+ ocamlfind ocamlopt -w y \
+ -package bitstring,bitstring.syntax,extlib \
+ -syntax camlp4 -linkpkg $^ -o $@
+
+truncatefile.opt: visualizer_utils.ml truncatefile.ml
+ ocamlfind ocamlopt -w y \
+ -package bitstring,bitstring.syntax,extlib \
+ -syntax camlp4 -linkpkg $^ -o $@
diff --git a/hivex/tools/clearheaderfields.ml b/hivex/tools/clearheaderfields.ml
new file mode 100644
index 0000000..d055553
--- /dev/null
+++ b/hivex/tools/clearheaderfields.ml
@@ -0,0 +1,112 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+open Visualizer_utils
+
+let () = (* Exit with a usage message unless exactly one argument is given. *)
+ if Array.length Sys.argv <> 2 then (
+ eprintf "Error: missing argument.
+Usage: %s hivefile
+" Sys.executable_name;
+ exit 1
+ )
+
+let filename = Sys.argv.(1) (* path of the hive file to process *)
+
+(* Load the whole file as a single bitstring. *)
+let bits = bitstring_of_file filename
+
+(* Split at the 4KB boundary: the first 4096 bytes are the header, the rest is data. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Read the header fields. Only the ten fields returned below are kept. *)
+let seq, last_modified, major, minor, unknown1, unknown2,
+ root_key, end_pages, unknown3, fname =
+ bitmatch header with
+ | { "regf" : 4*8 : string; (* literal signature the header must start with *)
+ seq1 : 4*8 : littleendian; (* returned as seq *)
+ seq2 : 4*8 : littleendian; (* matched but not returned *)
+ last_modified : 64 : bitstring; (* kept as raw bits, not decoded *)
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+ unknown1 : 4*8 : littleendian;
+ unknown2 : 4*8 : littleendian;
+ root_key : 4*8 : littleendian;
+ end_pages : 4*8 : littleendian;
+ unknown3 : 4*8 : littleendian;
+ fname : 64*8 : string;
+ unknownguid1 : 16*8 : bitstring;
+ unknownguid2 : 16*8 : bitstring;
+ unknown4 : 4*8 : littleendian;
+ unknownguid3 : 16*8 : bitstring;
+ unknown5 : 4*8 : string;
+ unknown6 : 340*8 : bitstring;
+ csum : 4*8
+ : littleendian, save_offset_to (crc_offset),
+ check (assert (crc_offset = 0x1fc * 8); true); (* csum sits at byte 0x1fc *)
+ unknown7 : (0x1000-0x200)*8 : bitstring } ->
+ seq1, last_modified, major, minor, unknown1, unknown2,
+ root_key, end_pages, unknown3, fname
+ | {_} -> assert false (* header did not match the layout above *)
+
+(* Create a new header with the GUIDs and unknown4 to unknown7 cleared
+ * (unknown1 to unknown3 are kept). Do it in two parts, first creating
+ * everything up to the checksum, then calculating the checksum and
+ * appending checksum and the final field.
+ *)
+let header =
+ let zeroguid = zeroes_bitstring (16*8) in
+ let before_csum =
+ BITSTRING {
+ "regf" : 4*8 : string;
+ seq : 4*8 : littleendian;
+ seq : 4*8 : littleendian; (* both sequence fields get the old seq1 *)
+ last_modified : 64 : bitstring;
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+ unknown1 : 4*8 : littleendian;
+ unknown2 : 4*8 : littleendian;
+ root_key : 4*8 : littleendian;
+ end_pages : 4*8 : littleendian;
+ unknown3 : 4*8 : littleendian;
+ fname : 64*8 : string;
+ zeroguid : 16*8 : bitstring; (* was unknownguid1 *)
+ zeroguid : 16*8 : bitstring; (* was unknownguid2 *)
+ 0_l : 4*8 : littleendian; (* was unknown4 *)
+ zeroguid : 16*8 : bitstring; (* was unknownguid3 *)
+ 0_l : 4*8 : littleendian; (* was unknown5 *)
+ zeroes_bitstring (340*8) : 340*8 : bitstring (* was unknown6 *)
+ } in
+ assert (bitstring_length before_csum = 0x1fc * 8); (* csum goes at byte 0x1fc *)
+ let csum = bitstring_fold_left_int32_le Int32.logxor 0_l before_csum in
+ let csum_and_after =
+ BITSTRING {
+ csum : 4*8 : littleendian;
+ zeroes_bitstring ((0x1000-0x200)*8) : (0x1000-0x200)*8 : bitstring
+ } in
+ let new_header = concat [before_csum; csum_and_after] in
+ assert (bitstring_length header = bitstring_length new_header); (* header is still the original here *)
+ new_header
+
+(* Write the new header and the unchanged data back over the input file. *)
+let () =
+ let file = concat [header; data] in
+ bitstring_to_file file filename
diff --git a/hivex/tools/fillemptyhbins.ml b/hivex/tools/fillemptyhbins.ml
new file mode 100644
index 0000000..14eae96
--- /dev/null
+++ b/hivex/tools/fillemptyhbins.ml
@@ -0,0 +1,74 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+
+let () = (* Exit with a usage message unless exactly two arguments are given. *)
+ if Array.length Sys.argv <> 3 then (
+ eprintf "Error: missing argument.
+Usage: %s hivefile startoffset
+" Sys.executable_name;
+ exit 1
+ )
+
+let filename = Sys.argv.(1)
+let offset = int_of_string Sys.argv.(2) (* byte offset into data, i.e. counted after the 4KB header *)
+
+(* Load the file. *)
+let bits = bitstring_of_file filename
+
+(* Split into header + data at the 4KB boundary. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Overwrite everything after @offset: count the 4KB pages there, keep only what is before. *)
+let nrpages = (bitstring_length data / 8 - offset) / 4096 (* whole pages only (integer division) *)
+let data = takebits (offset * 8) data
+
+(* Create the empty pages. They're not all the same because each
+ * page contains its own page_offset.
+ *)
+let pages =
+ let noblock = (* one block filling the page after its 32 byte header *)
+ let seg_len = 4096 - 32 in
+ let zeroes = zeroes_bitstring ((seg_len - 4) * 8) in
+ BITSTRING {
+ Int32.of_int seg_len : 4*8 : littleendian; (* positive length; visualizer.ml prints such blocks as free *)
+ zeroes : (seg_len - 4) * 8 : bitstring
+ } in
+ let zeroes = zeroes_bitstring (20*8) in (* remaining 20 bytes of the 32 byte page header *)
+ let rec loop page_offset i = (* i counts the pages emitted so far *)
+ if i < nrpages then (
+ let page =
+ BITSTRING {
+ "hbin" : 4*8 : string;
+ Int32.of_int page_offset : 4*8 : littleendian;
+ 4096_l : 4*8 : littleendian; (* page length *)
+ zeroes : 20*8 : bitstring;
+ noblock : (4096 - 32) * 8 : bitstring
+ } in
+ page :: loop (page_offset + 4096) (i+1)
+ ) else []
+ in
+ loop offset 0 (* the first new page starts at offset *)
+
+(* Write the header, the data kept before offset, and the new pages over the input file. *)
+let () =
+ let file = concat (header :: data :: pages) in
+ bitstring_to_file file filename
diff --git a/hivex/tools/truncatefile.ml b/hivex/tools/truncatefile.ml
new file mode 100644
index 0000000..b519f7a
--- /dev/null
+++ b/hivex/tools/truncatefile.ml
@@ -0,0 +1,112 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+open Visualizer_utils
+
+let () = (* Exit with a usage message unless exactly two arguments are given. *)
+ if Array.length Sys.argv <> 3 then (
+ eprintf "Error: missing argument.
+Usage: %s hivefile endpages
+" Sys.executable_name;
+ exit 1
+ )
+
+let filename = Sys.argv.(1)
+let new_end_pages = int_of_string Sys.argv.(2) (* a byte count, see its use below *)
+
+(* Load the file. *)
+let bits = bitstring_of_file filename
+
+(* Split into header + data at the 4KB boundary. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Truncate the file data to its first new_end_pages bytes. *)
+let data = takebits (new_end_pages * 8) data
+
+(* Read the header fields. Only the ten fields returned below are kept. *)
+let seq, last_modified, major, minor, unknown1, unknown2,
+ root_key, end_pages, unknown3, fname =
+ bitmatch header with
+ | { "regf" : 4*8 : string; (* literal signature the header must start with *)
+ seq1 : 4*8 : littleendian; (* returned as seq *)
+ seq2 : 4*8 : littleendian; (* matched but not returned *)
+ last_modified : 64 : bitstring; (* kept as raw bits, not decoded *)
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+ unknown1 : 4*8 : littleendian;
+ unknown2 : 4*8 : littleendian;
+ root_key : 4*8 : littleendian;
+ end_pages : 4*8 : littleendian; (* the old value *)
+ unknown3 : 4*8 : littleendian;
+ fname : 64*8 : string;
+ unknownguid1 : 16*8 : bitstring;
+ unknownguid2 : 16*8 : bitstring;
+ unknown4 : 4*8 : littleendian;
+ unknownguid3 : 16*8 : bitstring;
+ unknown5 : 4*8 : string;
+ unknown6 : 340*8 : bitstring;
+ csum : 4*8
+ : littleendian, save_offset_to (crc_offset),
+ check (assert (crc_offset = 0x1fc * 8); true); (* csum sits at byte 0x1fc *)
+ unknown7 : (0x1000-0x200)*8 : bitstring } ->
+ seq1, last_modified, major, minor, unknown1, unknown2,
+ root_key, end_pages, unknown3, fname
+ | {_} -> assert false (* header did not match the layout above *)
+
+(* Create a new header with endpages updated; GUIDs and unknown4 to unknown7 are zeroed. *)
+let header =
+ let zeroguid = zeroes_bitstring (16*8) in
+ let before_csum =
+ BITSTRING {
+ "regf" : 4*8 : string;
+ seq : 4*8 : littleendian;
+ seq : 4*8 : littleendian; (* both sequence fields get the old seq1 *)
+ last_modified : 64 : bitstring;
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+ unknown1 : 4*8 : littleendian;
+ unknown2 : 4*8 : littleendian;
+ root_key : 4*8 : littleendian;
+ Int32.of_int new_end_pages : 4*8 : littleendian; (* replaces the old end_pages *)
+ unknown3 : 4*8 : littleendian;
+ fname : 64*8 : string;
+ zeroguid : 16*8 : bitstring; (* was unknownguid1 *)
+ zeroguid : 16*8 : bitstring; (* was unknownguid2 *)
+ 0_l : 4*8 : littleendian; (* was unknown4 *)
+ zeroguid : 16*8 : bitstring; (* was unknownguid3 *)
+ 0_l : 4*8 : littleendian; (* was unknown5 *)
+ zeroes_bitstring (340*8) : 340*8 : bitstring (* was unknown6 *)
+ } in
+ assert (bitstring_length before_csum = 0x1fc * 8); (* csum goes at byte 0x1fc *)
+ let csum = bitstring_fold_left_int32_le Int32.logxor 0_l before_csum in
+ let csum_and_after =
+ BITSTRING {
+ csum : 4*8 : littleendian;
+ zeroes_bitstring ((0x1000-0x200)*8) : (0x1000-0x200)*8 : bitstring
+ } in
+ let new_header = concat [before_csum; csum_and_after] in
+ assert (bitstring_length header = bitstring_length new_header); (* header is still the original here *)
+ new_header
+
+(* Write the new header and the truncated data back over the input file. *)
+let () =
+ let file = concat [header; data] in
+ bitstring_to_file file filename
diff --git a/hivex/tools/visualizer.ml b/hivex/tools/visualizer.ml
new file mode 100644
index 0000000..437b90d
--- /dev/null
+++ b/hivex/tools/visualizer.ml
@@ -0,0 +1,845 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the following documents. Note they are both incomplete
+ * and inaccurate in some respects.
+ *
+ * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf
+ * http://pogostick.net/~pnh/ntpasswd/WinReg.txt
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+open Visualizer_utils
+open Visualizer_NT_time
+
+let () = (* Exit with a usage message unless exactly one argument is given. *)
+ if Array.length Sys.argv <> 2 then (
+ eprintf "Error: missing argument.
+Usage: %s hivefile > out
+where
+ 'hivefile' is the input hive file from a Windows machine
+ 'out' is an output file where we will write all the keys,
+ values etc for extended debugging purposes.
+Errors, inconsistencies and unexpected fields in the hive file
+are written to stderr.
+" Sys.executable_name;
+ exit 1
+ )
+
+let filename = Sys.argv.(1)
+let basename = Filename.basename filename (* used as the prefix of error messages *)
+
+(* Load the whole file as a single bitstring. *)
+let bits = bitstring_of_file filename
+
+(* Split at the 4KB boundary: the first 4096 bytes are the header, the rest is data. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Define a persistent pattern which matches the header fields. By
+ * using persistent patterns, we can reuse them later in the
+ * program.
+ *)
+let bitmatch header_fields =
+ { "regf" : 4*8 : string;
+ seq1 : 4*8 : littleendian;
+ seq2 : 4*8 : littleendian;
+ last_modified : 64
+ : littleendian, bind (nt_to_time_t last_modified); (* rebound to the converted time *)
+ major : 4*8 : littleendian;
+ minor : 4*8 : littleendian;
+
+ (* "Type". Contains 0. *)
+ unknown1 : 4*8 : littleendian;
+
+ (* "Format". Contains 1. *)
+ unknown2 : 4*8 : littleendian;
+
+ root_key : 4*8
+ : littleendian, bind (get_offset root_key); (* rebound to the result of get_offset *)
+ end_pages : 4*8
+ : littleendian, bind (get_offset end_pages); (* rebound to the result of get_offset *)
+
+ (* "Cluster". Contains 1. *)
+ unknown3 : 4*8 : littleendian;
+
+ filename : 64*8 : string; (* shadows the top-level filename wherever this pattern is matched *)
+
+ (* All three GUIDs here confirmed in Windows 7 registries. In
+ * Windows <= 2003 these GUID fields seem to contain junk.
+ *
+ * If you write zeroes to the GUID fields, load and unload in Win7
+ * REGEDIT, then Windows 7 writes some random GUIDs.
+ *
+ * Also (on Win7) unknownguid1 == unknownguid2. unknownguid3 is
+ * different.
+ *)
+ unknownguid1 : 16*8 : bitstring;
+ unknownguid2 : 16*8 : bitstring;
+
+ (* Wrote zero to unknown4, loaded and unloaded it in Win7 REGEDIT,
+ * and it still contained zero. In existing registries it seems to
+ * contain random junk.
+ *)
+ unknown4 : 4*8 : littleendian;
+ unknownguid3 : 16*8 : bitstring;
+
+ (* If you write zero to unknown5, load and unload it in REGEDIT,
+ * Windows 7 puts the string "rmtm" here. Existing registries also
+ * seen containing this string. However on older Windows it can
+ * be all zeroes.
+ *)
+ unknown5 : 4*8 : string;
+
+ (* This seems to contain junk from other parts of the registry. I
+ * wrote zeroes here, loaded and unloaded it in Win7 REGEDIT, and
+ * it still contained zeroes.
+ *)
+ unknown6 : 340*8 : bitstring;
+ csum : 4*8
+ : littleendian, save_offset_to (crc_offset),
+ check (assert (crc_offset = 0x1fc * 8); true); (* csum sits at byte 0x1fc *)
+ unknown7 : (0x1000-0x200)*8 : bitstring } (* rest of the 4KB header after byte 0x200 *)
+
+let fprintf_header chan bits = (* Print every header field of bits as one HD line on chan. *)
+ bitmatch bits with
+ | { :header_fields } ->
+ fprintf chan
+ "HD %6ld %6ld %s %ld.%ld %08lx %08lx %s %s %08lx %s %s %s %08lx %s %s %s %08lx %s\n"
+ seq1 seq2 (print_time last_modified) major minor
+ unknown1 unknown2
+ (print_offset root_key) (print_offset end_pages)
+ unknown3 (print_utf16 filename) (* filename is the header field, not the input path *)
+ (print_guid unknownguid1) (print_guid unknownguid2)
+ unknown4 (print_guid unknownguid3) unknown5
+ (print_bitstring unknown6)
+ csum (print_bitstring unknown7)
+
+(* Parse the header, dump it to stdout, and report unexpected values on stderr. *)
+let root_key, end_pages =
+ bitmatch header with
+ | { :header_fields } ->
+ fprintf_header stdout header;
+
+ if major <> 1_l then
+ eprintf "HD hive file major <> 1 (major.minor = %ld.%ld)\n"
+ major minor;
+ if seq1 <> seq2 then
+ eprintf "HD hive file sequence numbers should match (%ld <> %ld)\n"
+ seq1 seq2;
+ if unknown1 <> 0_l then
+ eprintf "HD unknown1 field <> 0 (%08lx)\n" unknown1;
+ if unknown2 <> 1_l then
+ eprintf "HD unknown2 field <> 1 (%08lx)\n" unknown2;
+ if unknown3 <> 1_l then
+ eprintf "HD unknown3 field <> 1 (%08lx)\n" unknown3;
+ if not (equals unknownguid1 unknownguid2) then
+ eprintf "HD unknownguid1 <> unknownguid2 (%s, %s)\n"
+ (print_guid unknownguid1) (print_guid unknownguid2);
+ (* We think this is junk.
+ if unknown4 <> 0_l then
+ eprintf "HD unknown4 field <> 0 (%08lx)\n" unknown4;
+ *)
+ if unknown5 <> "rmtm" && unknown5 <> "\000\000\000\000" then
+ eprintf "HD unknown5 field <> \"rmtm\" & <> zeroes (%s)\n" unknown5;
+ (* We think this is junk.
+ if not (is_zero_bitstring unknown6) then
+ eprintf "HD unknown6 area is not zero (%s)\n"
+ (print_bitstring unknown6);
+ *)
+ if not (is_zero_bitstring unknown7) then
+ eprintf "HD unknown7 area is not zero (%s)\n"
+ (print_bitstring unknown7);
+
+ root_key, end_pages (* already passed through get_offset by header_fields *)
+ | {_} ->
+ failwithf "%s: this doesn't look like a registry hive file\n" basename
+
+(* Define persistent patterns to match page and block fields. *)
+let bitmatch page_fields =
+ { "hbin" : 4*8 : string;
+ page_offset : 4*8
+ : littleendian, bind (get_offset page_offset);
+ page_size : 4*8
+ : littleendian, check (Int32.rem page_size 4096_l = 0_l), (* must be a multiple of 4096 *)
+ bind (Int32.to_int page_size);
+
+ (* In the first hbin in the file these fields contain something.
+ * In subsequent hbins these fields are all zero.
+ *
+ * From existing hives (first hbin only):
+ *
+ * unknown1 unknown2 unknown5
+ * 00 00 00 00 00 00 00 00 9C 77 3B 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 50 3A 15 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 57 86 90 D4 9A 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 52 3F 90 9D CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 E8 86 C1 17 BD 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 4A 77 CE 7A CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 E4 EA 23 FF 69 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 50 13 BA 8D A2 9A CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 0E 07 93 13 BD 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 9D 55 D0 B3 99 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 46 AC FF 8B CF 7C CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 80 29 2D 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 90 8D 36 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 5C 9B 8B B8 6A 06 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 85 9F BB 99 9A 58 CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 BE 3D 21 02 6A 7D CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 70 53 09 07 B5 9B CA 01 00 00 00 00
+ * 00 00 00 00 00 00 00 00 5B 62 42 B6 9A 58 CA 01 00 00 00 00
+ * 01 00 00 00 00 00 00 00 B2 46 9B 9E CF 7C CA 01 00 00 00 00
+ * 01 00 00 00 00 00 00 00 CA 88 EE 1A BD 06 CA 01 00 00 00 00
+ *
+ * From the above we worked out that fields 3 and 4 are an NT
+ * timestamp, which seems to be "last modified" (when REGEDIT
+ * unloads a hive it updates this timestamp even if nothing
+ * has been changed).
+ *)
+ unknown1 : 4*8 : littleendian; (* usually zero, occasionally 1 *)
+ unknown2 : 4*8 : littleendian; (* always zero? *)
+ last_modified : 64
+ : littleendian, (* converted for the page at offset 0 only; elsewhere zero is asserted *)
+ bind (if page_offset = 0 then nt_to_time_t last_modified
+ else (
+ assert (last_modified = 0_L);
+ 0.
+ )
+ );
+ (* The "B.D." document said this field contains the page size, but
+ * this is not true. This misinformation has been copied to the
+ * sentinelchicken documentation too.
+ *)
+ unknown5 : 4*8 : littleendian; (* always zero? *)
+
+ (* Now the blocks in this page follow. *)
+ blocks : (page_size - 32) * 8 : bitstring; (* page_size minus the 32 bytes of fields above *)
+
+ rest : -1 : bitstring } (* whatever follows this page *)
+
+let fprintf_page chan bits = (* Print one HB line for the page header at the start of bits. *)
+ bitmatch bits with
+ | { :page_fields } ->
+ fprintf chan "HB %s %08x %08lx %08lx %s %08lx\n"
+ (print_offset page_offset)
+ page_size unknown1 unknown2
+ (if page_offset = 0 then print_time last_modified (* only the page at offset 0 has a real time *)
+ else string_of_float last_modified) unknown5
+
+let bitmatch block_fields = (* One block: a signed length, its data, then the remaining blocks. *)
+ { seg_len : 4*8
+ : littleendian, bind (Int32.to_int seg_len); (* may be negative; callers test seg_len < 0 *)
+ block_data : (abs seg_len - 4) * 8 : bitstring; (* the length counts its own 4 byte field *)
+ rest : -1 : bitstring }
+
+let fprintf_block chan block_offset bits = (* Print one BL line: offset, used or free, length. *)
+ bitmatch bits with
+ | { :block_fields } ->
+ fprintf chan "BL %s %s %d\n"
+ (print_offset block_offset)
+ (if seg_len < 0 then "used" else "free") (* a negative length marks a used block *)
+ (if seg_len < 0 then -seg_len else seg_len) (* always printed as a positive length *)
+
+(* Iterate over the pages and blocks. In the process we will examine
+ * each page (hbin) header. Also we will build block_list which is a
+ * list of (block offset, (length, used flag, data)).
+ *)
+let block_list = ref []
+let () =
+ let rec loop_over_pages data data_offset =
+ if data_offset < end_pages then (
+ bitmatch data with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () (* no data left *)
+
+ | { :page_fields } ->
+ fprintf_page stdout data;
+
+ assert (page_offset = data_offset);
+
+ if data_offset = 0 then ( (* first hbin only *)
+ if unknown1 <> 0_l then
+ eprintf "HB %s unknown1 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown1;
+ if unknown2 <> 0_l then
+ eprintf "HB %s unknown2 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown2;
+ if unknown5 <> 0_l then
+ eprintf "HB %s unknown5 field <> 0 (%08lx)\n"
+ (print_offset page_offset) unknown5
+ ) else ( (* subsequent hbins *)
+ if unknown1 <> 0_l || unknown2 <> 0_l || unknown5 <> 0_l then
+ eprintf "HB %s unknown fields <> 0 (%08lx %08lx %08lx)\n"
+ (print_offset page_offset)
+ unknown1 unknown2 unknown5;
+ if last_modified <> 0. then
+ eprintf "HB %s last_modified <> 0. (%g)\n"
+ (print_offset page_offset) last_modified
+ );
+
+ (* Loop over the blocks in this page. *)
+ loop_over_blocks blocks (data_offset + 32); (* blocks start after the 32 byte page header *)
+
+ (* Loop over rest of the pages. *)
+ loop_over_pages rest (data_offset + page_size)
+
+ | {_} ->
+ failwithf "%s: invalid hbin at offset %s\n"
+ basename (print_offset data_offset)
+ ) else (
+ (* Reached the end of the official hbins in this file, BUT the
+ * file can be larger than this and might contain stuff. What
+ * does it contain after the hbins? We think just junk, but
+ * we're not sure.
+ *)
+ if not (is_zero_bitstring data) then (
+ eprintf "Junk in file after end of pages:\n";
+ let rec loop data data_offset =
+ bitmatch data with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+ | { :page_fields } ->
+ eprintf "\tjunk hbin %s 0x%08x\n"
+ (print_offset data_offset) page_size;
+ loop rest (data_offset + page_size);
+ | { _ } ->
+ eprintf "\tother junk %s %s\n"
+ (print_offset data_offset) (print_bitstring data)
+ in
+ loop data data_offset
+ )
+ )
+ and loop_over_blocks blocks block_offset =
+ bitmatch blocks with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () (* end of this page *)
+
+ | { :block_fields } ->
+ assert (block_offset mod 8 = 0); (* blocks are expected to start on 8 byte boundaries *)
+
+ fprintf_block stdout block_offset blocks;
+
+ let used, seg_len =
+ if seg_len < 0 then true, -seg_len else false, seg_len in (* seg_len is positive from here on *)
+
+ let block = block_offset, (seg_len, used, block_data) in
+ block_list := block :: !block_list;
+
+ (* Loop over the rest of the blocks in this page. *)
+ loop_over_blocks rest (block_offset + seg_len)
+
+ | {_} ->
+ failwithf "%s: invalid block near offset %s\n"
+ basename (print_offset block_offset)
+ in
+ loop_over_pages data 0
+
+(* Turn the block_list into a map so we can quickly look up a block
+ * from its offset.
+ *)
+let block_list = !block_list (* from here on block_list is the plain list, not the ref *)
+let block_map =
+ List.fold_left (
+ fun map (block_offset, block) -> IntMap.add block_offset block map
+ ) IntMap.empty block_list
+let lookup fn offset = (* fn is the name of the caller, used only in the error messages *)
+ try
+ let (_, used, _) as block = IntMap.find offset block_map in
+ if not used then
+ failwithf "%s: %s: lookup: free block %s referenced from hive tree"
+ basename fn (print_offset offset);
+ block (* the (length, used flag, data) triple *)
+ with Not_found ->
+ failwithf "%s: %s: lookup: unknown block %s referenced from hive tree"
+ basename fn (print_offset offset)
+
+(* Use this to mark blocks that we've visited. If the hive contains
+ * no unreferenced blocks, then by the end this should just contain
+ * free blocks.
+ *)
+let mark_visited, is_not_visited, unvisited_blocks =
+ let v = ref block_map in (* starts as every block; visited ones are removed *)
+ let mark_visited offset = v := IntMap.remove offset !v
+ and is_not_visited offset = IntMap.mem offset !v
+ and unvisited_blocks () = !v in (* the map of blocks not yet marked *)
+ mark_visited, is_not_visited, unvisited_blocks
+
+(* Define persistent patterns to match nk-records, vk-records and
+ * sk-records, which are the record types that we especially want to
+ * analyze later. Other blocks types (eg. value lists, lf-records)
+ * have no "spare space" so everything is known about them and we don't
+ * store these.
+ *)
+let bitmatch nk_fields =
+ { "nk" : 2*8 : string;
+ (* Flags stored in the file as a little endian word, hence the
+ * unusual ordering:
+ *)
+ virtmirrored : 1;
+ predefinedhandle : 1; keynameascii : 1; symlinkkey : 1;
+ cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1;
+ unknownflag8000 : 1; unknownflag4000 : 1;
+ unknownflag2000 : 1; unknownflag1000 : 1;
+ unknownflag0800 : 1; unknownflag0400 : 1;
+ virtualstore : 1; virttarget : 1;
+ timestamp : 64 : littleendian, bind (nt_to_time_t timestamp);
+ unknown1 : 4*8 : littleendian;
+ parent : 4*8 : littleendian, bind (get_offset parent);
+ nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys);
+ nr_subkeys_vol : 4*8 : littleendian; (* explicit: bitstring defaults to bigendian *)
+ subkeys : 4*8 : littleendian, bind (get_offset subkeys);
+ subkeys_vol : 4*8 : littleendian; (* explicit: bitstring defaults to bigendian *)
+ nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values);
+ vallist : 4*8 : littleendian, bind (get_offset vallist);
+ sk : 4*8 : littleendian, bind (get_offset sk);
+ classname : 4*8 : littleendian, bind (get_offset classname);
+ (* sentinelchicken.com says this is a single 32 bit field
+ * containing maximum number of bytes in a subkey name, however
+ * that does not seem to be correct. We think it is two 16 bit
+ * fields, the first being the maximum number of bytes in the
+ * UTF16-LE encoded version of the subkey names, (since subkey
+ * names are usually ASCII, that would be max length of names * 2).
+ * This is a historical maximum, so it can be greater than the
+ * current maximum name field.
+ *
+ * The second field is often non-zero, but the purpose is unknown.
+ * In the hives we examined it had values 0, 1, 0x20, 0x21, 0xa0,
+ * 0xa1, 0xe1, suggesting some sort of flags.
+ *)
+ max_subkey_name_len : 2*8 : littleendian;
+ unknown2 : 2*8 : littleendian;
+ (* sentinelchicken.com says: maximum subkey CLASSNAME length,
+ * however that does not seem to be correct. In hives I looked
+ * at, it has value 0, 0xc, 0x10, 0x18, 0x1a, 0x28.
+ *)
+ unknown3 : 4*8 : littleendian;
+ (* sentinelchicken.com says: maximum number of bytes in a value
+ * name, however that does not seem to be correct. We think it is
+ * the maximum number of bytes in the UTF16-LE encoded version of
+ * the value names (since value names are usually ASCII, that would
+ * be max length of names * 2). This is a historical maximum, so
+ * it can be greater than the current maximum name field.
+ *)
+ max_vk_name_len : 4*8 : littleendian, bind (Int32.to_int max_vk_name_len);
+ (* sentinelchicken.com says: maximum value data size, and this
+ * agrees with my observations. It is the largest data size (not
+ * seg_len, but vk.data_len) for any value in this key. We think
+ * that this field is a historical max, so eg if a maximally sized
+ * value is deleted then this field is not reduced. Certainly
+ * max_vk_data_len >= the measured maximum in all the hives that we
+ * have observed.
+ *)
+ max_vk_data_len : 4*8 : littleendian, bind (Int32.to_int max_vk_data_len);
+ unknown6 : 4*8 : littleendian;
+ name_len : 2*8 : littleendian;
+ classname_len : 2*8 : littleendian;
+ name : name_len * 8 : string } (* name_len is a byte count *)
+
+let fprintf_nk chan nk = (* Print one NK line for the nk-record in the block at offset nk. *)
+ let (_, _, bits) = lookup "fprintf_nk" nk in (* lookup fails if the block is unknown or free *)
+ bitmatch bits with
+ | { :nk_fields } ->
+ fprintf chan
+ "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s %08lx %s %d %ld %s %08lx %d %s %s %s %d %04x %08lx %d %d %08lx %d %d %s\n"
+ (print_offset nk)
+ (if unknownflag8000 then "8" else ".") (* one character per flag, a dot when clear *)
+ (if unknownflag4000 then "4" else ".")
+ (if unknownflag2000 then "2" else ".")
+ (if unknownflag1000 then "1" else ".")
+ (if unknownflag0800 then "8" else ".")
+ (if unknownflag0400 then "4" else ".")
+ (if virtualstore then "s" else ".")
+ (if virttarget then "t" else ".")
+ (if virtmirrored then "m" else ".")
+ (if predefinedhandle then "P" else ".")
+ (if keynameascii then "A" else ".")
+ (if symlinkkey then "S" else ".")
+ (if cannotbedeleted then "N" else ".")
+ (if isroot then "R" else ".")
+ (if ismountpoint then "M" else ".")
+ (if isvolatile then "V" else ".")
+ (print_time timestamp)
+ unknown1 (print_offset parent) nr_subkeys nr_subkeys_vol
+ (print_offset subkeys) subkeys_vol
+ nr_values (print_offset vallist)
+ (print_offset sk) (print_offset classname)
+ max_subkey_name_len unknown2 unknown3
+ max_vk_name_len max_vk_data_len unknown6
+ name_len classname_len name
+
+type data_t = Inline of bitstring | Offset of int
+let bitmatch vk_fields =
+ { "vk" : 2*8 : string;
+ name_len : 2*8 : littleendian;
+ (* No one documents the important fact that data_len can have the
+ * top bit set (randomly or is it meaningful?). The length can
+ * also be 0 (or 0x80000000) if the data type is NONE.
+ *)
+ data_len : 4*8
+ : littleendian, bind (
+ let data_len = Int32.logand data_len 0x7fff_ffff_l in
+ Int32.to_int data_len
+ );
+ (* Inline data if len <= 4, offset otherwise.
+ *
+ * The data itself depends on the type field.
+ *
+ * For REG_SZ type, the data always seems to be NUL-terminated, which
+ * means because these strings are often UTF-16LE, that the string will
+ * end with \0\0 bytes. The termination bytes are included in data_len.
+ *
+ * For REG_MULTI_SZ, see
+ * http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx
+ *)
+ data : 4*8
+ : bitstring, bind (
+ if data_len <= 4 then
+ Inline (takebits (data_len*8) data)
+ else (
+ let offset =
+ bitmatch data with { offset : 4*8 : littleendian } -> offset in
+ let offset = get_offset offset in
+ Offset offset
+ )
+ );
+ t : 4*8 : littleendian, bind (Int32.to_int t);
+ (* Flags, stored as a little-endian word: *)
+ unknown1 : 7;
+ nameisascii : 1; (* Clear for default [zero-length] name, always set
+ * otherwise in registries that we found. Perhaps this
+ * is really "nameisdefault" flag?
+ *)
+ unknown2 : 8;
+ (* Unknown field, usually contains something. *)
+ unknown3 : 2*8 : littleendian;
+ name : name_len * 8 : string }
+
+let fprintf_vk chan vk =
+ let (_, _, bits) = lookup "fprintf_vk" vk in
+ bitmatch bits with
+ | { :vk_fields } ->
+ let real_data =
+ match data with
+ | Inline data -> data
+ | Offset offset ->
+ let (_, _, bits) = lookup "fprintf_vk (data)" offset in
+ bits in
+ fprintf chan "VK %s %s %d %s%s %s %08x %s %08x %08x\n"
+ (print_offset vk)
+ name data_len
+ (match data with
+ | Inline _ -> ""
+ | Offset offset -> "["^print_offset offset^"]")
+ (print_bitstring real_data)
+ (print_vk_type t)
+ unknown1 (if nameisascii then "A" else "L")
+ unknown2 unknown3
+
+let bitmatch sk_fields =
+ { "sk" : 2*8 : string;
+ unknown1 : 2*8 : littleendian;
+ sk_next : 4*8 : littleendian, bind (get_offset sk_next);
+ sk_prev : 4*8 : littleendian, bind (get_offset sk_prev);
+ refcount : 4*8 : littleendian, bind (Int32.to_int refcount);
+ sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len);
+ sec_desc : sec_len * 8 : bitstring }
+
+let fprintf_sk chan sk =
+ let (_, _, bits) = lookup "fprintf_sk" sk in
+ bitmatch bits with
+ | { :sk_fields } ->
+ fprintf chan "SK %s %04x %s %s %d %d\n"
+ (print_offset sk) unknown1
+ (print_offset sk_next) (print_offset sk_prev)
+ refcount sec_len
+ (* print_bitstring sec_desc -- suppress this *)
+
+(* Store lists of records we encounter (lists of offsets). *)
+let nk_records = ref []
+and vk_records = ref []
+and sk_records = ref []
+
+(* Functions to visit each block, starting at the root. Each block
+ * that we visit is printed.
+ *)
+let rec visit_nk ?(nk_is_root = false) nk =
+ let (_, _, bits) = lookup "visit_nk" nk in
+ mark_visited nk;
+ (bitmatch bits with
+ | { :nk_fields } ->
+ fprintf_nk stdout nk;
+
+ nk_records := nk :: !nk_records;
+
+ (* Check the isroot flag is only set on the root node. *)
+ assert (isroot = nk_is_root);
+
+ if unknownflag8000 then
+ eprintf "NK %s unknownflag8000 is set\n" (print_offset nk);
+ if unknownflag4000 then
+ eprintf "NK %s unknownflag4000 is set\n" (print_offset nk);
+ if unknownflag2000 then
+ eprintf "NK %s unknownflag2000 is set\n" (print_offset nk);
+ if unknownflag1000 then
+ eprintf "NK %s unknownflag1000 is set\n" (print_offset nk);
+ if unknownflag0800 then
+ eprintf "NK %s unknownflag0800 is set\n" (print_offset nk);
+ if unknownflag0400 then
+ eprintf "NK %s unknownflag0400 is set\n" (print_offset nk);
+ if unknown1 <> 0_l then
+ eprintf "NK %s unknown1 <> 0 (%08lx)\n" (print_offset nk) unknown1;
+ if unknown2 <> 0 then
+ eprintf "NK %s unknown2 <> 0 (%04x)\n" (print_offset nk) unknown2;
+ if unknown3 <> 0_l then
+ eprintf "NK %s unknown3 <> 0 (%08lx)\n" (print_offset nk) unknown3;
+ if unknown6 <> 0_l then
+ eprintf "NK %s unknown6 <> 0 (%08lx)\n" (print_offset nk) unknown6;
+
+ (* -- common, assume it's not an error
+ if classname = -1 then
+ eprintf "NK %s has no classname\n" (print_offset nk);
+ if classname_len = 0 then
+ eprintf "NK %s has zero-length classname\n" (print_offset nk);
+ *)
+ if sk = -1 then
+ eprintf "NK %s has no sk-record\n" (print_offset nk);
+ if name_len = 0 then
+ eprintf "NK %s has zero-length name\n" (print_offset nk);
+
+ (* Visit the values first at this node. *)
+ let max_data_len, max_name_len =
+ if vallist <> -1 then
+ visit_vallist nr_values vallist
+ else
+ 0, 0 in
+
+ if max_vk_data_len < max_data_len then
+ eprintf "NK %s nk.max_vk_data_len (%d) < actual max data_len (%d)\n"
+ (print_offset nk) max_vk_data_len max_data_len;
+
+ if max_vk_name_len < max_name_len * 2 then
+ eprintf "NK %s nk.max_vk_name_len (%d) < actual max name_len * 2 (%d)\n"
+ (print_offset nk) max_vk_name_len (max_name_len * 2);
+
+ (* Visit the subkeys of this node. *)
+ if subkeys <> -1 then (
+ let counted, max_name_len = visit_subkeys subkeys in
+
+ if counted <> nr_subkeys then
+ failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n"
+ basename nr_subkeys counted (print_offset subkeys);
+
+ if max_subkey_name_len < max_name_len * 2 then
+ eprintf "NK %s nk.max_subkey_name_len (%d) < actual max name_len * 2 (%d)\n"
+ (print_offset nk) max_subkey_name_len (max_name_len * 2);
+ );
+
+ (* Visit the sk-record and classname. *)
+ if sk <> -1 then
+ visit_sk sk;
+ if classname <> -1 then
+ visit_classname classname classname_len;
+
+ | {_} ->
+ failwithf "%s: invalid nk block at offset %s\n"
+ basename (print_offset nk)
+ )
+
+and visit_vallist nr_values vallist =
+ let (seg_len, _, bits) = lookup "visit_vallist" vallist in
+ mark_visited vallist;
+ printf "VL %s %d %d\n" (print_offset vallist) nr_values seg_len;
+ visit_values_in_vallist nr_values vallist bits
+
+and visit_values_in_vallist nr_values vallist bits =
+ if nr_values > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (nr_values = 0);
+ 0, 0
+
+ | { value : 4*8 : littleendian, bind (get_offset value);
+ rest : -1 : bitstring } ->
+ let data_len, name_len = visit_vk value in
+ let max_data_len, max_name_len =
+ visit_values_in_vallist (nr_values-1) vallist rest in
+ max max_data_len data_len, max max_name_len name_len
+
+ | {_} ->
+ failwithf "%s: invalid offset in value list at %s\n"
+ basename (print_offset vallist)
+ ) else 0, 0
+
+and visit_vk vk =
+ let (_, _, bits) = lookup "visit_vk" vk in
+ mark_visited vk;
+
+ (bitmatch bits with
+ | { :vk_fields } ->
+ fprintf_vk stdout vk;
+
+ if unknown1 <> 0 then
+ eprintf "VK %s unknown1 flags set (%02x)\n"
+ (print_offset vk) unknown1;
+ if unknown2 <> 0 then
+ eprintf "VK %s unknown2 flags set (%02x)\n"
+ (print_offset vk) unknown2;
+ if unknown3 <> 0 then
+ eprintf "VK %s unknown3 flags set (%04x)\n"
+ (print_offset vk) unknown3;
+
+ (* Note this is common for default [ie. zero-length] key names. *)
+ if not nameisascii && name_len > 0 then
+ eprintf "VK %s has non-ASCII name flag set (name is %s)\n"
+ (print_offset vk) (print_binary_string name);
+
+ vk_records := vk :: !vk_records;
+ (match data with
+ | Inline data -> ()
+ | Offset offset ->
+ let _ = lookup "visit_vk (data)" offset in
+ mark_visited offset
+ );
+
+ data_len, name_len
+
+ | {_} ->
+ failwithf "%s: invalid vk block at offset %s\n"
+ basename (print_offset vk)
+ )
+
+(* Visits subkeys, recursing through intermediate lf/lh/ri structures,
+ * and returns (number of subkeys actually seen, maximum name_len seen).
+ *)
+and visit_subkeys subkeys =
+ let (_, _, bits) = lookup "visit_subkeys" subkeys in
+ mark_visited subkeys;
+ (bitmatch bits with
+ | { ("lf"|"lh") : 2*8 : string;
+ len : 2*8 : littleendian; (* number of subkeys of this node *)
+ rest : len*8*8 : bitstring } ->
+ printf "LF %s %d\n" (print_offset subkeys) len;
+ visit_subkeys_in_lf_list subkeys len rest
+
+ | { "ri" : 2*8 : string;
+ len : 2*8 : littleendian;
+ rest : len*4*8 : bitstring } ->
+ printf "RI %s %d\n" (print_offset subkeys) len;
+ visit_subkeys_in_ri_list subkeys len rest
+
+ (* In theory you can have an li-record here, but we've never
+ * seen one.
+ *)
+
+ | { "nk" : 2*8 : string } ->
+ visit_nk subkeys;
+ let name_len = name_len_of_nk subkeys in
+ 1, name_len
+
+ | {_} ->
+ failwithf "%s: invalid subkey node found at %s\n"
+ basename (print_offset subkeys)
+ )
+
+and visit_subkeys_in_lf_list subkeys_top len bits =
+ if len > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (len = 0);
+ 0, 0
+
+ | { offset : 4*8 : littleendian, bind (get_offset offset);
+ _ (* hash *) : 4*8 : bitstring;
+ rest : -1 : bitstring } ->
+ let c1, name_len1 = visit_subkeys offset in
+ let c2, name_len2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in
+ c1 + c2, max name_len1 name_len2
+
+ | {_} ->
+ failwithf "%s: invalid subkey in lf/lh list at %s\n"
+ basename (print_offset subkeys_top)
+ ) else 0, 0
+
+and visit_subkeys_in_ri_list subkeys_top len bits =
+ if len > 0 then (
+ bitmatch bits with
+ | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+ assert (len = 0);
+ 0, 0
+
+ | { offset : 4*8 : littleendian, bind (get_offset offset);
+ rest : -1 : bitstring } ->
+ let c1, name_len1 = visit_subkeys offset in
+ let c2, name_len2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in
+ c1 + c2, max name_len1 name_len2
+
+ | {_} ->
+ failwithf "%s: invalid subkey in ri list at %s\n"
+ basename (print_offset subkeys_top)
+ ) else 0, 0
+
+and name_len_of_nk nk =
+ let (_, _, bits) = lookup "name_len_of_nk" nk in
+ bitmatch bits with
+ | { :nk_fields } -> name_len
+
+and visit_sk sk =
+ let (_, _, bits) = lookup "visit_sk" sk in
+ if is_not_visited sk then (
+ mark_visited sk;
+ (bitmatch bits with
+ | { :sk_fields } ->
+ fprintf_sk stdout sk;
+
+ if unknown1 <> 0 then
+ eprintf "SK %s unknown1 <> 0 (%04x)\n" (print_offset sk) unknown1;
+
+ sk_records := sk :: !sk_records
+
+ | {_} ->
+ failwithf "%s: invalid sk-record at %s\n"
+ basename (print_offset sk)
+ )
+ )
+
+and visit_classname classname classname_len =
+ let (seg_len, _, bits) = lookup "visit_classname" classname in
+ mark_visited classname;
+ assert (seg_len >= classname_len);
+ printf "CL %s %s\n" (print_offset classname) (print_bitstring bits)
+
+let () =
+ visit_nk ~nk_is_root:true root_key
+
+(* Now after visiting all the blocks, are there any used blocks which
+ * are unvisited? If there are any then that would indicate either (a)
+ * that the hive contains unreferenced blocks, or (b) that there are
+ * referenced blocks that we did not visit because we don't have a full
+ * understanding of the hive format.
+ *
+ * Windows 7 registries often contain a few of these -- not clear
+ * how serious they are, but don't fail here.
+ *)
+let () =
+ let unvisited = unvisited_blocks () in
+ IntMap.iter (
+ fun offset block ->
+ match block with
+ | (_, false, _) -> () (* ignore unused blocks *)
+ | (seg_len, true, _) ->
+ eprintf "used block %s (length %d) is not referenced\n"
+ (print_offset offset) seg_len
+ ) unvisited
diff --git a/hivex/tools/visualizer_NT_time.ml b/hivex/tools/visualizer_NT_time.ml
new file mode 100644
index 0000000..a752112
--- /dev/null
+++ b/hivex/tools/visualizer_NT_time.ml
@@ -0,0 +1,30 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the following documents. Note they are both incomplete
+ * and inaccurate in some respects.
+ *)
+
+(* Convert an NT file timestamp to time_t. See:
+ * http://blogs.msdn.com/oldnewthing/archive/2003/09/05/54806.aspx
+ * http://support.microsoft.com/kb/167296
+ *)
+let nt_to_time_t t =
+ let t = Int64.sub t 116444736000000000L in
+ let t = Int64.div t 10000000L in
+ Int64.to_float t
diff --git a/hivex/tools/visualizer_utils.ml b/hivex/tools/visualizer_utils.ml
new file mode 100644
index 0000000..df9c789
--- /dev/null
+++ b/hivex/tools/visualizer_utils.ml
@@ -0,0 +1,160 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the following documents. Note they are both incomplete
+ * and inaccurate in some respects.
+ *)
+
+open ExtString
+open Printf
+
+let failwithf fs = ksprintf failwith fs
+
+(* Useful function to convert unknown bitstring fragments into
+ * printable strings.
+ *)
+let rec print_bitstring bits =
+ let str = Bitstring.string_of_bitstring bits in
+ print_binary_string str
+and print_binary_string str =
+ let rec printable = function
+ | '\x00' -> "\\0" | '\x01' -> "\\1" | '\x02' -> "\\2" | '\x03' -> "\\3"
+ | '\x04' -> "\\4" | '\x05' -> "\\5" | '\x06' -> "\\6" | '\x07' -> "\\7"
+ | ('\x08'..'\x1f' as c) (* remaining control characters *)
+ | ('\x7f'..'\xff' as c) -> sprintf "\\x%02x" (Char.code c)
+ | ('\x20'..'\x7e' as c) -> String.make 1 c (* printable ASCII incl. space *)
+ and repeat str = function
+ | n when n <= 0 -> ""
+ | n -> str ^ repeat str (n-1)
+ in
+ let chars = String.explode str in
+ let rec loop acc = function
+ | [] -> List.rev acc
+ | x :: xs ->
+ let rec loop2 i = function
+ | y :: ys when x = y -> loop2 (i+1) ys
+ | ys -> i, ys
+ in
+ let count, ys = loop2 1 xs in
+ let acc = (count, x) :: acc in
+ loop acc ys
+ in
+ let frags = loop [] chars in
+ let frags =
+ List.map (function
+ | (nr, x) when nr <= 4 -> repeat (printable x) nr
+ | (nr, x) -> sprintf "%s<%d times>" (printable x) nr
+ ) frags in
+ "\"" ^ String.concat "" frags ^ "\""
+
+(* Convert an offset from the file to an offset. The only special
+ * thing is that 0xffffffff in the file is used as a kind of "NULL
+ * pointer". We map these null values to -1.
+ *)
+let get_offset = function
+ | 0xffffffff_l -> -1
+ | i -> Int32.to_int i
+
+(* Print an offset. *)
+let print_offset = function
+ | -1 -> "NULL"
+ | i -> sprintf "@%08x" i
+
+(* Print time. *)
+let print_time t =
+ let tm = Unix.gmtime t in
+ sprintf "%04d-%02d-%02d %02d:%02d:%02d"
+ (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
+ tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
+
+(* Print UTF16LE. *)
+let print_utf16 str =
+ let n = String.length str in
+ if n land 1 <> 0 then
+ print_binary_string str
+ else (
+ let rec loop i =
+ if i < n-1 then (
+ let c1 = Char.code (str.[i]) in
+ let c2 = Char.code (str.[i+1]) in
+ if c1 <> 0 || c2 <> 0 then (
+ (* Code units with a non-zero high byte print as \uXXXX (hex). *)
+ let c =
+ if c2 = 0 then String.make 1 (Char.chr c1)
+ else sprintf "\\u%04x" (c2 * 256 + c1) in
+ c :: loop (i+2)
+ ) else []
+ ) else []
+ in
+ let frags = loop 0 in
+ "L\"" ^ String.concat "" frags ^ "\""
+ )
+
+(* A map of int -> anything. *)
+module IntMap = Map.Make (struct type t = int let compare = compare end)
+
+(* Print registry vk-record type field. *)
+let print_vk_type = function
+ | 0 -> "NONE"
+ | 1 -> "SZ"
+ | 2 -> "EXPAND_SZ"
+ | 3 -> "BINARY"
+ | 4 -> "DWORD"
+ | 5 -> "DWORD_BIG_ENDIAN"
+ | 6 -> "LINK"
+ | 7 -> "MULTI_SZ"
+ | 8 -> "RESOURCE_LIST"
+ | 9 -> "FULL_RESOURCE_DESCRIPTOR"
+ | 10 -> "RESOURCE_REQUIREMENTS_LIST"
+ | 11 -> "QWORD"
+ | i -> sprintf "UNKNOWN_VK_TYPE_%d" i
+
+(* XXX We should write a more efficient version of this and
+ * push it into the bitstring library.
+ *)
+let is_zero_bitstring bits =
+ let len = Bitstring.bitstring_length bits in
+ let zeroes = Bitstring.zeroes_bitstring len in
+ 0 = Bitstring.compare bits zeroes
+
+let is_zero_guid = is_zero_bitstring
+
+(* http://msdn.microsoft.com/en-us/library/aa373931(VS.85).aspx
+ * Data1..Data3 are integers (assumed little-endian, as elsewhere in
+ * the hive); Data4 is an array of 8 bytes printed in storage order.
+ *)
+let print_guid bits =
+ bitmatch bits with
+ | { data1 : 4*8 : littleendian;
+ data2 : 2*8 : littleendian;
+ data3 : 2*8 : littleendian;
+ data4_1 : 2*8 : bigendian;
+ data4_2 : 6*8 : bigendian } ->
+ sprintf "%08lX-%04X-%04X-%04X-%012LX" data1 data2 data3 data4_1 data4_2
+ | { _ } ->
+ assert false
+
+(* Fold over little-endian 32-bit integers in a bitstring. *)
+let rec bitstring_fold_left_int32_le f a bits =
+ bitmatch bits with
+ | { i : 4*8 : littleendian;
+ rest : -1 : bitstring } ->
+ bitstring_fold_left_int32_le f (f a i) rest
+ | { rest : -1 : bitstring } when Bitstring.bitstring_length rest = 0 -> a
+ | { _ } ->
+ invalid_arg "bitstring_fold_left_int32_le: length not a multiple of 32 bits"
--
1.6.5.2
More information about the Libguestfs
mailing list