[Libguestfs] [PATCH] hivexml

Todd Mummert toddmummert at gmail.com
Wed Feb 1 21:08:15 UTC 2012


I changed the name of the function to is_valid_xml_string() to avoid
camel-case.   I also briefly commented the function itself, since it
does expect its input to be valid UTF-8.   Also, the patch was missing
an end-attribute call, which apparently didn't matter in this case
since the element was being ended subsequently anyway -- but its
omission was an oversight, not intentional.

The updated patch to handle illegal XML characters that appear in
registry values is below:

[todd at tm-nc hivex]# cat hivexml.patch
diff -urNp hivex-1.3.3-orig/xml/hivexml.c hivex-1.3.3-new/xml/hivexml.c
--- hivex-1.3.3-orig/xml/hivexml.c      2011-09-22 09:17:09.000000000 -0400
+++ hivex-1.3.3-new/xml/hivexml.c       2012-02-01 15:51:06.481728986 -0500
@@ -33,6 +33,7 @@
 #endif

 #include <libxml/xmlwriter.h>
+#include <libxml/chvalid.h>

 #include "hivex.h"

@@ -208,6 +209,26 @@ filetime_to_8601 (int64_t windows_ticks)
   return ret;
 }

+/*
+ * Return 1 if STRING decodes fully as UTF-8 and every char passes xmlIsCharQ.
+ * Return 0 on an illegal XML char or if xmlGetUTF8Char fails before the end.
+ */
+static int
+is_valid_xml_string(const char *string)
+{
+  int c;
+  int pos = 0;
+  int len = strlen(string);
+  int charlen = len;
+  while ((c = xmlGetUTF8Char(string+pos, &charlen)) >= 0) {
+    if (xmlIsCharQ(c) == 0)
+      return 0;
+    pos += charlen;
+    charlen = len - pos;
+  }
+  return pos == len;  /* stopped short of the end => undecodable bytes */
+}
+
 static int
 node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
 {
@@ -265,6 +286,20 @@ end_value (xmlTextWriterPtr writer)
   XML_CHECK (xmlTextWriterEndElement, (writer));
 }

+static void  /* start a "string" element, optionally with an "encoding" attribute */
+start_string(xmlTextWriterPtr writer, const char *encoding)
+{
+  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
+  if (encoding)  /* NULL: no "encoding" attribute is written */
+    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding", BAD_CAST encoding));
+}
+
+static void
+end_string(xmlTextWriterPtr writer)
+{
+  XML_CHECK (xmlTextWriterEndElement, (writer));  /* end the writer's current element; pairs with start_string */
+}
+
 static int
 value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
               hive_type t, size_t len, const char *key, const char *str)
@@ -292,9 +327,14 @@ value_string (hive_h *h, void *writer_v,
     type = "unknown";
   }

-  start_value (writer, key, type, NULL);
+  int validXML = is_valid_xml_string(str);
+  start_value (writer, key, type, validXML ? NULL : "base64");
   XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
-  XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
+  if (validXML)
+    XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
+  else
+    XML_CHECK (xmlTextWriterWriteBase64, (writer, str, 0, strlen(str)));
+
   XML_CHECK (xmlTextWriterEndAttribute, (writer));
   end_value (writer);
   return 0;
@@ -310,9 +350,15 @@ value_multiple_strings (hive_h *h, void

   size_t i;
   for (i = 0; argv[i] != NULL; ++i) {
-    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
-    XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
-    XML_CHECK (xmlTextWriterEndElement, (writer));
+    int validXML = is_valid_xml_string(argv[i]);
+    start_string(writer, validXML ? NULL : "base64");
+    XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
+    if (validXML)
+      XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
+    else
+      XML_CHECK (xmlTextWriterWriteBase64, (writer, argv[i], 0, strlen(argv[i])));
+    XML_CHECK (xmlTextWriterEndAttribute, (writer));
+    end_string(writer);
   }

   end_value (writer);
[todd at tm-nc hivex]#




More information about the Libguestfs mailing list