[Libguestfs] [PATCH 1/1] hivexml: Base64-encode non-printable data

Alex Nelson ajnelson at cs.ucsc.edu
Sat Sep 17 16:53:31 UTC 2011


Basically, if any of the characters passed to a hivexml.c function aren't isprint()-able, they're base64-encoded.

The characters passed rely on the iconv library.  Whatever comes out of the processing of iconv() in "UTF-8 or UTF-16" mode is what's passed to hivexml.  I checked all the UTF-processing points and have extensive notes in the postscript, but essentially:
* Key and value names aren't run through iconv
* Value string data and the hive header name are
* The patch below runs a stricter sanitization than iconv, also running everything through isprint.

--Alex

Notes:
* The hive's header-embedded name is just checked as UTF-16, though not passed onto hivexml (yet) (lib/hivex.c:hivex_open).  The name is converted with lib/hivex.c:windows_utf16_to_utf8, which relies on the iconv library, so ASCII going in should be ASCII coming out.
* For node and value names, there is no UTF-n check (lib/hivex.c:hivex_node_name, hivex_value_key), because nobody developing hivex has seen something besides plain ASCII in the names yet.  I can probably identify somewhere where this isn't the case, but it'd take some machine-hours.  So far all I've seen in one corpus is a Registered sign, 0xae.
* String values in general are treated like they could be UTF-16.  All are passed to windows_utf16_to_utf8 (lib/hivex.c:hivex_value_string, lib/hivex.c:hivex_value_multiple_strings).
* If the hivex_value_string or hivex_value_multiple_strings function fails to process the data, the value_string_invalid_utf16 function of the hivex library invoker is called (lib/hivex.c:hivex__visit_node), which in hivexml's case just base64-encodes the output (xml/hivexml.c:value_string_invalid_utf16).


On Sep 17, 2011, at 03:17 , Simson Garfinkel wrote:

> What does this do when there is UTF8 in the hive?
> 
> Sent from my iPad
> 
> On Sep 17, 2011, at 12:30 AM, Alex Nelson <ajnelson at cs.ucsc.edu> wrote:
> 
>> Some of the data in names and string values were being unsafely printed,
>> causing some types of XML processors to fail (e.g. Python's Expat).
>> This patch checks for printability of each character and outputs base64
>> with an encoding attribute for unsafe data.
>> ---
>> xml/hivexml.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++++--------
>> 1 files changed, 64 insertions(+), 11 deletions(-)
>> 
>> diff --git a/xml/hivexml.c b/xml/hivexml.c
>> index cf11676..110c8fb 100644
>> --- a/xml/hivexml.c
>> +++ b/xml/hivexml.c
>> @@ -27,6 +27,7 @@
>> #include <errno.h>
>> #include <time.h>
>> #include <locale.h>
>> +#include <ctype.h>
>> 
>> #ifdef HAVE_LIBINTL_H
>> #include <libintl.h>
>> @@ -201,6 +202,52 @@ filetime_to_8601 (int64_t windows_ticks)
>>  return ret;
>> }
>> 
>> +/* Caller need not free return value afterwards. */
>> +static char *
>> +encoding_recommendation (const char *data)
>> +{
>> +  /* Note that this function assumes data is null-terminated. */
>> +  //See if the data are printable
>> +  int is_printable = 0;
>> +  size_t i;
>> +  size_t data_len = strlen(data);
>> +
>> +  for (i=0; i < data_len; i++) {
>> +    is_printable = isprint (data[i]);
>> +    if (!is_printable) {
>> +      fprintf (stderr, "encoding_recommendation: Non-printable character found at data index %zu (c=%i)\n", i, data[i]);
>> +      break;
>> +    }
>> +  }
>> +
>> +  return is_printable ? "none" : "base64";
>> +}
>> +
>> +static int
>> +safe_print_string_attribute (hive_h *h, void *writer_v, const char *attr_name, const char *attr_encoding, const char *attr_data)
>> +{
>> +  int ret = 0;
>> +  char *encoding_to_use = NULL;
>> +  if (attr_name && attr_data && attr_encoding) {
>> +    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>> +    encoding_to_use = encoding_recommendation (attr_data);
>> +
>> +    if (strcmp (encoding_to_use, "none") == 0)
>> +      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_name, BAD_CAST attr_data));
>> +    else if (strcmp (encoding_to_use, "base64") == 0) {
>> +      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_encoding, BAD_CAST "base64"));
>> +      XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST attr_name));
>> +      XML_CHECK (xmlTextWriterWriteBase64, (writer, BAD_CAST attr_data, 0, strlen(attr_data)));
>> +      XML_CHECK (xmlTextWriterEndAttribute, (writer));
>> +    } else {
>> +      fprintf (stderr, "safe_print_string_attribute: Unexpected encoding to use (won't print here).\n");
>> +      ret = -1;
>> +    }
>> +  } else
>> +    ret = -1;
>> +  return ret;
>> +}
>> +
>> static int
>> node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>> {
>> @@ -210,7 +257,10 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>> 
>>  xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>>  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
>> -  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name", BAD_CAST name));
>> +
>> +  ret = safe_print_string_attribute (h, writer_v, "name", "name_encoding", name);
>> +  if (ret)
>> +    fprintf (stderr, "Warning: node_start: safe_print_string_attribute failed, but we're continuing.\n");
>> 
>>  if (node == hivex_root (h)) {
>>    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "root", BAD_CAST "1"));
>> @@ -227,7 +277,7 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>>    }
>>  }
>> 
>> -  return 0;
>> +  return ret;
>> }
>> 
>> static int
>> @@ -242,13 +292,16 @@ static void
>> start_value (xmlTextWriterPtr writer,
>>             const char *key, const char *type, const char *encoding)
>> {
>> +  int ret = 0;
>>  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "value"));
>>  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "type", BAD_CAST type));
>>  if (encoding)
>>    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding", BAD_CAST encoding));
>> -  if (*key)
>> -    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "key", BAD_CAST key));
>> -  else                          /* default key */
>> +  if (*key) {
>> +    ret = safe_print_string_attribute (NULL, writer, "key", "key_encoding", key);
>> +    if (ret)
>> +      fprintf (stderr, "Warning: start_value: safe_print_string_attribute failed, but we're continuing.\n");
>> +  } else                          /* default key */
>>    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "default", BAD_CAST "1"));
>> }
>> 
>> @@ -264,6 +317,7 @@ value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
>> {
>>  xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>>  const char *type;
>> +  int ret = 0;
>> 
>>  switch (t) {
>>  case hive_t_string: type = "string"; break;
>> @@ -286,11 +340,9 @@ value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
>>  }
>> 
>>  start_value (writer, key, type, NULL);
>> -  XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
>> -  XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
>> -  XML_CHECK (xmlTextWriterEndAttribute, (writer));
>> +  ret = safe_print_string_attribute (h, writer_v, "value", "value_encoding", str);
>>  end_value (writer);
>> -  return 0;
>> +  return ret;
>> }
>> 
>> static int
>> @@ -299,17 +351,18 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h node,
>>                        const char *key, char **argv)
>> {
>>  xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>> +  int ret = 0;
>>  start_value (writer, key, "string-list", NULL);
>> 
>>  size_t i;
>>  for (i = 0; argv[i] != NULL; ++i) {
>>    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
>> -    XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
>> +    ret = safe_print_string_attribute (h, writer_v, "value", "value_encoding", argv[i]);
>>    XML_CHECK (xmlTextWriterEndElement, (writer));
>>  }
>> 
>>  end_value (writer);
>> -  return 0;
>> +  return ret;
>> }
>> 
>> static int
>> -- 
>> 1.7.4.4
>> 
>> 





More information about the Libguestfs mailing list