[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [Libguestfs] [PATCH 1/1] hivexml: Base64-encode non-printable data



On Fri, Sep 16, 2011 at 09:30:01PM -0700, Alex Nelson wrote:
> Some of the data in names and string values were being unsafely printed,
> causing some types of XML processors to fail (e.g. Python's Expat).
> This patch checks for printability of each character and outputs base64
> with an encoding attribute for unsafe data.
> ---
>  xml/hivexml.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 files changed, 64 insertions(+), 11 deletions(-)

> diff --git a/xml/hivexml.c b/xml/hivexml.c
> index cf11676..110c8fb 100644
> --- a/xml/hivexml.c
> +++ b/xml/hivexml.c
> @@ -27,6 +27,7 @@
>  #include <errno.h>
>  #include <time.h>
>  #include <locale.h>
> +#include <ctype.h>
>  
>  #ifdef HAVE_LIBINTL_H
>  #include <libintl.h>
> @@ -201,6 +202,52 @@ filetime_to_8601 (int64_t windows_ticks)
>    return ret;
>  }
>  
> +/* Caller need not free return value afterwards. */
> +static char *
> +encoding_recommendation (const char *data)
> +{
> +  /* Note that this function assumes data is null-terminated. */
> +  //See if the data are printable
> +  int is_printable = 0;
> +  size_t i;
> +  size_t data_len = strlen(data);
> +
> +  for (i=0; i < data_len; i++) {
> +    is_printable = isprint (data[i]);

isprint is a bit of a minefield because the output changes depending
on the user's locale.  You might want to use c_isprint (from gnulib)
instead, since it always checks printability against the C locale.

BTW I'm told that there are more "unprintable" characters in XML
than just !isprint.  An XML parser may reject any codepoint not in:

Char ::= I#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  /* any Unicode character,  excluding the surrogate blocks, FFFE, and FFFF. */

> +    if (!is_printable) {
> +      fprintf (stderr, "encoding_recommendation: Non-printable character found at data index %zu (c=%i)\n", i, data[i]);
> +      break;
> +    }
> +  }
> +
> +  return is_printable ? "none" : "base64";

enum perhaps?  You don't actually use the "base64" string later, and
enum makes more sense in a C program.

> +}
> +
> +static int
> +safe_print_string_attribute (hive_h *h, void *writer_v, const char *attr_name, const char *attr_encoding, const char *attr_data)
> +{
> +  int ret = 0;
> +  char *encoding_to_use = NULL;
> +  if (attr_name && attr_data && attr_encoding) {
> +    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> +    encoding_to_use = encoding_recommendation (attr_data);
> +
> +    if (strcmp (encoding_to_use, "none") == 0)
> +      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_name, BAD_CAST attr_data));
> +    else if (strcmp (encoding_to_use, "base64") == 0) {
> +      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST attr_encoding, BAD_CAST "base64"));

Instead of key_encoding, name_encoding etc. what's wrong with just
<name encoding="base64">?  It seems strange to make the encoding
attribute name depend on the element name.

> +      XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST attr_name));
> +      XML_CHECK (xmlTextWriterWriteBase64, (writer, BAD_CAST attr_data, 0, strlen(attr_data)));
> +      XML_CHECK (xmlTextWriterEndAttribute, (writer));
> +    } else {
> +      fprintf (stderr, "safe_print_string_attribute: Unexpected encoding to use (won't print here).\n");
> +      ret = -1;

Is there some place where we expect this to fail?  How about making it
always work, or making it exit on failure?

> +    }
> +  } else
> +    ret = -1;
> +  return ret;
> +}
> +
>  static int
>  node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>  {
> @@ -210,7 +257,10 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>  
>    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
> -  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "name", BAD_CAST name));
> +
> +  ret = safe_print_string_attribute (h, writer_v, "name", "name_encoding", name);
> +  if (ret)
> +    fprintf (stderr, "Warning: node_start: safe_print_string_attribute failed, but we're continuing.\n");
>  
>    if (node == hivex_root (h)) {
>      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "root", BAD_CAST "1"));
> @@ -227,7 +277,7 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
>      }
>    }
>  
> -  return 0;
> +  return ret;
>  }
>  
>  static int
> @@ -242,13 +292,16 @@ static void
>  start_value (xmlTextWriterPtr writer,
>               const char *key, const char *type, const char *encoding)
>  {
> +  int ret = 0;
>    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "value"));
>    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "type", BAD_CAST type));
>    if (encoding)
>      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "encoding", BAD_CAST encoding));
> -  if (*key)
> -    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "key", BAD_CAST key));
> -  else                          /* default key */
> +  if (*key) {
> +    ret = safe_print_string_attribute (NULL, writer, "key", "key_encoding", key);
> +    if (ret)
> +      fprintf (stderr, "Warning: start_value: safe_print_string_attribute failed, but we're continuing.\n");
> +  } else                          /* default key */
>      XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "default", BAD_CAST "1"));
>  }
>  
> @@ -264,6 +317,7 @@ value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
>  {
>    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
>    const char *type;
> +  int ret = 0;
>  
>    switch (t) {
>    case hive_t_string: type = "string"; break;
> @@ -286,11 +340,9 @@ value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
>    }
>  
>    start_value (writer, key, type, NULL);
> -  XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
> -  XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
> -  XML_CHECK (xmlTextWriterEndAttribute, (writer));
> +  ret = safe_print_string_attribute (h, writer_v, "value", "value_encoding", str);
>    end_value (writer);
> -  return 0;
> +  return ret;
>  }
>  
>  static int
> @@ -299,17 +351,18 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h node,
>                          const char *key, char **argv)
>  {
>    xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
> +  int ret = 0;
>    start_value (writer, key, "string-list", NULL);
>  
>    size_t i;
>    for (i = 0; argv[i] != NULL; ++i) {
>      XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
> -    XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
> +    ret = safe_print_string_attribute (h, writer_v, "value", "value_encoding", argv[i]);
>      XML_CHECK (xmlTextWriterEndElement, (writer));
>    }
>  
>    end_value (writer);
> -  return 0;
> +  return ret;
>  }

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming blog: http://rwmj.wordpress.com
Fedora now supports 80 OCaml packages (the OPEN alternative to F#)
http://cocan.org/getting_started_with_ocaml_on_red_hat_and_fedora


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]