[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Libguestfs] [FOR REVIEW ONLY] guestfish: Enable grouping in string lists



This patch lacks updated documentation and tests.

This change adds the ability to group several words into a single string-list
entry by enclosing them in single quotes. So the string:
  "'foo bar'"
becomes 1 token rather than 2. Consequently single quotes must now be escaped:
  "\'"
resolves to a literal single quote.

Incidentally, this change also alters another, probably unintentional behaviour
of the previous implementation, in that tokens are separated by any amount of
whitespace rather than a single whitespace character. I.e.:
  "a  b"
resolves to:
  'a' 'b'
rather than:
  'a' '' 'b'
Whitespace is now also defined to include tabs.

parse_string_list can now also fail: it returns NULL if its input contains an
unmatched opening quote.
---
 fish/fish.c      |  129 +++++++++++++++++++++++++++++++++++++++++++++--------
 src/generator.ml |    3 +-
 2 files changed, 111 insertions(+), 21 deletions(-)

diff --git a/fish/fish.c b/fish/fish.c
index a4069d6..62ec3a3 100644
--- a/fish/fish.c
+++ b/fish/fish.c
@@ -1082,30 +1082,119 @@ is_true (const char *str)
     strcasecmp (str, "no") != 0;
 }
 
-/* XXX We could improve list parsing. */
 char **
 parse_string_list (const char *str)
 {
-  char **argv;
-  const char *p, *pend;
-  int argc, i;
-
-  argc = 1;
-  for (i = 0; str[i]; ++i)
-    if (str[i] == ' ') argc++;
-
-  argv = malloc (sizeof (char *) * (argc+1));
-  if (argv == NULL) { perror ("malloc"); exit (1); }
-
-  p = str;
-  i = 0;
-  while (*p) {
-    pend = strchrnul (p, ' ');
-    argv[i] = strndup (p, pend-p);
-    i++;
-    p = *pend == ' ' ? pend+1 : pend;
+  char **argv = NULL;
+  size_t argv_i = 0;
+
+  /* Current position pointer */
+  const char *p = str;
+
+  /* Split STR into a NULL-terminated, heap-allocated list of tokens.
+   * Tokens are separated by runs of spaces/tabs.  A token may be simple:
+   *  Token
+   * or quoted:
+   *  'This is a single token'
+   * or contain embedded single-quoted sections, as in a normal shell:
+   *  This' is a sing'l'e to'ken
+   * A literal single quote is written \'.
+   *
+   * Returns NULL (after printing a message) on an unterminated quote;
+   * exits on allocation failure.  This outer loop is over complete tokens.
+   */
+  while (*p) {
+    char *tok = NULL;
+    size_t tok_len = 0;
+
+    /* Skip leading whitespace */
+    p += strspn (p, " \t");
+
+    char in_quote = 0;
+
+    /* This loop is over token 'fragments'. A token can be in multiple bits if
+     * it contains single quotes. We also treat both sides of an escaped quote
+     * as separate fragments because we can't just copy it: we have to remove
+     * the \.
+     */
+    while (*p && (!isblank ((unsigned char) *p) || in_quote)) {
+      const char *end = p;
+
+      /* Check if the fragment starts with a quote */
+      if ('\'' == *p) {
+        /* Toggle in_quote */
+        in_quote = !in_quote;
+
+        /* Skip the quote */
+        p++; end++;
+      }
+
+      /* If we're in a quote, look for an end quote */
+      if (in_quote) {
+        end += strcspn (end, "'");
+      }
+
+      /* Otherwise, look for whitespace or a quote */
+      else {
+        end += strcspn (end, " \t'");
+      }
+
+      /* Grow the token; realloc may move it, so keep an offset, not a pointer */
+      size_t tok_off = tok_len;
+      tok_len += end - p;
+      tok = realloc (tok, tok_len + 1);
+      if (NULL == tok) { perror ("realloc"); exit (1); }
+      char *tok_end = tok + tok_off;
+
+      /* Check if we stopped on an escaped quote */
+      if ('\'' == *end && end != p && *(end-1) == '\\') {
+        /* Add everything before \' to the token */
+        memcpy (tok_end, p, end - p - 1);
+
+        /* Add the ' (it takes the slot that was counted for the \) */
+        tok[tok_len-1] = '\'';
+
+        /* Already processed the quote */
+        p = end + 1;
+      }
+
+      else {
+        /* Add the whole fragment */
+        memcpy (tok_end, p, end - p);
+
+        p = end;
+      }
+    }
+
+    /* We've reached the end of a token. We shouldn't still be in quotes. */
+    if (in_quote) {
+      fprintf (stderr, _("Runaway quote in string \"%s\"\n"), str);
+
+      size_t i;
+      for (i = 0; i < argv_i; i++)
+        free (argv[i]);
+      free (argv);
+      free (tok); /* the partial token is not in argv yet */
+
+      return NULL;
+    }
+
+    /* Add this token if there is one. There might not be if there was
+     * whitespace at the end of the input string */
+    if (tok) {
+      /* Add the NULL terminator */
+      tok[tok_len] = '\0';
+
+      /* Add the argument to the argument list (room for argv_i + 1 slots) */
+      argv = realloc (argv, sizeof (*argv) * (argv_i + 1));
+      if (NULL == argv) { perror ("realloc"); exit (1); }
+      argv[argv_i++] = tok;
+    }
   }
-  argv[i] = NULL;
+  /* NULL terminate the argument list (one extra slot, not one extra byte) */
+  argv = realloc (argv, sizeof (*argv) * (argv_i + 1));
+  if (NULL == argv) { perror ("realloc"); exit (1); }
+  argv[argv_i] = NULL;
 
   return argv;
 }
diff --git a/src/generator.ml b/src/generator.ml
index 5cf6a94..7571f95 100755
--- a/src/generator.ml
+++ b/src/generator.ml
@@ -6348,7 +6348,8 @@ and generate_fish_cmds () =
               pr "  %s = strcmp (argv[%d], \"-\") != 0 ? argv[%d] : \"/dev/stdout\";\n"
                 name i i
           | StringList name | DeviceList name ->
-              pr "  %s = parse_string_list (argv[%d]);\n" name i
+              pr "  %s = parse_string_list (argv[%d]);\n" name i;
+              pr "  if (%s == NULL) return -1;\n" name;
           | Bool name ->
               pr "  %s = is_true (argv[%d]) ? 1 : 0;\n" name i
           | Int name ->
-- 
1.6.2.5


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]