[augeas-devel] [PATCH 5/6] Fix regular expression escaping

Michael Chapman mike at very.puzzling.org
Sat Oct 8 11:08:05 UTC 2011


This patch fixes the handling of an escaped slash in a regexp literal such as:

  /[\/]/

Previously, this matched either a backslash or a slash: \/ was not an
escape sequence known to unescape(), so the backslash was left in the
pattern as a literal character. Now this matches only a slash.

escape() and unescape() now take an extra argument listing additional
characters that are escaped as, or unescaped from, a backslash followed
by the character itself. STR_ESCAPES and RX_ESCAPES provide these
characters for strings and regexps respectively, and NULL may be passed
if no additional characters should be handled.

Signed-off-by: Michael Chapman <mike at very.puzzling.org>
---
 src/augeas.c   |    2 +-
 src/get.c      |    2 +-
 src/internal.c |   26 ++++++++++++++++++--------
 src/internal.h |    8 ++++++--
 src/lens.c     |   14 +++++++-------
 src/lexer.l    |    4 ++--
 src/regexp.c   |    4 ++--
 7 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/src/augeas.c b/src/augeas.c
index 4b17384..35721fc 100644
--- a/src/augeas.c
+++ b/src/augeas.c
@@ -1366,7 +1366,7 @@ static int print_one(FILE *out, const char *path, const char *value) {
     if (r < 0)
         return -1;
     if (value != NULL) {
-        char *val = escape(value, -1);
+        char *val = escape(value, -1, STR_ESCAPES);
         r = fprintf(out, " = \"%s\"", val);
         free(val);
         if (r < 0)
diff --git a/src/get.c b/src/get.c
index f9ac6fb..143403b 100644
--- a/src/get.c
+++ b/src/get.c
@@ -243,7 +243,7 @@ static void get_expected_error(struct state *state, struct lens *l) {
     for (p = word; *p != '\0' && *p != '\n'; p++);
     *p = '\0';
 
-    pat = escape(l->ctype->pattern->str, -1);
+    pat = escape(l->ctype->pattern->str, -1, NULL);
     get_error(state, l, "expected %s at '%s'", pat, word);
     free(pat);
 }
diff --git a/src/internal.c b/src/internal.c
index 9f4b697..6ce24dd 100644
--- a/src/internal.c
+++ b/src/internal.c
@@ -148,10 +148,10 @@ char* xread_file(const char *path) {
 /*
  * Escape/unescape of string literals
  */
-static const char *const escape_chars    = "\"\a\b\t\n\v\f\r\\";
-static const char *const escape_names = "\"abtnvfr\\";
+static const char *const escape_chars = "\a\b\t\n\v\f\r";
+static const char *const escape_names = "abtnvfr";
 
-char *unescape(const char *s, int len) {
+char *unescape(const char *s, int len, const char *extra) {
     size_t size;
     const char *n;
     char *result, *t;
@@ -162,7 +162,9 @@ char *unescape(const char *s, int len) {
 
     size = 0;
     for (i=0; i < len; i++, size++)
-        if (s[i] == '\\' && strchr(escape_names, s[i+1]) != NULL) {
+        if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
+            i += 1;
+        } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
             i += 1;
         }
 
@@ -173,6 +175,9 @@ char *unescape(const char *s, int len) {
         if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
             *t++ = escape_chars[n - escape_names];
             i += 1;
+        } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
+            *t++ = s[i+1];
+            i += 1;
         } else {
             *t++ = s[i];
         }
@@ -180,7 +185,7 @@ char *unescape(const char *s, int len) {
     return result;
 }
 
-char *escape(const char *text, int cnt) {
+char *escape(const char *text, int cnt, const char *extra) {
 
     int len = 0;
     char *esc = NULL, *e;
@@ -191,6 +196,8 @@ char *escape(const char *text, int cnt) {
     for (int i=0; i < cnt; i++) {
         if (text[i] && (strchr(escape_chars, text[i]) != NULL))
             len += 2;  /* Escaped as '\x' */
+        else if (text[i] && extra && (strchr(extra, text[i]) != NULL))
+            len += 2;  /* Escaped as '\x' */
         else if (! isprint(text[i]))
             len += 4;  /* Escaped as '\ooo' */
         else
@@ -204,6 +211,9 @@ char *escape(const char *text, int cnt) {
         if (text[i] && ((p = strchr(escape_chars, text[i])) != NULL)) {
             *e++ = '\\';
             *e++ = escape_names[p - escape_chars];
+        } else if (text[i] && extra && (strchr(extra, text[i]) != NULL)) {
+            *e++ = '\\';
+            *e++ = text[i];
         } else if (! isprint(text[i])) {
             sprintf(e, "\\%03o", (unsigned char) text[i]);
             e += 4;
@@ -225,7 +235,7 @@ int print_chars(FILE *out, const char *text, int cnt) {
     if (cnt < 0)
         cnt = strlen(text);
 
-    esc = escape(text, cnt);
+    esc = escape(text, cnt, NULL);
     total = strlen(esc);
     if (out != NULL)
         fprintf(out, "%s", esc);
@@ -243,10 +253,10 @@ char *format_pos(const char *text, int pos) {
 
     if (before > window)
         before = window;
-    left = escape(text + pos - before, before);
+    left = escape(text + pos - before, before, NULL);
     if (left == NULL)
         goto done;
-    right = escape(text + pos, window);
+    right = escape(text + pos, window, NULL);
     if (right == NULL)
         goto done;
 
diff --git a/src/internal.h b/src/internal.h
index 411a540..8c58d37 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -253,10 +253,14 @@ int pathjoin(char **path, int nseg, ...);
  * Escape nonprintable characters within TEXT, similar to how it's done in
  * C string literals. Caller must free the returned string.
  */
-char *escape(const char *text, int cnt);
+char *escape(const char *text, int cnt, const char *extra);
 
 /* Function: unescape */
-char *unescape(const char *s, int len);
+char *unescape(const char *s, int len, const char *extra);
+
+/* Extra characters to be escaped in strings and regexps respectively */
+#define STR_ESCAPES "\"\\"
+#define RX_ESCAPES  "/\\"
 
 /* Function: print_chars */
 int print_chars(FILE *out, const char *text, int cnt);
diff --git a/src/lens.c b/src/lens.c
index 4e13811..d5c60bb 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -115,7 +115,7 @@ static struct value *str_to_fa(struct info *info, const char *pattern,
         return NULL;
     }
 
-    re_str = escape(pattern, -1);
+    re_str = escape(pattern, -1, RX_ESCAPES);
     ERR_NOMEM(re_str == NULL, info);
 
     exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
@@ -542,7 +542,7 @@ struct value *lns_make_prim(enum lens_tag tag, struct info *info,
         const char *dflt = string->str;
         cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
         if (cnt != strlen(dflt)) {
-            char *s = escape(dflt, -1);
+            char *s = escape(dflt, -1, RX_ESCAPES);
             char *r = regexp_escape(regexp);
             exn = make_exn_value(info,
                    "del: the default value '%s' does not match /%s/",
@@ -709,11 +709,11 @@ ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
             lns_format_atype(l1, &s1);
             lns_format_atype(l2, &s2);
         } else {
-            e_u = escape(upv, pv - upv);
-            e_up = escape(upv, v - upv);
-            e_upv = escape(upv, -1);
-            e_pv = escape(pv, -1);
-            e_v = escape(v, -1);
+            e_u = escape(upv, pv - upv, RX_ESCAPES);
+            e_up = escape(upv, v - upv, RX_ESCAPES);
+            e_upv = escape(upv, -1, RX_ESCAPES);
+            e_pv = escape(pv, -1, RX_ESCAPES);
+            e_v = escape(v, -1, RX_ESCAPES);
             s1 = regexp_escape(ltype(l1, typ));
             s2 = regexp_escape(ltype(l2, typ));
         }
diff --git a/src/lexer.l b/src/lexer.l
index acd83f2..cb8d506 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -55,7 +55,7 @@ static void loc_update(YYLTYPE *yylloc, const char *s, int len) {
 }
 
 static char *regexp_literal(const char *s, int len) {
-  char *u = unescape(s, len);
+  char *u = unescape(s, len, RX_ESCAPES);
   size_t u_len = strlen(u);
 
   if (u == NULL)
@@ -89,7 +89,7 @@ ARROW  ->
 {
   \"([^\"]|\\\")*\"   {
                loc_update(yylloc, yytext, yyleng);
-               yylval->string = unescape(yytext+1, yyleng-2);
+               yylval->string = unescape(yytext+1, yyleng-2, STR_ESCAPES);
                return DQUOTED;
   }
 
diff --git a/src/regexp.c b/src/regexp.c
index 2cd47cb..51d8703 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -56,7 +56,7 @@ char *regexp_escape(const struct regexp *r) {
 #endif
 
     if (pat == NULL)
-        pat = escape(r->pattern->str, -1);
+        pat = escape(r->pattern->str, -1, RX_ESCAPES);
 
     if (pat == NULL)
         return NULL;
@@ -115,7 +115,7 @@ void print_regexp(FILE *out, struct regexp *r) {
 
 struct regexp *
 make_regexp_unescape(struct info *info, const char *pat, int nocase) {
-    char *p = unescape(pat, strlen(pat));
+    char *p = unescape(pat, strlen(pat), NULL);
 
     if (p == NULL)
         return NULL;
-- 
1.7.6.4




More information about the augeas-devel mailing list