[augeas-devel] [PATCH 5/6] Fix regular expression escaping
Michael Chapman
mike at very.puzzling.org
Sat Oct 8 11:08:05 UTC 2011
This patch fixes the unescaping of regular expression literals such as:
/[\/]/
Previously, this matched either a backslash or a slash, since \/ was not
an escape sequence known to unescape(). Now this matches only a slash.
escape() and unescape() now take an extra argument listing additional
characters that are escaped with, or unescaped from, a leading backslash.
STR_ESCAPES and RX_ESCAPES provide these characters for strings and
regexps respectively; NULL may be passed if no extra characters should be
handled.
Signed-off-by: Michael Chapman <mike at very.puzzling.org>
---
src/augeas.c | 2 +-
src/get.c | 2 +-
src/internal.c | 26 ++++++++++++++++++--------
src/internal.h | 8 ++++++--
src/lens.c | 14 +++++++-------
src/lexer.l | 4 ++--
src/regexp.c | 4 ++--
7 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/src/augeas.c b/src/augeas.c
index 4b17384..35721fc 100644
--- a/src/augeas.c
+++ b/src/augeas.c
@@ -1366,7 +1366,7 @@ static int print_one(FILE *out, const char *path, const char *value) {
if (r < 0)
return -1;
if (value != NULL) {
- char *val = escape(value, -1);
+ char *val = escape(value, -1, STR_ESCAPES);
r = fprintf(out, " = \"%s\"", val);
free(val);
if (r < 0)
diff --git a/src/get.c b/src/get.c
index f9ac6fb..143403b 100644
--- a/src/get.c
+++ b/src/get.c
@@ -243,7 +243,7 @@ static void get_expected_error(struct state *state, struct lens *l) {
for (p = word; *p != '\0' && *p != '\n'; p++);
*p = '\0';
- pat = escape(l->ctype->pattern->str, -1);
+ pat = escape(l->ctype->pattern->str, -1, NULL);
get_error(state, l, "expected %s at '%s'", pat, word);
free(pat);
}
diff --git a/src/internal.c b/src/internal.c
index 9f4b697..6ce24dd 100644
--- a/src/internal.c
+++ b/src/internal.c
@@ -148,10 +148,10 @@ char* xread_file(const char *path) {
/*
* Escape/unescape of string literals
*/
-static const char *const escape_chars = "\"\a\b\t\n\v\f\r\\";
-static const char *const escape_names = "\"abtnvfr\\";
+static const char *const escape_chars = "\a\b\t\n\v\f\r";
+static const char *const escape_names = "abtnvfr";
-char *unescape(const char *s, int len) {
+char *unescape(const char *s, int len, const char *extra) {
size_t size;
const char *n;
char *result, *t;
@@ -162,7 +162,9 @@ char *unescape(const char *s, int len) {
size = 0;
for (i=0; i < len; i++, size++)
- if (s[i] == '\\' && strchr(escape_names, s[i+1]) != NULL) {
+ if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
+ i += 1;
+ } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
i += 1;
}
@@ -173,6 +175,9 @@ char *unescape(const char *s, int len) {
if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
*t++ = escape_chars[n - escape_names];
i += 1;
+ } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
+ *t++ = s[i+1];
+ i += 1;
} else {
*t++ = s[i];
}
@@ -180,7 +185,7 @@ char *unescape(const char *s, int len) {
return result;
}
-char *escape(const char *text, int cnt) {
+char *escape(const char *text, int cnt, const char *extra) {
int len = 0;
char *esc = NULL, *e;
@@ -191,6 +196,8 @@ char *escape(const char *text, int cnt) {
for (int i=0; i < cnt; i++) {
if (text[i] && (strchr(escape_chars, text[i]) != NULL))
len += 2; /* Escaped as '\x' */
+ else if (text[i] && extra && (strchr(extra, text[i]) != NULL))
+ len += 2; /* Escaped as '\x' */
else if (! isprint(text[i]))
len += 4; /* Escaped as '\ooo' */
else
@@ -204,6 +211,9 @@ char *escape(const char *text, int cnt) {
if (text[i] && ((p = strchr(escape_chars, text[i])) != NULL)) {
*e++ = '\\';
*e++ = escape_names[p - escape_chars];
+ } else if (text[i] && extra && (strchr(extra, text[i]) != NULL)) {
+ *e++ = '\\';
+ *e++ = text[i];
} else if (! isprint(text[i])) {
sprintf(e, "\\%03o", (unsigned char) text[i]);
e += 4;
@@ -225,7 +235,7 @@ int print_chars(FILE *out, const char *text, int cnt) {
if (cnt < 0)
cnt = strlen(text);
- esc = escape(text, cnt);
+ esc = escape(text, cnt, NULL);
total = strlen(esc);
if (out != NULL)
fprintf(out, "%s", esc);
@@ -243,10 +253,10 @@ char *format_pos(const char *text, int pos) {
if (before > window)
before = window;
- left = escape(text + pos - before, before);
+ left = escape(text + pos - before, before, NULL);
if (left == NULL)
goto done;
- right = escape(text + pos, window);
+ right = escape(text + pos, window, NULL);
if (right == NULL)
goto done;
diff --git a/src/internal.h b/src/internal.h
index 411a540..8c58d37 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -253,10 +253,14 @@ int pathjoin(char **path, int nseg, ...);
* Escape nonprintable characters within TEXT, similar to how it's done in
* C string literals. Caller must free the returned string.
*/
-char *escape(const char *text, int cnt);
+char *escape(const char *text, int cnt, const char *extra);
/* Function: unescape */
-char *unescape(const char *s, int len);
+char *unescape(const char *s, int len, const char *extra);
+
+/* Extra characters to be escaped in strings and regexps respectively */
+#define STR_ESCAPES "\"\\"
+#define RX_ESCAPES "/\\"
/* Function: print_chars */
int print_chars(FILE *out, const char *text, int cnt);
diff --git a/src/lens.c b/src/lens.c
index 4e13811..d5c60bb 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -115,7 +115,7 @@ static struct value *str_to_fa(struct info *info, const char *pattern,
return NULL;
}
- re_str = escape(pattern, -1);
+ re_str = escape(pattern, -1, RX_ESCAPES);
ERR_NOMEM(re_str == NULL, info);
exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
@@ -542,7 +542,7 @@ struct value *lns_make_prim(enum lens_tag tag, struct info *info,
const char *dflt = string->str;
cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
if (cnt != strlen(dflt)) {
- char *s = escape(dflt, -1);
+ char *s = escape(dflt, -1, RX_ESCAPES);
char *r = regexp_escape(regexp);
exn = make_exn_value(info,
"del: the default value '%s' does not match /%s/",
@@ -709,11 +709,11 @@ ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
lns_format_atype(l1, &s1);
lns_format_atype(l2, &s2);
} else {
- e_u = escape(upv, pv - upv);
- e_up = escape(upv, v - upv);
- e_upv = escape(upv, -1);
- e_pv = escape(pv, -1);
- e_v = escape(v, -1);
+ e_u = escape(upv, pv - upv, RX_ESCAPES);
+ e_up = escape(upv, v - upv, RX_ESCAPES);
+ e_upv = escape(upv, -1, RX_ESCAPES);
+ e_pv = escape(pv, -1, RX_ESCAPES);
+ e_v = escape(v, -1, RX_ESCAPES);
s1 = regexp_escape(ltype(l1, typ));
s2 = regexp_escape(ltype(l2, typ));
}
diff --git a/src/lexer.l b/src/lexer.l
index acd83f2..cb8d506 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -55,7 +55,7 @@ static void loc_update(YYLTYPE *yylloc, const char *s, int len) {
}
static char *regexp_literal(const char *s, int len) {
- char *u = unescape(s, len);
+ char *u = unescape(s, len, RX_ESCAPES);
size_t u_len = strlen(u);
if (u == NULL)
@@ -89,7 +89,7 @@ ARROW ->
{
\"([^\"]|\\\")*\" {
loc_update(yylloc, yytext, yyleng);
- yylval->string = unescape(yytext+1, yyleng-2);
+ yylval->string = unescape(yytext+1, yyleng-2, STR_ESCAPES);
return DQUOTED;
}
diff --git a/src/regexp.c b/src/regexp.c
index 2cd47cb..51d8703 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -56,7 +56,7 @@ char *regexp_escape(const struct regexp *r) {
#endif
if (pat == NULL)
- pat = escape(r->pattern->str, -1);
+ pat = escape(r->pattern->str, -1, RX_ESCAPES);
if (pat == NULL)
return NULL;
@@ -115,7 +115,7 @@ void print_regexp(FILE *out, struct regexp *r) {
struct regexp *
make_regexp_unescape(struct info *info, const char *pat, int nocase) {
- char *p = unescape(pat, strlen(pat));
+ char *p = unescape(pat, strlen(pat), NULL);
if (p == NULL)
return NULL;
--
1.7.6.4
More information about the augeas-devel
mailing list