[augeas-devel] [PATCH 5/6] Fix regular expression escaping
Raphaël Pinson
raphink at gmail.com
Mon Oct 10 09:02:24 UTC 2011
After reviewing the series more closely, I'll let David validate patches 5
(this one) and 6, and I'll only commit patches 1 through 4 for now.
Sorry, David, for the little mess I made by committing the whole series
earlier.
Raphaël
On Sat, Oct 8, 2011 at 1:08 PM, Michael Chapman <mike at very.puzzling.org> wrote:
> This patch fixes:
>
> /[\/]/
>
> Previously, this matched either a backslash or a slash, since \/ was not
> an escape sequence known to unescape(). Now this matches only a slash.
>
> We pass an extra argument to escape() and unescape() listing the extra
> characters that should be handled. STR_ESCAPES and RX_ESCAPES provide
> the extra characters for strings and regexps respectively, and NULL may
> be passed if no extra characters should be handled.
>
> Signed-off-by: Michael Chapman <mike at very.puzzling.org>
> ---
> src/augeas.c | 2 +-
> src/get.c | 2 +-
> src/internal.c | 26 ++++++++++++++++++--------
> src/internal.h | 8 ++++++--
> src/lens.c | 14 +++++++-------
> src/lexer.l | 4 ++--
> src/regexp.c | 4 ++--
> 7 files changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/src/augeas.c b/src/augeas.c
> index 4b17384..35721fc 100644
> --- a/src/augeas.c
> +++ b/src/augeas.c
> @@ -1366,7 +1366,7 @@ static int print_one(FILE *out, const char *path,
> const char *value) {
> if (r < 0)
> return -1;
> if (value != NULL) {
> - char *val = escape(value, -1);
> + char *val = escape(value, -1, STR_ESCAPES);
> r = fprintf(out, " = \"%s\"", val);
> free(val);
> if (r < 0)
> diff --git a/src/get.c b/src/get.c
> index f9ac6fb..143403b 100644
> --- a/src/get.c
> +++ b/src/get.c
> @@ -243,7 +243,7 @@ static void get_expected_error(struct state *state,
> struct lens *l) {
> for (p = word; *p != '\0' && *p != '\n'; p++);
> *p = '\0';
>
> - pat = escape(l->ctype->pattern->str, -1);
> + pat = escape(l->ctype->pattern->str, -1, NULL);
> get_error(state, l, "expected %s at '%s'", pat, word);
> free(pat);
> }
> diff --git a/src/internal.c b/src/internal.c
> index 9f4b697..6ce24dd 100644
> --- a/src/internal.c
> +++ b/src/internal.c
> @@ -148,10 +148,10 @@ char* xread_file(const char *path) {
> /*
> * Escape/unescape of string literals
> */
> -static const char *const escape_chars = "\"\a\b\t\n\v\f\r\\";
> -static const char *const escape_names = "\"abtnvfr\\";
> +static const char *const escape_chars = "\a\b\t\n\v\f\r";
> +static const char *const escape_names = "abtnvfr";
>
> -char *unescape(const char *s, int len) {
> +char *unescape(const char *s, int len, const char *extra) {
> size_t size;
> const char *n;
> char *result, *t;
> @@ -162,7 +162,9 @@ char *unescape(const char *s, int len) {
>
> size = 0;
> for (i=0; i < len; i++, size++)
> - if (s[i] == '\\' && strchr(escape_names, s[i+1]) != NULL) {
> + if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
> + i += 1;
> + } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL)
> {
> i += 1;
> }
>
> @@ -173,6 +175,9 @@ char *unescape(const char *s, int len) {
> if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
> *t++ = escape_chars[n - escape_names];
> i += 1;
> + } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL)
> {
> + *t++ = s[i+1];
> + i += 1;
> } else {
> *t++ = s[i];
> }
> @@ -180,7 +185,7 @@ char *unescape(const char *s, int len) {
> return result;
> }
>
> -char *escape(const char *text, int cnt) {
> +char *escape(const char *text, int cnt, const char *extra) {
>
> int len = 0;
> char *esc = NULL, *e;
> @@ -191,6 +196,8 @@ char *escape(const char *text, int cnt) {
> for (int i=0; i < cnt; i++) {
> if (text[i] && (strchr(escape_chars, text[i]) != NULL))
> len += 2; /* Escaped as '\x' */
> + else if (text[i] && extra && (strchr(extra, text[i]) != NULL))
> + len += 2; /* Escaped as '\x' */
> else if (! isprint(text[i]))
> len += 4; /* Escaped as '\ooo' */
> else
> @@ -204,6 +211,9 @@ char *escape(const char *text, int cnt) {
> if (text[i] && ((p = strchr(escape_chars, text[i])) != NULL)) {
> *e++ = '\\';
> *e++ = escape_names[p - escape_chars];
> + } else if (text[i] && extra && (strchr(extra, text[i]) != NULL)) {
> + *e++ = '\\';
> + *e++ = text[i];
> } else if (! isprint(text[i])) {
> sprintf(e, "\\%03o", (unsigned char) text[i]);
> e += 4;
> @@ -225,7 +235,7 @@ int print_chars(FILE *out, const char *text, int cnt) {
> if (cnt < 0)
> cnt = strlen(text);
>
> - esc = escape(text, cnt);
> + esc = escape(text, cnt, NULL);
> total = strlen(esc);
> if (out != NULL)
> fprintf(out, "%s", esc);
> @@ -243,10 +253,10 @@ char *format_pos(const char *text, int pos) {
>
> if (before > window)
> before = window;
> - left = escape(text + pos - before, before);
> + left = escape(text + pos - before, before, NULL);
> if (left == NULL)
> goto done;
> - right = escape(text + pos, window);
> + right = escape(text + pos, window, NULL);
> if (right == NULL)
> goto done;
>
> diff --git a/src/internal.h b/src/internal.h
> index 411a540..8c58d37 100644
> --- a/src/internal.h
> +++ b/src/internal.h
> @@ -253,10 +253,14 @@ int pathjoin(char **path, int nseg, ...);
> * Escape nonprintable characters within TEXT, similar to how it's done in
> * C string literals. Caller must free the returned string.
> */
> -char *escape(const char *text, int cnt);
> +char *escape(const char *text, int cnt, const char *extra);
>
> /* Function: unescape */
> -char *unescape(const char *s, int len);
> +char *unescape(const char *s, int len, const char *extra);
> +
> +/* Extra characters to be escaped in strings and regexps respectively */
> +#define STR_ESCAPES "\"\\"
> +#define RX_ESCAPES "/\\"
>
> /* Function: print_chars */
> int print_chars(FILE *out, const char *text, int cnt);
> diff --git a/src/lens.c b/src/lens.c
> index 4e13811..d5c60bb 100644
> --- a/src/lens.c
> +++ b/src/lens.c
> @@ -115,7 +115,7 @@ static struct value *str_to_fa(struct info *info, const
> char *pattern,
> return NULL;
> }
>
> - re_str = escape(pattern, -1);
> + re_str = escape(pattern, -1, RX_ESCAPES);
> ERR_NOMEM(re_str == NULL, info);
>
> exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
> @@ -542,7 +542,7 @@ struct value *lns_make_prim(enum lens_tag tag, struct
> info *info,
> const char *dflt = string->str;
> cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
> if (cnt != strlen(dflt)) {
> - char *s = escape(dflt, -1);
> + char *s = escape(dflt, -1, RX_ESCAPES);
> char *r = regexp_escape(regexp);
> exn = make_exn_value(info,
> "del: the default value '%s' does not match /%s/",
> @@ -709,11 +709,11 @@ ambig_check(struct info *info, struct fa *fa1, struct
> fa *fa2,
> lns_format_atype(l1, &s1);
> lns_format_atype(l2, &s2);
> } else {
> - e_u = escape(upv, pv - upv);
> - e_up = escape(upv, v - upv);
> - e_upv = escape(upv, -1);
> - e_pv = escape(pv, -1);
> - e_v = escape(v, -1);
> + e_u = escape(upv, pv - upv, RX_ESCAPES);
> + e_up = escape(upv, v - upv, RX_ESCAPES);
> + e_upv = escape(upv, -1, RX_ESCAPES);
> + e_pv = escape(pv, -1, RX_ESCAPES);
> + e_v = escape(v, -1, RX_ESCAPES);
> s1 = regexp_escape(ltype(l1, typ));
> s2 = regexp_escape(ltype(l2, typ));
> }
> diff --git a/src/lexer.l b/src/lexer.l
> index acd83f2..cb8d506 100644
> --- a/src/lexer.l
> +++ b/src/lexer.l
> @@ -55,7 +55,7 @@ static void loc_update(YYLTYPE *yylloc, const char *s,
> int len) {
> }
>
> static char *regexp_literal(const char *s, int len) {
> - char *u = unescape(s, len);
> + char *u = unescape(s, len, RX_ESCAPES);
> size_t u_len = strlen(u);
>
> if (u == NULL)
> @@ -89,7 +89,7 @@ ARROW ->
> {
> \"([^\"]|\\\")*\" {
> loc_update(yylloc, yytext, yyleng);
> - yylval->string = unescape(yytext+1, yyleng-2);
> + yylval->string = unescape(yytext+1, yyleng-2, STR_ESCAPES);
> return DQUOTED;
> }
>
> diff --git a/src/regexp.c b/src/regexp.c
> index 2cd47cb..51d8703 100644
> --- a/src/regexp.c
> +++ b/src/regexp.c
> @@ -56,7 +56,7 @@ char *regexp_escape(const struct regexp *r) {
> #endif
>
> if (pat == NULL)
> - pat = escape(r->pattern->str, -1);
> + pat = escape(r->pattern->str, -1, RX_ESCAPES);
>
> if (pat == NULL)
> return NULL;
> @@ -115,7 +115,7 @@ void print_regexp(FILE *out, struct regexp *r) {
>
> struct regexp *
> make_regexp_unescape(struct info *info, const char *pat, int nocase) {
> - char *p = unescape(pat, strlen(pat));
> + char *p = unescape(pat, strlen(pat), NULL);
>
> if (p == NULL)
> return NULL;
> --
> 1.7.6.4
>
> _______________________________________________
> augeas-devel mailing list
> augeas-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/augeas-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/augeas-devel/attachments/20111010/2273e03a/attachment.htm>
More information about the augeas-devel
mailing list