[augeas-devel] [PATCH 5/6] Fix regular expression escaping

Raphaël Pinson raphink at gmail.com
Mon Oct 10 09:02:24 UTC 2011


After reviewing the series more carefully, I'll let David validate patches 5
(this one) and 6, and I'll only commit patches 1 through 4 for now.

Sorry, David, for the little mess I made by committing the whole series
earlier.


Raphaël



On Sat, Oct 8, 2011 at 1:08 PM, Michael Chapman <mike at very.puzzling.org> wrote:

> This patch fixes:
>
>  /[\/]/
>
> Previously, this matched either a backslash or a slash, since \/ was not
> an escape sequence known to unescape(). Now this matches only a slash.
>
> We pass an extra argument to escape() and unescape() listing the extra
> characters that should be handled. STR_ESCAPES and RX_ESCAPES provide
> the extra characters for strings and regexps respectively, and NULL may
> be passed if no extra characters should be handled.
>
> Signed-off-by: Michael Chapman <mike at very.puzzling.org>
> ---
>  src/augeas.c   |    2 +-
>  src/get.c      |    2 +-
>  src/internal.c |   26 ++++++++++++++++++--------
>  src/internal.h |    8 ++++++--
>  src/lens.c     |   14 +++++++-------
>  src/lexer.l    |    4 ++--
>  src/regexp.c   |    4 ++--
>  7 files changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/src/augeas.c b/src/augeas.c
> index 4b17384..35721fc 100644
> --- a/src/augeas.c
> +++ b/src/augeas.c
> @@ -1366,7 +1366,7 @@ static int print_one(FILE *out, const char *path, const char *value) {
>     if (r < 0)
>         return -1;
>     if (value != NULL) {
> -        char *val = escape(value, -1);
> +        char *val = escape(value, -1, STR_ESCAPES);
>         r = fprintf(out, " = \"%s\"", val);
>         free(val);
>         if (r < 0)
> diff --git a/src/get.c b/src/get.c
> index f9ac6fb..143403b 100644
> --- a/src/get.c
> +++ b/src/get.c
> @@ -243,7 +243,7 @@ static void get_expected_error(struct state *state, struct lens *l) {
>     for (p = word; *p != '\0' && *p != '\n'; p++);
>     *p = '\0';
>
> -    pat = escape(l->ctype->pattern->str, -1);
> +    pat = escape(l->ctype->pattern->str, -1, NULL);
>     get_error(state, l, "expected %s at '%s'", pat, word);
>     free(pat);
>  }
> diff --git a/src/internal.c b/src/internal.c
> index 9f4b697..6ce24dd 100644
> --- a/src/internal.c
> +++ b/src/internal.c
> @@ -148,10 +148,10 @@ char* xread_file(const char *path) {
>  /*
>  * Escape/unescape of string literals
>  */
> -static const char *const escape_chars    = "\"\a\b\t\n\v\f\r\\";
> -static const char *const escape_names = "\"abtnvfr\\";
> +static const char *const escape_chars = "\a\b\t\n\v\f\r";
> +static const char *const escape_names = "abtnvfr";
>
> -char *unescape(const char *s, int len) {
> +char *unescape(const char *s, int len, const char *extra) {
>     size_t size;
>     const char *n;
>     char *result, *t;
> @@ -162,7 +162,9 @@ char *unescape(const char *s, int len) {
>
>     size = 0;
>     for (i=0; i < len; i++, size++)
> -        if (s[i] == '\\' && strchr(escape_names, s[i+1]) != NULL) {
> +        if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
> +            i += 1;
> +        } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
>             i += 1;
>         }
>
> @@ -173,6 +175,9 @@ char *unescape(const char *s, int len) {
>         if (s[i] == '\\' && (n = strchr(escape_names, s[i+1])) != NULL) {
>             *t++ = escape_chars[n - escape_names];
>             i += 1;
> +        } else if (s[i] == '\\' && extra && strchr(extra, s[i+1]) != NULL) {
> +            *t++ = s[i+1];
> +            i += 1;
>         } else {
>             *t++ = s[i];
>         }
> @@ -180,7 +185,7 @@ char *unescape(const char *s, int len) {
>     return result;
>  }
>
> -char *escape(const char *text, int cnt) {
> +char *escape(const char *text, int cnt, const char *extra) {
>
>     int len = 0;
>     char *esc = NULL, *e;
> @@ -191,6 +196,8 @@ char *escape(const char *text, int cnt) {
>     for (int i=0; i < cnt; i++) {
>         if (text[i] && (strchr(escape_chars, text[i]) != NULL))
>             len += 2;  /* Escaped as '\x' */
> +        else if (text[i] && extra && (strchr(extra, text[i]) != NULL))
> +            len += 2;  /* Escaped as '\x' */
>         else if (! isprint(text[i]))
>             len += 4;  /* Escaped as '\ooo' */
>         else
> @@ -204,6 +211,9 @@ char *escape(const char *text, int cnt) {
>         if (text[i] && ((p = strchr(escape_chars, text[i])) != NULL)) {
>             *e++ = '\\';
>             *e++ = escape_names[p - escape_chars];
> +        } else if (text[i] && extra && (strchr(extra, text[i]) != NULL)) {
> +            *e++ = '\\';
> +            *e++ = text[i];
>         } else if (! isprint(text[i])) {
>             sprintf(e, "\\%03o", (unsigned char) text[i]);
>             e += 4;
> @@ -225,7 +235,7 @@ int print_chars(FILE *out, const char *text, int cnt) {
>     if (cnt < 0)
>         cnt = strlen(text);
>
> -    esc = escape(text, cnt);
> +    esc = escape(text, cnt, NULL);
>     total = strlen(esc);
>     if (out != NULL)
>         fprintf(out, "%s", esc);
> @@ -243,10 +253,10 @@ char *format_pos(const char *text, int pos) {
>
>     if (before > window)
>         before = window;
> -    left = escape(text + pos - before, before);
> +    left = escape(text + pos - before, before, NULL);
>     if (left == NULL)
>         goto done;
> -    right = escape(text + pos, window);
> +    right = escape(text + pos, window, NULL);
>     if (right == NULL)
>         goto done;
>
> diff --git a/src/internal.h b/src/internal.h
> index 411a540..8c58d37 100644
> --- a/src/internal.h
> +++ b/src/internal.h
> @@ -253,10 +253,14 @@ int pathjoin(char **path, int nseg, ...);
>  * Escape nonprintable characters within TEXT, similar to how it's done in
>  * C string literals. Caller must free the returned string.
>  */
> -char *escape(const char *text, int cnt);
> +char *escape(const char *text, int cnt, const char *extra);
>
>  /* Function: unescape */
> -char *unescape(const char *s, int len);
> +char *unescape(const char *s, int len, const char *extra);
> +
> +/* Extra characters to be escaped in strings and regexps respectively */
> +#define STR_ESCAPES "\"\\"
> +#define RX_ESCAPES  "/\\"
>
>  /* Function: print_chars */
>  int print_chars(FILE *out, const char *text, int cnt);
> diff --git a/src/lens.c b/src/lens.c
> index 4e13811..d5c60bb 100644
> --- a/src/lens.c
> +++ b/src/lens.c
> @@ -115,7 +115,7 @@ static struct value *str_to_fa(struct info *info, const char *pattern,
>         return NULL;
>     }
>
> -    re_str = escape(pattern, -1);
> +    re_str = escape(pattern, -1, RX_ESCAPES);
>     ERR_NOMEM(re_str == NULL, info);
>
>     exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
> @@ -542,7 +542,7 @@ struct value *lns_make_prim(enum lens_tag tag, struct info *info,
>         const char *dflt = string->str;
>         cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
>         if (cnt != strlen(dflt)) {
> -            char *s = escape(dflt, -1);
> +            char *s = escape(dflt, -1, RX_ESCAPES);
>             char *r = regexp_escape(regexp);
>             exn = make_exn_value(info,
>                    "del: the default value '%s' does not match /%s/",
> @@ -709,11 +709,11 @@ ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
>             lns_format_atype(l1, &s1);
>             lns_format_atype(l2, &s2);
>         } else {
> -            e_u = escape(upv, pv - upv);
> -            e_up = escape(upv, v - upv);
> -            e_upv = escape(upv, -1);
> -            e_pv = escape(pv, -1);
> -            e_v = escape(v, -1);
> +            e_u = escape(upv, pv - upv, RX_ESCAPES);
> +            e_up = escape(upv, v - upv, RX_ESCAPES);
> +            e_upv = escape(upv, -1, RX_ESCAPES);
> +            e_pv = escape(pv, -1, RX_ESCAPES);
> +            e_v = escape(v, -1, RX_ESCAPES);
>             s1 = regexp_escape(ltype(l1, typ));
>             s2 = regexp_escape(ltype(l2, typ));
>         }
> diff --git a/src/lexer.l b/src/lexer.l
> index acd83f2..cb8d506 100644
> --- a/src/lexer.l
> +++ b/src/lexer.l
> @@ -55,7 +55,7 @@ static void loc_update(YYLTYPE *yylloc, const char *s, int len) {
>  }
>
>  static char *regexp_literal(const char *s, int len) {
> -  char *u = unescape(s, len);
> +  char *u = unescape(s, len, RX_ESCAPES);
>   size_t u_len = strlen(u);
>
>   if (u == NULL)
> @@ -89,7 +89,7 @@ ARROW  ->
>  {
>   \"([^\"]|\\\")*\"   {
>                loc_update(yylloc, yytext, yyleng);
> -               yylval->string = unescape(yytext+1, yyleng-2);
> +               yylval->string = unescape(yytext+1, yyleng-2, STR_ESCAPES);
>                return DQUOTED;
>   }
>
> diff --git a/src/regexp.c b/src/regexp.c
> index 2cd47cb..51d8703 100644
> --- a/src/regexp.c
> +++ b/src/regexp.c
> @@ -56,7 +56,7 @@ char *regexp_escape(const struct regexp *r) {
>  #endif
>
>     if (pat == NULL)
> -        pat = escape(r->pattern->str, -1);
> +        pat = escape(r->pattern->str, -1, RX_ESCAPES);
>
>     if (pat == NULL)
>         return NULL;
> @@ -115,7 +115,7 @@ void print_regexp(FILE *out, struct regexp *r) {
>
>  struct regexp *
>  make_regexp_unescape(struct info *info, const char *pat, int nocase) {
> -    char *p = unescape(pat, strlen(pat));
> +    char *p = unescape(pat, strlen(pat), NULL);
>
>     if (p == NULL)
>         return NULL;
> --
> 1.7.6.4
>
> _______________________________________________
> augeas-devel mailing list
> augeas-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/augeas-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/augeas-devel/attachments/20111010/2273e03a/attachment.htm>


More information about the augeas-devel mailing list