[augeas-devel] augeas: master - fa_compile: accept regexps with embedded NUL's
David Lutterkort
lutter at fedoraproject.org
Tue Sep 1 18:09:06 UTC 2009
Gitweb: http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=82152586f01e19d8ed4bc144f83855328d1196dc
Commit: 82152586f01e19d8ed4bc144f83855328d1196dc
Parent: 15eb39a97ad98be637c9093cd1cfeee455cfefef
Author: David Lutterkort <lutter at redhat.com>
AuthorDate: Fri Aug 14 16:31:39 2009 -0700
Committer: David Lutterkort <lutter at redhat.com>
CommitterDate: Mon Aug 31 14:36:27 2009 -0700
fa_compile: accept regexps with embedded NUL's
---
src/fa.c | 232 +++++++++++++++++++++++++++++++++-----------------------
tests/fatest.c | 17 +++--
2 files changed, 147 insertions(+), 102 deletions(-)
diff --git a/src/fa.c b/src/fa.c
index 869d16f..491ec63 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -1,7 +1,7 @@
/*
* fa.c: finite automata
*
- * Copyright (C) 2007, 2008 Red Hat Inc.
+ * Copyright (C) 2007-2009 Red Hat Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -174,7 +174,7 @@ struct re {
struct re *exp2;
};
struct { /* CSET */
- int negate;
+ bool negate;
bitset *cset;
};
struct { /* CHAR */
@@ -188,6 +188,15 @@ struct re {
};
};
+/* Used to keep state of the regex parse; RX may contain NUL's */
+struct re_parse {
+ const char *rx; /* Current position in regex */
+ const char *rend; /* Last char of rx+ 1 */
+ int error; /* error code */
+};
+
+static struct re *parse_regexp(struct re_parse *parse);
+
/* A map from a set of states to a state. */
typedef hash_t state_set_hash;
@@ -216,8 +225,6 @@ struct state_set_list {
struct state_set *set;
};
-static struct re *parse_regexp(const char **regexp, int *error);
-
/* Clean up FA by removing dead transitions and states and reducing
* transitions. Unreachable states are freed. The return value is the same
* as FA; returning it is merely a convenience.
@@ -2530,23 +2537,24 @@ static void free_re(struct re *re) {
}
int fa_compile(const char *regexp, size_t size, struct fa **fa) {
- int ret = REG_NOERROR;
struct re *re = NULL;
+ struct re_parse parse;
+
*fa = NULL;
- /* We don't handle embedded nul's yet */
- if (strlen(regexp) != size)
- return REG_ESIZE;
+ parse.rx = regexp;
+ parse.rend = regexp + size;
+ parse.error = REG_NOERROR;
- re = parse_regexp(®exp, &ret);
+ re = parse_regexp(&parse);
if (re == NULL)
- return ret;
+ return parse.error;
*fa = fa_from_re(re);
re_unref(re);
collect(*fa);
- return ret;
+ return parse.error;
}
/*
@@ -2593,7 +2601,7 @@ static struct re *make_re_char(uchar c) {
return re;
}
-static struct re *make_re_char_set(int negate) {
+static struct re *make_re_char_set(bool negate) {
struct re *re = make_re(CSET);
if (re) {
re->negate = negate;
@@ -2604,37 +2612,40 @@ static struct re *make_re_char_set(int negate) {
return re;
}
-static int more(const char **regexp) {
- return (*regexp) != '\0';
+static bool more(struct re_parse *parse) {
+ return parse->rx < parse->rend;
}
-static int match(const char **regexp, char m) {
- if (!more(regexp))
- return 0;
- if (**regexp == m) {
- (*regexp) += 1;
- return 1;
+static bool match(struct re_parse *parse, char m) {
+ if (!more(parse))
+ return false;
+ if (*parse->rx == m) {
+ parse->rx += 1;
+ return true;
}
- return 0;
+ return false;
}
-static int peek(const char **regexp, const char *chars) {
- return strchr(chars, **regexp) != NULL;
+static bool peek(struct re_parse *parse, const char *chars) {
+ return strchr(chars, *parse->rx) != NULL;
}
-static char next(const char **regexp) {
- char c = **regexp;
- if (c != '\0')
- *regexp += 1;
- return c;
+static bool next(struct re_parse *parse, char *c) {
+ if (!more(parse))
+ return false;
+ *c = *parse->rx;
+ parse->rx += 1;
+ return true;
}
-static char parse_char(const char **regexp, int quoted) {
- if (quoted && **regexp == '\\') {
- next(regexp);
- return next(regexp);
+static bool parse_char(struct re_parse *parse, int quoted, char *c) {
+ if (!more(parse))
+ return false;
+ if (quoted && *parse->rx == '\\') {
+ parse->rx += 1;
+ return next(parse, c);
} else {
- return next(regexp);
+ return next(parse, c);
}
}
@@ -2644,33 +2655,34 @@ static void add_re_char(struct re *re, uchar from, uchar to) {
bitset_set(re->cset, c);
}
-static void parse_char_class(const char **regexp, struct re *re,
- int *error) {
- if (! more(regexp)) {
- *error = REG_EBRACK;
+static void parse_char_class(struct re_parse *parse, struct re *re) {
+ if (! more(parse)) {
+ parse->error = REG_EBRACK;
goto error;
}
- char from = parse_char(regexp, 0);
- char to = from;
- if (match(regexp, '-')) {
- if (! more(regexp)) {
- *error = REG_EBRACK;
+ char from, to;
+ parse_char(parse, 0, &from);
+ to = from;
+ if (match(parse, '-')) {
+ if (! more(parse)) {
+ parse->error = REG_EBRACK;
goto error;
}
- if (peek(regexp, "]")) {
+ if (peek(parse, "]")) {
if (from > to) {
- *error = REG_ERANGE;
+ parse->error = REG_ERANGE;
goto error;
}
add_re_char(re, from, to);
add_re_char(re, '-', '-');
return;
- } else {
- to = parse_char(regexp, 0);
+ } else if (!parse_char(parse, 0, &to)) {
+ parse->error = REG_ERANGE;
+ goto error;
}
}
if (from > to) {
- *error = REG_ERANGE;
+ parse->error = REG_ERANGE;
goto error;
}
add_re_char(re, from, to);
@@ -2678,45 +2690,53 @@ static void parse_char_class(const char **regexp, struct re *re,
return;
}
-static struct re *parse_simple_exp(const char **regexp, int *error) {
+static struct re *parse_simple_exp(struct re_parse *parse) {
struct re *re = NULL;
- if (match(regexp, '[')) {
- int negate = match(regexp, '^');
+ if (match(parse, '[')) {
+ bool negate = match(parse, '^');
re = make_re_char_set(negate);
- if (re == NULL)
+ if (re == NULL) {
+ parse->error = REG_ESPACE;
goto error;
- parse_char_class(regexp, re, error);
- if (*error != REG_NOERROR)
+ }
+ parse_char_class(parse, re);
+ if (parse->error != REG_NOERROR)
goto error;
- while (more(regexp) && ! peek(regexp, "]")) {
- parse_char_class(regexp, re, error);
- if (*error != REG_NOERROR)
+ while (more(parse) && ! peek(parse, "]")) {
+ parse_char_class(parse, re);
+ if (parse->error != REG_NOERROR)
goto error;
}
- if (! match(regexp, ']')) {
- *error = REG_EBRACK;
+ if (! match(parse, ']')) {
+ parse->error = REG_EBRACK;
goto error;
}
- } else if (match(regexp, '(')) {
- if (match(regexp, ')')) {
+ } else if (match(parse, '(')) {
+ if (match(parse, ')')) {
return make_re(EPSILON);
}
- re = parse_regexp(regexp, error);
+ re = parse_regexp(parse);
if (re == NULL)
goto error;
- if (! match(regexp, ')')) {
- *error = REG_EPAREN;
+ if (! match(parse, ')')) {
+ parse->error = REG_EPAREN;
goto error;
}
- } else if (match(regexp, '.')) {
+ } else if (match(parse, '.')) {
re = make_re_char_set(1);
- if (re == NULL)
+ if (re == NULL) {
+ parse->error = REG_ESPACE;
goto error;
+ }
add_re_char(re, '\n', '\n');
} else {
- if (more(regexp)) {
- char c = parse_char(regexp, 1);
+ if (more(parse)) {
+ char c;
+ if (!parse_char(parse, 1, &c)) {
+ parse->error = REG_EESCAPE;
+ goto error;
+ }
re = make_re_char(c);
}
}
@@ -2726,50 +2746,70 @@ static struct re *parse_simple_exp(const char **regexp, int *error) {
return NULL;
}
-static int parse_int(const char **regexp, int *error) {
+static int parse_int(struct re_parse *parse) {
+ const char *lim;
char *end;
- long l = strtoul(*regexp, &end, 10);
- *regexp = end;
+ size_t used;
+ long l;
+
+ /* We need to be careful that strtoul will never access
+ * memory beyond parse->rend
+ */
+ for (lim = parse->rx; lim < parse->rend && *lim >= '0' && *lim <= '9';
+ lim++);
+ if (lim < parse->rend) {
+ l = strtoul(parse->rx, &end, 10);
+ used = end - parse->rx;
+ } else {
+ char *s = strndup(parse->rx, parse->rend - parse->rx);
+ if (s == NULL) {
+ parse->error = REG_ESPACE;
+ return -1;
+ }
+ l = strtoul(s, &end, 10);
+ used = end - s;
+ free(s);
+ }
+
+ parse->rx += used;
if ((l<0) || (l > INT_MAX)) {
- *error = REG_BADBR;
+ parse->error = REG_BADBR;
return -1;
}
return (int) l;
}
-static struct re *parse_repeated_exp(const char **regexp, int *error) {
- struct re *re = parse_simple_exp(regexp, error);
+static struct re *parse_repeated_exp(struct re_parse *parse) {
+ struct re *re = parse_simple_exp(parse);
if (re == NULL)
goto error;
- if (match(regexp, '?')) {
+ if (match(parse, '?')) {
re = make_re_rep(re, 0, 1);
- } else if (match(regexp, '*')) {
+ } else if (match(parse, '*')) {
re = make_re_rep(re, 0, -1);
- } else if (match(regexp, '+')) {
+ } else if (match(parse, '+')) {
re = make_re_rep(re, 1, -1);
- } else if (match(regexp, '{')) {
+ } else if (match(parse, '{')) {
int min, max;
- min = parse_int(regexp, error);
- if (min == -1) {
- *error = REG_BADBR;
+ min = parse_int(parse);
+ if (min == -1)
goto error;
- }
- if (match(regexp, ',')) {
- max = parse_int(regexp, error);
+ if (match(parse, ',')) {
+ max = parse_int(parse);
if (max == -1)
goto error;
- if (! match(regexp, '}')) {
- *error = REG_EBRACE;
+ if (! match(parse, '}')) {
+ parse->error = REG_EBRACE;
goto error;
}
- } else if (match(regexp, '}')) {
+ } else if (match(parse, '}')) {
max = min;
} else {
- *error = REG_EBRACE;
+ parse->error = REG_EBRACE;
goto error;
}
if (min > max) {
- *error = REG_BADBR;
+ parse->error = REG_BADBR;
goto error;
}
re = make_re_rep(re, min, max);
@@ -2780,13 +2820,13 @@ static struct re *parse_repeated_exp(const char **regexp, int *error) {
return NULL;
}
-static struct re *parse_concat_exp(const char **regexp, int *error) {
- struct re *re = parse_repeated_exp(regexp, error);
+static struct re *parse_concat_exp(struct re_parse *parse) {
+ struct re *re = parse_repeated_exp(parse);
if (re == NULL)
goto error;
- if (more(regexp) && ! peek(regexp, ")|")) {
- struct re *re2 = parse_concat_exp(regexp, error);
+ if (more(parse) && ! peek(parse, ")|")) {
+ struct re *re2 = parse_concat_exp(parse);
if (re2 == NULL)
goto error;
return make_re_binop(CONCAT, re, re2);
@@ -2798,13 +2838,13 @@ static struct re *parse_concat_exp(const char **regexp, int *error) {
return NULL;
}
-static struct re *parse_regexp(const char **regexp, int *error) {
- struct re *re = parse_concat_exp(regexp, error);
+static struct re *parse_regexp(struct re_parse *parse) {
+ struct re *re = parse_concat_exp(parse);
if (re == NULL)
goto error;
- if (match(regexp, '|')) {
- struct re *re2 = parse_regexp(regexp, error);
+ if (match(parse, '|')) {
+ struct re *re2 = parse_regexp(parse);
if (re2 == NULL)
goto error;
return make_re_binop(UNION, re, re2);
diff --git a/tests/fatest.c b/tests/fatest.c
index fd0b66b..bf1071d 100644
--- a/tests/fatest.c
+++ b/tests/fatest.c
@@ -94,11 +94,13 @@ static void assertAsRegexp(CuTest *tc, struct fa *fa) {
free(re);
}
-static struct fa *make_fa(CuTest *tc, const char *regexp, int exp_err) {
+static struct fa *make_fa(CuTest *tc,
+ const char *regexp, size_t reglen,
+ int exp_err) {
struct fa *fa;
int r;
- r = fa_compile(regexp, strlen(regexp), &fa);
+ r = fa_compile(regexp, reglen, &fa);
if (exp_err == REG_NOERROR) {
if (r != REG_NOERROR)
print_regerror(r, regexp);
@@ -114,7 +116,7 @@ static struct fa *make_fa(CuTest *tc, const char *regexp, int exp_err) {
}
static struct fa *make_good_fa(CuTest *tc, const char *regexp) {
- return make_fa(tc, regexp, REG_NOERROR);
+ return make_fa(tc, regexp, strlen(regexp), REG_NOERROR);
}
static void dot(struct fa *fa) {
@@ -143,8 +145,10 @@ static void dot(struct fa *fa) {
}
static void testBadRegexps(CuTest *tc) {
- make_fa(tc, "(x", REG_EPAREN);
- make_fa(tc, "a{5,3}", REG_BADBR);
+ const char *const re1 = "(x";
+ const char *const re2 = "a{5,3}";
+ make_fa(tc, re1, strlen(re1), REG_EPAREN);
+ make_fa(tc, re2, strlen(re2), REG_BADBR);
}
/* Stress test, mostly good to check that allocation is clean */
@@ -431,7 +435,8 @@ static void testAsRegexpMinus(CuTest *tc) {
}
static void testRangeEnd(CuTest *tc) {
- make_fa(tc, "[1-0]", REG_ERANGE);
+ const char *const re = "[1-0]";
+ make_fa(tc, re, strlen(re), REG_ERANGE);
}
int main(int argc, char **argv) {
More information about the augeas-devel
mailing list