[augeas-devel] [PATCH 1/3] libfa (fa_enumerate): new function

lutter at redhat.com lutter at redhat.com
Tue Oct 23 23:25:27 UTC 2012


From: David Lutterkort <lutter at redhat.com>

Function to enumerate FA's with a finite language, given a preset limit of
words.
---
 src/fa.c           |   75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/fa.h           |   10 +++++++
 src/fa_sym.version |    4 +++
 tests/fatest.c     |   35 ++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 0 deletions(-)

diff --git a/src/fa.c b/src/fa.c
index 8a385de..eb2d927 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -81,6 +81,7 @@ struct state {
     unsigned int  accept : 1;
     unsigned int  live : 1;
     unsigned int  reachable : 1;
+    unsigned int  visited : 1;   /* Used in various places to track progress */
     /* Array of transitions. The TUSED first entries are used, the array
        has allocated room for TSIZE */
     size_t        tused;
@@ -2659,6 +2660,80 @@ int fa_example(struct fa *fa, char **example, size_t *example_len) {
     return -1;
 }
 
+struct enum_intl {
+    int       limit;
+    int       nwords;
+    char    **words;
+    char     *buf;
+    size_t    bsize;
+};
+
+static int fa_enumerate_intl(struct state *s, struct enum_intl *ei, int pos) {
+    int result = -1;
+
+    if (ei->bsize <= pos + 1) {
+        ei->bsize *= 2;
+        F(REALLOC_N(ei->buf, ei->bsize));
+    }
+
+    ei->buf[pos] = '\0';
+    for_each_trans(t, s) {
+        if (t->to->visited)
+            return -2;
+        t->to->visited = 1;
+        for (int i=t->min; i <= t->max; i++) {
+            ei->buf[pos] = i;
+            if (t->to->accept) {
+                if (ei->nwords >= ei->limit)
+                    return -2;
+                ei->words[ei->nwords] = strdup(ei->buf);
+                E(ei->words[ei->nwords] == NULL);
+                ei->nwords += 1;
+            }
+            result = fa_enumerate_intl(t->to, ei, pos+1);
+            E(result < 0);
+        }
+        t->to->visited = 0;
+    }
+    ei->buf[pos] = '\0';
+    result = 0;
+ error:
+    return result;
+}
+
+int fa_enumerate(struct fa *fa, int limit, char ***words) {
+    struct enum_intl ei;
+    int result = -1;
+
+    *words = NULL;
+    MEMZERO(&ei, 1);
+    ei.bsize = 8;                    /* Arbitrary initial size */
+    ei.limit = limit;
+    F(ALLOC_N(ei.words, limit));
+    F(ALLOC_N(ei.buf, ei.bsize));
+
+    /* We use the visited bit to track which states we already visited
+     * during the construction of a word to detect loops */
+    list_for_each(s, fa->initial)
+        s->visited = 0;
+    fa->initial->visited = 1;
+    result = fa_enumerate_intl(fa->initial, &ei, 0);
+    E(result < 0);
+
+    result = ei.nwords;
+    *words = ei.words;
+    ei.words = NULL;
+ done:
+    free(ei.buf);
+    return result;
+
+ error:
+    for (int i=0; i < ei.nwords; i++)
+        free(ei.words[i]);
+    free(ei.words);
+    goto done;
+}
+
 /* Expand the automaton FA by replacing every transition s(c) -> p from
  * state s to p on character c by two transitions s(X) -> r, r(c) -> p via
  * a new state r.
diff --git a/src/fa.h b/src/fa.h
index a061872..0413c9d 100644
--- a/src/fa.h
+++ b/src/fa.h
@@ -254,6 +254,16 @@ int fa_is_nocase(struct fa *fa);
  */
 int fa_expand_nocase(const char *regexp, size_t regexp_len,
                      char **newregexp, size_t *newregexp_len);
+
+/* Generate up to LIMIT words from the language of FA, which is assumed to
+ * be finite. The words are returned in WORDS, which is allocated by this
+ * function and must be freed by the caller.
+ *
+ * Return the number of generated words on success, -1 if we run out of
+ * memory, and -2 if FA has more than LIMIT words.
+ */
+int fa_enumerate(struct fa *fa, int limit, char ***words);
+
 #endif
 
 
diff --git a/src/fa_sym.version b/src/fa_sym.version
index 5d513e9..8d04cbf 100644
--- a/src/fa_sym.version
+++ b/src/fa_sym.version
@@ -29,3 +29,7 @@ FA_1.2.0 {
       fa_is_nocase;
       fa_expand_nocase;
 } FA_1.0.0;
+
+FA_1.4.0 {
+      fa_enumerate;
+} FA_1.2.0;
diff --git a/tests/fatest.c b/tests/fatest.c
index 79cd94c..c02a577 100644
--- a/tests/fatest.c
+++ b/tests/fatest.c
@@ -599,6 +599,40 @@ static void testNoCaseComplement(CuTest *tc) {
     CuAssertIntEquals(tc, 1, fa_is_basic(isect, FA_EMPTY));
 }
 
+static void testEnumerate(CuTest *tc) {
+    struct fa *fa1 = make_good_fa(tc, "[ab](cc|dd)");
+    static const char *const fa1_expected[] =
+        { "acc", "add", "bcc", "bdd" };
+    struct fa *fa_inf = make_good_fa(tc, "a(b*|d)c");
+    char **words;
+    int r;
+
+    r = fa_enumerate(fa1, 2, &words);
+    CuAssertIntEquals(tc, -2, r);
+    CuAssertPtrEquals(tc, NULL, words);
+
+    r = fa_enumerate(fa1, 10, &words);
+    CuAssertIntEquals(tc, 4, r);
+    CuAssertPtrNotNull(tc, words);
+
+    for (int i=0; i < r; i++) {
+        int found = 0;
+        for (int j=0; j < ARRAY_CARDINALITY(fa1_expected); j++) {
+            if (STREQ(words[i], fa1_expected[j]))
+                found = 1;
+        }
+        if (!found) {
+            char *msg;
+            asprintf(&msg, "Generated word %s not expected", words[i]);
+            CuFail(tc, msg);
+        }
+    }
+
+    r = fa_enumerate(fa_inf, 100, &words);
+    CuAssertIntEquals(tc, -2, r);
+    CuAssertPtrEquals(tc, NULL, words);
+}
+
 int main(int argc, char **argv) {
     if (argc == 1) {
         char *output = NULL;
@@ -624,6 +658,7 @@ int main(int argc, char **argv) {
         SUITE_ADD_TEST(suite, testNoCase);
         SUITE_ADD_TEST(suite, testExpandNoCase);
         SUITE_ADD_TEST(suite, testNoCaseComplement);
+        SUITE_ADD_TEST(suite, testEnumerate);
 
         CuSuiteRun(suite);
         CuSuiteSummary(suite, &output);
-- 
1.7.7.6




More information about the augeas-devel mailing list