[augeas-devel] augeas: master - * src/fa.c (fa_as_regexp): convert strings more efficiently

David Lutterkort lutter at fedoraproject.org
Thu Jan 14 18:20:25 UTC 2010


Gitweb:        http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=3a2afb2c05290f2521ee0d320619a3f702d05f13
Commit:        3a2afb2c05290f2521ee0d320619a3f702d05f13
Parent:        ed870909996445b9e8e549be878f31c2039486ab
Author:        David Lutterkort <lutter at redhat.com>
AuthorDate:    Wed Jan 13 17:10:40 2010 -0800
Committer:     David Lutterkort <lutter at redhat.com>
CommitterDate: Thu Jan 14 09:31:30 2010 -0800

* src/fa.c (fa_as_regexp): convert strings more efficiently

Since we are building the transitive closure of the FA, cut down the size
of the FA by first extracting strings and removing their internal states.
---
 src/fa.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 81 insertions(+), 0 deletions(-)

diff --git a/src/fa.c b/src/fa.c
index 55eb318..067c96f 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -3907,6 +3907,85 @@ static int re_collapse_trans(struct state *s1, struct state *s2,
     return -1;
 }
 
+static int convert_strings(struct fa *fa) {
+    struct state_set *worklist = state_set_init(-1, S_NONE);
+    int result = -1;
+
+    _E(worklist == NULL);
+
+    /* Abuse hash to count indegree, reachable to mark visited states */
+    list_for_each(s, fa->initial) {
+        s->hash = 0;
+        s->reachable = 0;
+    }
+
+    list_for_each(s, fa->initial) {
+        for_each_trans(t, s) {
+            t->to->hash += 1;
+        }
+    }
+
+    for (struct state *s = fa->initial;
+         s != NULL;
+         s = state_set_pop(worklist)) {
+        for (int i=0; i < s->tused; i++) {
+            struct trans *t = s->trans + i;
+            struct state *to = t->to;
+            while (to->hash == 1 && to->tused == 1 && ! to->accept) {
+                if (t->re == NULL) {
+                    t->re = to->trans->re;
+                    to->trans->re = NULL;
+                } else {
+                    t->re = make_re_binop(CONCAT, t->re, to->trans->re);
+                    if (t->re == NULL)
+                        goto error;
+                }
+                t->to = to->trans->to;
+                to->tused = 0;
+                to->hash -= 1;
+                to = t->to;
+                for (int j=0; j < s->tused; j++) {
+                    if (j != i && s->trans[j].to == to) {
+                        /* Combine transitions i and j; remove trans j */
+                        t->re = make_re_binop(UNION, t->re, s->trans[j].re);
+                        if (t->re == NULL)
+                            goto error;
+                        memmove(s->trans + j, s->trans + j + 1,
+                                sizeof(s->trans[j]) * (s->tused - j - 1));
+                        to->hash -= 1;
+                        s->tused -= 1;
+                        if (j < i) {
+                            i = i - 1;
+                            t = s->trans + i;
+                        }
+                    }
+                }
+            }
+
+            if (! to->reachable) {
+                to->reachable = 1;
+                _F(state_set_push(worklist, to));
+            }
+        }
+    }
+
+    for (struct state *s = fa->initial; s->next != NULL; ) {
+        if (s->next->hash == 0 && s->next->tused == 0) {
+            struct state *del = s->next;
+            s->next = del->next;
+            free(del->trans);
+            free(del);
+        } else {
+            s = s->next;
+        }
+    }
+    result = 0;
+
+ error:
+    state_set_free(worklist);
+    return result;
+}
+
 /* Convert an FA to a regular expression.
  * The strategy is the following:
  * (1) For all states S1 and S2, convert the transitions between them
@@ -3970,6 +4049,8 @@ int fa_as_regexp(struct fa *fa, char **regexp, size_t *regexp_len) {
         goto error;
     set_initial(fa, ini);
 
+    convert_strings(fa);
+
     list_for_each(s, fa->initial->next) {
         if (s == fin)
             continue;




More information about the augeas-devel mailing list