[augeas-devel] [PATCH 07/16] Add a typechecker and constructor for recursive lenses

lutter at redhat.com lutter at redhat.com
Wed Dec 23 21:30:56 UTC 2009


From: David Lutterkort <lutter at redhat.com>

  * src/lens.h (lns_make_rec, lns_check_rec): new functions
  * src/lens.c (lns_make_rec, lns_check_rec): new functions
---
 src/lens.c |  877 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/lens.h |   21 ++-
 2 files changed, 894 insertions(+), 4 deletions(-)

diff --git a/src/lens.c b/src/lens.c
index 35a0876..d7c5bbf 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -27,6 +27,11 @@
 #include "memory.h"
 #include "errcode.h"
 
+/* This enum must be kept in sync with type_offs and ntypes */
+enum lens_type {
+    CTYPE, ATYPE, KTYPE, VTYPE
+};
+
 static const int const type_offs[] = {
     offsetof(struct lens, ctype),
     offsetof(struct lens, atype),
@@ -35,6 +40,9 @@ static const int const type_offs[] = {
 };
 static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
 
+static const char *lens_type_names[] =
+    { "ctype", "atype", "ktype", "vtype" };
+
 #define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
 
 static struct value * typecheck_union(struct info *,
@@ -713,9 +721,7 @@ void free_lens(struct lens *lens) {
         free(lens->children);
         break;
     case L_REC:
-        if (lens->ref != REF_MAX) {
-            /* Break unbounded recursion */
-            lens->ref = REF_MAX;
+        if (!lens->rec_internal) {
             unref(lens->body, lens);
         }
         break;
@@ -982,6 +988,871 @@ int lns_format_atype(struct lens *l, char **buf) {
         assert(0);
         break;
     };
+
+}
+
+/*
+ * Recursive lenses
+ */
+struct value *lns_make_rec(struct info *info) {
+    struct lens *l = make_lens(L_REC, info);
+    l->recursive = 1;
+
+    return make_lens_value(l);
+}
+
+/* Transform a recursive lens into a recursive transition network
+ *
+ * First, we transform the lens into context free grammar, considering any
+ * nonrecursive lens as a terminal
+ *
+ * cfg: lens -> nonterminal -> production list
+ *
+ * cfg(primitive, N) -> N := regexp(primitive)
+ * cfg(l1 . l2, N)   -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
+ * cfg(l1 | l2, N)   -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
+ * cfg(l*, N)        -> N := N . N' | eps + cfg(l, N')
+ * cfg([ l ], N)     -> N := N' + cfg(l, N')
+ *
+ * We use the lenses as nonterminals themselves; this also means that our
+ * productions are normalized such that the RHS is either a terminal
+ * (regexp) or entirely consists of nonterminals
+ *
+ * In a few places, we need to know that a nonterminal corresponds to a
+ * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
+ * l ], N) introduces a useless production N := N'.
+ *
+ * Computing the types for a recursive lens r is (fairly) straightforward,
+ * given the above grammar, which we convert to an automaton following
+ * http://arxiv.org/abs/cs/9910022; the only complication arises from the
+ * subtree combinator, since it can be used in recursive lenses to
+ * construct trees of arbitrary depth, but we need to approximate the types
+ * of r in a way that fits with our top-down tree automaton in put.c.
+ *
+ * To handle subtree combinators, remember that the type rules for a lens
+ * m = [ l ] are:
+ *
+ *   m.ktype = NULL
+ *   m.vtype = NULL
+ *   m.ctype = l.ctype
+ *   m.atype = enc(l.ktype, l.vtype)
+ *     ( enc is a function regexp -> regexp -> regexp)
+ *
+ * We compute types for r by modifying its automaton according to
+ * Nederhof's paper and reducing it to a regular expression of lenses. This
+ * has to happen in the following steps:
+ *   r.ktype : approximate by using [ .. ].ktype = NULL
+ *   r.vtype : same as r.ktype
+ *   r.ctype : approximate by treating [ l ] as l
+ *   r.atype : approximate by using r.ktype and r.vtype from above
+ *             in lens expressions [ f(r) ]
+ */
+
+/* Transitions go to a state and are labeled with a lens. For epsilon
+ * transitions, lens may be NULL. When lens is a simple (nonrecursive
+ * lens), PROD will be NULL. When we modify the automaton to splice
+ * nonterminals in, we remember the production for the nonterminal in PROD.
+ */
+struct trans {
+    struct state  *to;
+    struct lens   *lens;
+    struct regexp *re;
+};
+
+struct state {
+    struct state  *next;   /* Linked list for memory management */
+    size_t         ntrans;
+    struct trans  *trans;
+};
+
+/* Productions for lens LENS. Start state START and end state END. If we
+   start with START, END is the only accepting state. */
+struct prod {
+    struct lens  *lens;
+    struct state *start;
+    struct state *end;
+};
+
+/* A recursive transition network used to compute regular approximations
+ * to the types */
+struct rtn {
+    struct info *info;
+    size_t        nprod;
+    struct prod **prod;
+    struct state *states;  /* Linked list through next of all states in all
+                              prods; the states for each production are on
+                              the part of the list from prod->start to
+                              prod->end */
+    struct value *exn;
+    enum lens_type lens_type;
+    unsigned int check : 1;
+};
+
+#define RTN_BAIL(rtn) if ((rtn)->exn != NULL ||                     \
+                          (rtn)->info->error->code != AUG_NOERROR)  \
+                         goto error;
+
+static void free_prod(struct prod *prod) {
+    if (prod == NULL)
+        return;
+    unref(prod->lens, lens);
+    free(prod);
+}
+
+static void free_rtn(struct rtn *rtn) {
+    if (rtn == NULL)
+        return;
+    for (int i=0; i < rtn->nprod; i++)
+        free_prod(rtn->prod[i]);
+    free(rtn->prod);
+    list_for_each(s, rtn->states) {
+        for (int i=0; i < s->ntrans; i++) {
+            unref(s->trans[i].lens, lens);
+            unref(s->trans[i].re, regexp);
+        }
+        free(s->trans);
+    }
+    list_free(rtn->states);
+    unref(rtn->info, info);
+    unref(rtn->exn, value);
+    free(rtn);
+}
+
+static struct state *add_state(struct prod *prod) {
+    struct state *result = NULL;
+    int r;
+
+    r = ALLOC(result);
+    ERR_NOMEM(r < 0, prod->lens->info);
+
+    list_cons(prod->start->next, result);
+ error:
+    return result;
+}
+
+static struct trans *add_trans(struct rtn *rtn, struct state *state,
+                               struct state *to, struct lens *l) {
+    int r;
+    struct trans *result = NULL;
+
+    for (int i=0; i < state->ntrans; i++)
+        if (state->trans[i].to == to && state->trans[i].lens == l)
+            return state->trans + i;
+
+    r = REALLOC_N(state->trans, state->ntrans+1);
+    ERR_NOMEM(r < 0, rtn->info);
+
+    result = state->trans + state->ntrans;
+    state->ntrans += 1;
+
+    MEMZERO(result, 1);
+    result->to = to;
+    if (l != NULL) {
+        result->lens = ref(l);
+        result->re = ref(ltype(l, rtn->lens_type));
+    }
+ error:
+    return result;
+}
+
+static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
+    struct prod *result = NULL;
+    int r;
+
+    r = ALLOC(result);
+    ERR_NOMEM(r < 0, l->info);
+
+    result->lens = ref(l);
+    r = ALLOC(result->start);
+    ERR_NOMEM(r < 0, l->info);
+
+    result->end = add_state(result);
+    ERR_BAIL(l->info);
+
+    result->end->next = rtn->states;
+    rtn->states = result->start;
+
+    return result;
+ error:
+    free_prod(result);
+    return NULL;
+}
+
+static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
+    if (l == NULL)
+        return NULL;
+    for (int i=0; i < rtn->nprod; i++) {
+        if (rtn->prod[i]->lens == l)
+            return rtn->prod[i];
+    }
+    return NULL;
+}
+
+static void rtn_dot(struct rtn *rtn, const char *stage) {
+    FILE *fp;
+
+    fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
+    if (fp == NULL)
+        return;
+
+    fprintf(fp, "digraph \"l1\" {\n  rankdir=LR;\n");
+    list_for_each(s, rtn->states) {
+        char *label = NULL;
+        for (int p=0; p < rtn->nprod; p++) {
+            if (s == rtn->prod[p]->start) {
+                asprintf(&label, "s%d", p);
+            } else if (s == rtn->prod[p]->end) {
+                asprintf(&label, "e%d", p);
+            }
+        }
+        if (label == NULL)
+            asprintf(&label, "%p", s);
+        fprintf(fp, "  n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
+        FREE(label);
+        for (int i=0; i < s->ntrans; i++) {
+            fprintf(fp, "  n%p -> n%p", s, s->trans[i].to);
+            if (s->trans[i].re != NULL) {
+                label = regexp_escape(s->trans[i].re);
+                for (char *t = label; *t; t++)
+                    if (*t == '\\')
+                        *t = '~';
+                fprintf(fp, " [ label = \"%s\" ]", label);
+                FREE(label);
+            }
+            fprintf(fp, ";\n");
+        }
+    }
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+
+/* Add transitions to RTN corresponding to cfg(l, N) */
+static void rtn_rules(struct rtn *rtn, struct lens *l) {
+    if (! l->recursive)
+        return;
+
+    struct prod *prod = prod_for_lens(rtn, l);
+    if (prod != NULL)
+        return;
+
+    int r = REALLOC_N(rtn->prod, rtn->nprod+1);
+    ERR_NOMEM(r < 0, l->info);
+
+    prod =  make_prod(rtn, l);
+    rtn->prod[rtn->nprod] = prod;
+    RTN_BAIL(rtn);
+    rtn->nprod += 1;
+
+    struct state *start = prod->start;
+
+    switch (l->tag) {
+    case L_UNION:
+        /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
+        for (int i=0; i < l->nchildren; i++) {
+            add_trans(rtn, start, prod->end, l->children[i]);
+            RTN_BAIL(rtn);
+            rtn_rules(rtn, l->children[i]);
+            RTN_BAIL(rtn);
+        }
+        break;
+    case L_CONCAT:
+        /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
+        for (int i=0; i < l->nchildren-1; i++) {
+            struct state *s = add_state(prod);
+            RTN_BAIL(rtn);
+            add_trans(rtn, start, s, l->children[i]);
+            RTN_BAIL(rtn);
+            start = s;
+            rtn_rules(rtn, l->children[i]);
+            RTN_BAIL(rtn);
+        }
+        {
+            struct lens *c = l->children[l->nchildren - 1];
+            add_trans(rtn, start, prod->end, c);
+            RTN_BAIL(rtn);
+            rtn_rules(rtn, c);
+            RTN_BAIL(rtn);
+        }
+        break;
+    case L_STAR: {
+        /* cfg(l*, N) -> N := N . N' | eps */
+        struct state *s = add_state(prod);
+        RTN_BAIL(rtn);
+        add_trans(rtn, start, s, l);
+        RTN_BAIL(rtn);
+        add_trans(rtn, s, prod->end, l->child);
+        RTN_BAIL(rtn);
+        add_trans(rtn, start, prod->end, NULL);
+        RTN_BAIL(rtn);
+        rtn_rules(rtn, l->child);
+        RTN_BAIL(rtn);
+        break;
+    }
+    case L_SUBTREE:
+        switch (rtn->lens_type) {
+        case KTYPE:
+        case VTYPE:
+            /* cfg([ l ], N) -> N := eps */
+            add_trans(rtn, start, prod->end, NULL);
+            break;
+        case CTYPE:
+            /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
+            add_trans(rtn, start, prod->end, l->child);
+            RTN_BAIL(rtn);
+            rtn_rules(rtn, l->child);
+            RTN_BAIL(rtn);
+            break;
+        case ATYPE: {
+            /* At this point, we have propagated ktype and vtype */
+            /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
+            struct trans *t = add_trans(rtn, start, prod->end, NULL);
+            RTN_BAIL(rtn);
+            t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
+            break;
+        }
+        default:
+            assert(0);
+        }
+        break;
+    case L_MAYBE:
+        /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
+        add_trans(rtn, start, prod->end, l->child);
+        RTN_BAIL(rtn);
+        add_trans(rtn, start, prod->end, NULL);
+        RTN_BAIL(rtn);
+        rtn_rules(rtn, l->child);
+        RTN_BAIL(rtn);
+        break;
+    case L_REC:
+        /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
+        add_trans(rtn, start, prod->end, l->body);
+        RTN_BAIL(rtn);
+        rtn_rules(rtn, l->body);
+        RTN_BAIL(rtn);
+        break;
+    default:
+        assert(0);
+    }
+ error:
+    return;
+}
+
+/* Replace transition t with two epsilon transitions s => p->start and
+ * p->end => s->trans[i].to where s is the start of t. Instead of adding
+ * epsilon transitions, we expand the epsilon transitions.
+ */
+static void prod_splice(struct rtn *rtn,
+                        struct prod *from, struct prod *to, struct trans *t) {
+
+    add_trans(rtn, to->end, t->to, NULL);
+    ERR_BAIL(from->lens->info);
+    t->to = to->start;
+    unref(t->re, regexp);
+
+ error:
+    return;
+}
+
+static void rtn_splice(struct rtn *rtn, struct prod *prod) {
+    for (struct state *s = prod->start; s != prod->end; s = s->next) {
+        for (int i=0; i < s->ntrans; i++) {
+            struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
+            if (p != NULL) {
+                prod_splice(rtn, prod, p, s->trans+i);
+                RTN_BAIL(rtn);
+            }
+        }
+    }
+ error:
+    return;
+}
+
+static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
+    int r;
+    struct rtn *rtn;
+
+    r = ALLOC(rtn);
+    ERR_NOMEM(r < 0, rec->info);
+
+    rtn->info = ref(rec->info);
+    rtn->lens_type = lt;
+
+    rtn_rules(rtn, rec);
+    RTN_BAIL(rtn);
+    if (debugging("cf.approx"))
+        rtn_dot(rtn, "10-rules");
+
+    for (int i=0; i < rtn->nprod; i++) {
+        rtn_splice(rtn, rtn->prod[i]);
+        RTN_BAIL(rtn);
+    }
+    if (debugging("cf.approx"))
+        rtn_dot(rtn, "11-splice");
+
+ error:
+    return rtn;
+}
+
+/* Compare transitions lexicographically by (to, lens) */
+static int trans_to_cmp(const void *v1, const void *v2) {
+    const struct trans *t1 = v1;
+    const struct trans *t2 = v2;
+
+    if (t1->to != t2->to)
+        return (t1->to < t2->to) ? -1 : 1;
+
+    if (t1->lens == t2->lens)
+        return 0;
+    return (t1->lens < t2->lens) ? -1 : 1;
+}
+
+/* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
+ * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
+ * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
+ * label the transition with a simplified regexp by treating NULL as
+ * epsilon */
+static void collapse_trans(struct rtn *rtn,
+                           struct state *s1, struct state *s2,
+                           struct regexp *r1, struct regexp *loop,
+                           struct regexp *r2) {
+
+    struct trans *t = NULL;
+    struct regexp *r = NULL;
+
+    for (int i=0; i < s1->ntrans; i++) {
+        if (s1->trans[i].to == s2) {
+            t = s1->trans + i;
+            break;
+        }
+    }
+
+    /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
+    if (loop == NULL) {
+        if (r1 == NULL)
+            r = ref(r2);
+        else if (r2 == NULL)
+            r = ref(r1);
+        else
+            r = regexp_concat(rtn->info, r1, r2);
+    } else {
+        struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
+        ERR_NOMEM(s == NULL, rtn->info);
+        struct regexp *c = NULL;
+        if (r1 == NULL) {
+            c = s;
+            s = NULL;
+        } else {
+            c = regexp_concat(rtn->info, r1, s);
+            unref(s, regexp);
+            ERR_NOMEM(c == NULL, rtn->info);
+        }
+        if (r2 == NULL) {
+            r = c;
+            c = NULL;
+        } else {
+            r = regexp_concat(rtn->info, c, r2);
+            unref(c, regexp);
+            ERR_NOMEM(r == NULL, rtn->info);
+        }
+    }
+
+    if (t == NULL) {
+        t = add_trans(rtn, s1, s2, NULL);
+        ERR_NOMEM(t == NULL, rtn->info);
+        t->re = r;
+    } else if (t->re == NULL) {
+        if (r == NULL || regexp_matches_empty(r))
+            t->re = r;
+        else {
+            t->re = regexp_maybe(rtn->info, r);
+            unref(r, regexp);
+            ERR_NOMEM(t->re == NULL, rtn->info);
+        }
+    } else if (r == NULL) {
+        if (!regexp_matches_empty(t->re)) {
+            r = regexp_maybe(rtn->info, t->re);
+            unref(t->re, regexp);
+            t->re = r;
+            ERR_NOMEM(r == NULL, rtn->info);
+        }
+    } else {
+        struct regexp *u = regexp_union(rtn->info, r, t->re);
+        unref(r, regexp);
+        unref(t->re, regexp);
+        t->re = u;
+        ERR_NOMEM(u == NULL, rtn->info);
+    }
+
+    return;
+ error:
+    rtn->exn = exn_error();
+    return;
+}
+
+/* Reduce the automaton with start state rprod->start and only accepting
+ * state rprod->end so that we have a single transition rprod->start =>
+ * rprod->end labelled with the overall approximating regexp for the
+ * automaton.
+ *
+ * This is the same algorithm as fa_as_regexp in fa.c
+ */
+static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
+    struct prod *prod = prod_for_lens(rtn, rec);
+    int r;
+
+    ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
+              "No production for recursive lens");
+
+    /* Eliminate epsilon transitions and turn transitions between the same
+     * two states into a regexp union */
+    list_for_each(s, rtn->states) {
+        qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
+        for (int i=0; i < s->ntrans; i++) {
+            int j = i+1;
+            for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
+                 j++);
+            if (j > i+1) {
+                struct regexp *u, **v;
+                r = ALLOC_N(v, j - i);
+                ERR_NOMEM(r < 0, rtn->info);
+                for (int k=i; k < j; k++)
+                    v[k-i] = s->trans[k].re;
+                u = regexp_union_n(rtn->info, j - i, v);
+                if (u == NULL) {
+                    // FIXME: The calling convention for regexp_union_n
+                    // is bad, since we can't distinguish between alloc
+                    // failure and unioning all NULL's
+                    for (int k=0; k < j-i; k++)
+                        if (v[k] != NULL) {
+                            FREE(v);
+                            ERR_NOMEM(true, rtn->info);
+                        }
+                }
+                FREE(v);
+                for (int k=i; k < j; k++) {
+                    unref(s->trans[k].lens, lens);
+                    unref(s->trans[k].re, regexp);
+                }
+                s->trans[i].re = u;
+                MEMMOVE(s->trans + (i+1),
+                        s->trans + j,
+                        s->ntrans - j);
+                s->ntrans -= j - (i + 1);
+            }
+        }
+    }
+
+    /* Introduce new start and end states with epsilon transitions to/from
+     * the old start and end states */
+    struct state *end = NULL;
+    struct state *start = NULL;
+    if (ALLOC(start) < 0 || ALLOC(end) < 0) {
+        FREE(start);
+        FREE(end);
+        ERR_NOMEM(true, rtn->info);
+    }
+    list_insert_before(start, prod->start, rtn->states);
+    end->next = prod->end->next;
+    prod->end->next = end;
+
+    add_trans(rtn, start, prod->start, NULL);
+    RTN_BAIL(rtn);
+    add_trans(rtn, prod->end, end, NULL);
+    RTN_BAIL(rtn);
+
+    prod->start = start;
+    prod->end = end;
+
+    /* Eliminate states S (except for INI and FIN) one by one:
+     *     Let LOOP the regexp for the transition S -> S if it exists, epsilon
+     *     otherwise.
+     *     For all S1, S2 different from S with S1 -> S -> S2
+     *       Let R1 the regexp of S1 -> S
+     *           R2 the regexp of S -> S2
+     *           R3 the regexp of S1 -> S2 (or the regexp matching nothing
+     *                                      if no such transition)
+     *        set the regexp on the transition S1 -> S2 to
+     *          R1 . (LOOP)* . R2 | R3 */
+    // FIXME: This does not go over all states
+    list_for_each(s, rtn->states) {
+        if (s == prod->end || s == prod->start)
+            continue;
+        struct regexp *loop = NULL;
+        for (int i=0; i < s->ntrans; i++) {
+            if (s == s->trans[i].to) {
+                assert(loop == NULL);
+                loop = s->trans[i].re;
+            }
+        }
+        list_for_each(s1, rtn->states) {
+            if (s == s1)
+                continue;
+            for (int t1=0; t1 < s1->ntrans; t1++) {
+                if (s == s1->trans[t1].to) {
+                    for (int t2=0; t2 < s->ntrans; t2++) {
+                        struct state *s2 = s->trans[t2].to;
+                        if (s2 == s)
+                            continue;
+                        collapse_trans(rtn, s1, s2,
+                                       s1->trans[t1].re, loop,
+                                       s->trans[t2].re);
+                        RTN_BAIL(rtn);
+                    }
+                }
+            }
+        }
+    }
+
+    /* Find the overall regexp */
+    struct regexp *result = NULL;
+    for (int i=0; i < prod->start->ntrans; i++) {
+        if (prod->start->trans[i].to == prod->end) {
+            assert(result == NULL);
+            result = ref(prod->start->trans[i].re);
+        }
+    }
+    return result;
+ error:
+    return NULL;
+}
+
+static void propagate_type(struct lens *l, enum lens_type lt) {
+    struct regexp **types = NULL;
+    int r;
+
+    if (! l->recursive || ltype(l, lt) != NULL)
+        return;
+
+    switch(l->tag) {
+    case L_CONCAT:
+        r = ALLOC_N(types, l->nchildren);
+        ERR_NOMEM(r < 0, l->info);
+        for (int i=0; i < l->nchildren; i++) {
+            propagate_type(l->children[i], lt);
+            types[i] = ltype(l->children[i], lt);
+        }
+        ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
+        FREE(types);
+        break;
+    case L_UNION:
+        r = ALLOC_N(types, l->nchildren);
+        ERR_NOMEM(r < 0, l->info);
+        for (int i=0; i < l->nchildren; i++) {
+            propagate_type(l->children[i], lt);
+            types[i] = ltype(l->children[i], lt);
+        }
+        ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
+        FREE(types);
+        break;
+    case L_SUBTREE:
+        propagate_type(l->child, lt);
+        if (lt == ATYPE)
+            l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
+        if (lt == CTYPE)
+            l->ctype = ref(l->child->ctype);
+        break;
+    case L_STAR:
+        propagate_type(l->child, lt);
+        ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
+        break;
+    case L_MAYBE:
+        propagate_type(l->child, lt);
+        ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
+        break;
+    case L_REC:
+        /* Nothing to do */
+        break;
+    default:
+        assert(0);
+    }
+
+ error:
+    FREE(types);
+}
+
+static struct value *typecheck(struct lens *l, int check);
+
+typedef struct value *typecheck_n_make(struct info *,
+                                       struct lens *, struct lens *, int);
+
+static struct info *merge_info(struct info *i1, struct info *i2) {
+    struct info *info;
+    make_ref(info);
+    ERR_NOMEM(info == NULL, i1);
+
+    info->filename = ref(i1->filename);
+    info->first_line = i1->first_line;
+    info->first_column = i1->first_column;
+    info->last_line    = i2->last_line;
+    info->last_column  = i2->last_column;
+    info->error        = i1->error;
+    return info;
+
+ error:
+    unref(info, info);
+    return NULL;
+}
+
+static struct value *typecheck_n(struct lens *l,
+                                 typecheck_n_make *make, int check) {
+    struct value *exn = NULL;
+    struct lens *acc = NULL;
+
+    assert(l->tag == L_CONCAT || l->tag == L_UNION);
+    for (int i=0; i < l->nchildren; i++) {
+        exn = typecheck(l->children[i], check);
+        if (exn != NULL)
+            goto error;
+    }
+    acc = ref(l->children[0]);
+    for (int i=1; i < l->nchildren; i++) {
+        struct info *info = merge_info(acc->info, l->children[i]->info);
+        ERR_BAIL(acc->info);
+        exn = (*make)(info, acc, ref(l->children[i]), check);
+        if (EXN(exn))
+            goto error;
+        assert(exn->tag == V_LENS);
+        acc = ref(exn->lens);
+        unref(exn, value);
+    }
+    l->value = acc->value;
+    l->key = acc->key;
+ error:
+    unref(acc, lens);
+    return exn;
+}
+
+static struct value *typecheck(struct lens *l, int check) {
+    struct value *exn = NULL;
+
+    /* Nonrecursive lenses are typechecked at build time */
+    if (! l->recursive)
+        return NULL;
+
+    switch(l->tag) {
+    case L_CONCAT:
+        exn = typecheck_n(l, lns_make_concat, check);
+        break;
+    case L_UNION:
+        exn = typecheck_n(l, lns_make_union, check);
+        break;
+    case L_SUBTREE:
+        exn = typecheck(l->child, check);
+        break;
+    case L_STAR:
+        if (check)
+            exn = typecheck_iter(l->info, l->child);
+        if (exn == NULL && l->value)
+            exn = make_exn_value(l->info, "Multiple stores in iteration");
+        if (exn == NULL && l->key)
+            exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
+        break;
+    case L_MAYBE:
+        if (check)
+            exn = typecheck_maybe(l->info, l->child);
+        l->key = l->child->key;
+        l->value = l->child->value;
+        break;
+    case L_REC:
+        /* Nothing to do */
+        break;
+    default:
+        assert(0);
+    }
+
+    return exn;
+}
+
+static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
+    struct rtn *rtn = NULL;
+    struct value *result = NULL;
+
+    rtn = rtn_build(rec, lt);
+    RTN_BAIL(rtn);
+    ltype(rec, lt) = rtn_reduce(rtn, rec);
+    RTN_BAIL(rtn);
+    if (debugging("cf.approx"))
+        rtn_dot(rtn, "50-reduce");
+
+    propagate_type(rec->body, lt);
+    ERR_BAIL(rec->info);
+
+ done:
+    free_rtn(rtn);
+
+    if (debugging("cf.approx")) {
+        printf("approx %s  => ", lens_type_names[lt]);
+        print_regexp(stdout, ltype(rec, lt));
+        printf("\n");
+    }
+
+    return result;
+ error:
+    if (rtn->exn == NULL)
+        result = exn_error();
+    else
+        result = ref(rtn->exn);
+    goto done;
+}
+
+struct value *lns_check_rec(struct info *info,
+                            struct lens *body, struct lens *rec,
+                            int check) {
+    /* The types in the order of approximation */
+    static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
+
+    assert(rec->tag == L_REC);
+    assert(body->recursive);
+    struct value *result = NULL;
+
+    /* To help memory management, we avoid the cycle inherent ina recursive
+     * lens by using two instances of an L_REC lens. One is marked with
+     * rec_internal, and used inside the body of the lens. The other is the
+     * "toplevel" which receives external references.
+     *
+     * The internal instance of the recursive lens is REC, the external one
+     * is TOP, constructed below
+     */
+    rec->rec_internal = 1;
+    rec->body = body;                          /* REC does not own BODY */
+
+    for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
+        result = rtn_approx(rec, types[i]);
+        ERR_BAIL(info);
+    }
+
+    if (rec->atype == NULL) {
+        result = make_exn_value(rec->info,
+        "recursive lens generates the empty language for its %s",
+         rec->ctype == NULL ? "ctype" : "atype");
+        goto error;
+    }
+
+    rec->key = rec->body->key;
+    rec->value = rec->body->value;
+    rec->consumes_value = rec->body->consumes_value;
+
+    result = typecheck(rec->body, check);
+    if (result != NULL)
+        goto error;
+
+    result = lns_make_rec(ref(rec->info));
+    struct lens *top = result->lens;
+    for (int t=0; t < ntypes; t++)
+        ltype(top, t) = ref(ltype(rec, t));
+    top->value = rec->value;
+    top->key = rec->key;
+    top->consumes_value = rec->consumes_value;
+    top->body = ref(body);
+    top->alias = rec;
+    rec->alias = top;
+    ERR_BAIL(info);
+
+    return result;
+ error:
+    if (result == NULL)
+        result = exn_error();
+    return result;
 }
 
 /*
diff --git a/src/lens.h b/src/lens.h
index 0641fb3..c5d4a55 100644
--- a/src/lens.h
+++ b/src/lens.h
@@ -76,6 +76,8 @@ struct lens {
     unsigned int              key : 1;
     unsigned int              recursive : 1;
     unsigned int              consumes_value : 1;
+    /* Flag to help avoid cycles in recursive lenses */
+    unsigned int              rec_internal : 1;
     union {
         /* Primitive lenses */
         struct {                   /* L_DEL uses both */
@@ -88,7 +90,18 @@ struct lens {
             unsigned int nchildren;
             struct lens **children;
         };
-        struct lens *body;          /* L_REC */
+        struct {
+            struct lens *body;      /* L_REC */
+            /* We represent a recursive lens as two instances of struct
+             * lens with L_REC. One has rec_internal set to 1, the other
+             * has it set to 0. The one with rec_internal is used within
+             * the body, the other is what is used from the 'outside'. This
+             * is necessary to break the cycles inherent in recursive
+             * lenses with reference counting. The link through alias is
+             * set up in lns_check_rec, and not reference counted.
+             */
+            struct lens *alias;
+        };
     };
 };
 
@@ -119,6 +132,12 @@ char *format_lens(struct lens *l);
  * the caller */
 int lns_format_atype(struct lens *, char **buf);
 
+/* Recursive lenses */
+struct value *lns_make_rec(struct info *info);
+struct value *lns_check_rec(struct info *info,
+                            struct lens *body, struct lens *rec,
+                            int check);
+
 /* Auxiliary data structures used during get/put/create */
 struct skel {
     struct skel *next;
-- 
1.6.5.2




More information about the augeas-devel mailing list