[augeas-devel] [PATCH 04/11] Expand the grammar for path expressions

David Lutterkort lutter at redhat.com
Mon Jan 26 05:41:17 UTC 2009


* add predicates based on labels and values
* allow path expressions in predicates, e.g. '/foo/bar[../baz = "7"]'
* add explicit axes and some abbreviations thereof

This requires a rewrite of the interpreter of path expressions; the
interpreter is now based on a more explicit representation of the abstract
syntax of the path expression and is statically type checked.

Also add tests to check path expression evaluation
---
 .gitignore         |    1 +
 src/internal.h     |   16 +
 src/pathx.c        | 1685 ++++++++++++++++++++++++++++++++++++++++++++++------
 tests/Makefile.am  |    7 +-
 tests/test-xpath.c |  239 ++++++++
 tests/xpath.tests  |   99 +++
 6 files changed, 1851 insertions(+), 196 deletions(-)
 create mode 100644 tests/test-xpath.c
 create mode 100644 tests/xpath.tests

diff --git a/.gitignore b/.gitignore
index caabf4c..35466a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ src/lexer.[ch]
 src/augtool
 src/augparse
 tests/fatest
+tests/test-xpath
 *.aux
 *.dvi
 *.log
diff --git a/src/internal.h b/src/internal.h
index 1c2c5b5..c201970 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -350,8 +350,24 @@ int close_memstream(struct memstream *ms);
  * Path expressions
  */
 
+typedef enum {
+    PATHX_NOERROR = 0,
+    PATHX_ENAME,
+    PATHX_ESTRING,
+    PATHX_ENUMBER,
+    PATHX_EDELIM,
+    PATHX_ENOEQUAL,
+    PATHX_ENOMEM,
+    PATHX_EPRED,
+    PATHX_ESLASH,
+    PATHX_EINTERNAL,
+    PATHX_ETYPE
+} pathx_errcode_t;
+
 struct pathx;
 
+const char *pathx_error(struct pathx *pathx, const char **txt, int *pos);
+
 int pathx_parse(const struct tree *root, const char *path, struct pathx **px);
 struct tree *pathx_first(struct pathx *path);
 struct tree *pathx_next(struct pathx *path, struct tree *cur);
diff --git a/src/pathx.c b/src/pathx.c
index 171f706..2461a8a 100644
--- a/src/pathx.c
+++ b/src/pathx.c
@@ -22,225 +22,1482 @@
 
 #include <config.h>
 #include <internal.h>
+#include <stdint.h>
 #include <memory.h>
+#include <ctype.h>
+
+static const char *const errcodes[] = {
+    "no error",
+    "empty name",
+    "illegal string literal",
+    "illegal number",
+    "string missing ending ' or \"",
+    "expected '='",
+    "allocation failed",
+    "unmatched ']'",
+    "expected a '/'",
+    "internal error",   /* PATHX_EINTERNAL */
+    "type error"        /* PATHX_ETYPE */
+};
+
+/*
+ * Path expressions are strings that use a notation modelled on XPath.
+ */
+
+enum type {
+    T_NONE = 0,     /* Not a type */
+    T_LOCPATH,
+    T_BOOLEAN,
+    T_NUMBER,
+    T_STRING
+};
+
+enum expr_tag {
+    E_BINARY,
+    E_VALUE,
+    E_APP
+};
+
+enum binary_op {
+    OP_EQ,         /* '='  */
+    OP_NEQ,        /* '!=' */
+    OP_PLUS,       /* '+'  */
+    OP_MINUS,      /* '-'  */
+    OP_STAR        /* '*'  */
+};
+
+struct pred {
+    int               nexpr;
+    struct expr     **exprs;
+};
+
+enum axis {
+    SELF,
+    CHILD,
+    DESCENDANT,
+    DESCENDANT_OR_SELF,
+    PARENT,
+    ANCESTOR,
+    ROOT
+};
+
+/* This array is indexed by enum axis */
+static const char *const axis_names[] = {
+    "self",
+    "child",
+    "descendant",
+    "descendant-or-self",
+    "parent",
+    "ancestor",
+    "root"
+};
+
+static const char *const axis_sep = "::";
+
+/* Doubly linked list of location steps. Besides the information from the
+ * path expression, also contains information to iterate over a node set,
+ * in particular, the context node CTX for the step, and the current node
+ * CUR within that context.
+ */
+struct step {
+    struct step *next;
+    struct step *prev;
+    enum axis    axis;
+    char        *name;              /* NULL to match any name */
+    struct pred *predicates;
+    struct tree *ctx;
+    struct tree *cur;
+};
+/* Iteration over the nodes on a step, ignoring the predicates */
+static struct tree *step_first(struct step *step, struct tree *ctx);
+static struct tree *step_next(struct step *step);
+
+struct pathx {
+    struct state   *state;
+    struct locpath *locpath;
+    struct tree    *origin;
+};
 
 #define L_BRACK '['
 #define R_BRACK ']'
 
-static const char *const last_func = "[last()]";
+/* Locpaths are represented as iterators, consisting of multiple steps */
+struct locpath {
+    struct step *steps;
+    struct step *last;
+    struct tree *ctx;
+};
+
+static struct tree *locpath_first(struct locpath *lp, struct state *state);
+static struct tree *locpath_next(struct locpath *lp, struct state *state);
+
+#define for_each_node(t, locpath, state)                                \
+    for (struct tree *t = locpath_first(locpath, state);                \
+         t != NULL;                                                     \
+         t = locpath_next(locpath, state))
+
+typedef uint32_t value_ind_t;
 
-/* Internal representation of a path in the tree */
-struct segment {
-    int          index;
-    unsigned int fixed : 1; /* Match only one index (either INDEX or LAST) */
-    unsigned int last  : 1; /* Match the last node with LABEL */
-    unsigned int any   : 1; /* Match any node with any label, implies !FIXED */
-    char        *label;
+struct value {
+    enum type tag;
+    union {
+        struct locpath  *locpath;     /* T_LOCPATH */
+        int              number;      /* T_NUMBER */
+        char            *string;      /* T_STRING  */
+        unsigned int     bool;        /* T_BOOLEAN */
+    };
 };
 
-struct pathx {
-    size_t      nsegments;
-    struct segment *segments;
-    struct tree    *root;
+struct expr {
+    enum expr_tag tag;
+    enum type     type;
+    union {
+        struct {                       /* E_BINARY */
+            enum binary_op op;
+            struct expr *left;
+            struct expr *right;
+        };
+        value_ind_t      value_ind;    /* E_VALUE */
+        struct {                       /* E_APP */
+            const struct func *func;
+            struct expr       *args[];
+        };
+    };
 };
 
-#define for_each_segment(seg, path)                                 \
-    for (typeof((path)->segments) (seg) = (path)->segments;         \
-         (seg) < (path)->segments + (path)->nsegments;              \
-         (seg)++)
+/* Internal state of the evaluator/parser */
+struct state {
+    pathx_errcode_t errcode;
+    const char     *file;
+    int             line;
 
-#define last_segment(path) (path->segments + path->nsegments - 1)
+    const char     *txt;  /* Entire expression */
+    const char     *pos;  /* Current position within TXT during parsing */
 
-static int xstrtoul(const char *nptr, char **endptr, int base) {
-    unsigned long val;
-    char *end;
-    int result;
+    struct step    *step; /* Evaluation context */
 
-    errno = 0;
-    val = strtoul(nptr, &end, base);
-    if (errno || (!endptr && *end) || end == nptr || (int) val != val) {
-        result = -1;
+    /* A table of all values. The table is dynamically reallocated, i.e.
+     * pointers to struct value should not be used across calls that
+     * might allocate new values
+     *
+     * value_pool[0] is always the boolean false, and value_pool[1]
+     * always the boolean true
+     */
+    struct value  *value_pool;
+    value_ind_t    value_pool_used;
+    value_ind_t    value_pool_size;
+    /* Stack of values (as indices into value_pool), with bottom of
+       stack in values[0] */
+    value_ind_t   *values;
+    size_t         values_used;
+    size_t         values_size;
+    /* Stack of expressions, with bottom of stack in exprs[0] */
+    struct expr  **exprs;
+    size_t         exprs_used;
+    size_t         exprs_size;
+};
+
+/* Functions */
+
+typedef void (*func_impl_t)(struct state *state);
+
+struct func {
+    const char      *name;
+    unsigned int     arity;
+    enum type        type;
+    const enum type *arg_types;
+    func_impl_t      impl;
+};
+
+static void func_last(struct state *state);
+static void func_value(struct state *state);
+
+static const struct func builtin_funcs[] = {
+    { .name = "value", .arity = 0, .type = T_STRING, .arg_types = NULL,
+      .impl = func_value },
+    { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
+      .impl = func_last }
+};
+
+#define CHECK_ERROR                                                     \
+    if (state->errcode != PATHX_NOERROR) return
+
+#define CHECK_ERROR_RET0                                                \
+    if (state->errcode != PATHX_NOERROR) return 0
+
+#define STATE_ERROR(state, err)                                         \
+    do {                                                                \
+        state->errcode = err;                                           \
+        state->file = __FILE__;                                         \
+        state->line = __LINE__;                                         \
+    } while (0)
+
+#define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
+
+#define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
+
+#define ENOMEM_ON_NULL(state, v)                                        \
+    do {                                                                \
+        if (v == NULL) {                                                \
+            STATE_ERROR(state, PATHX_ENOMEM);                           \
+            return NULL;                                                \
+        }                                                               \
+    } while (0);
+
+/*
+ * Free the various data structures
+ */
+
+static void free_expr(struct expr *expr) {
+    if (expr == NULL)
+        return;
+    switch (expr->tag) {
+    case E_BINARY:
+        free_expr(expr->left);
+        free_expr(expr->right);
+        break;
+    case E_VALUE:
+        break;
+    case E_APP:
+        for (int i=0; i < expr->func->arity; i++)
+            free_expr(expr->args[i]);
+        break;
+    default:
+        assert(0);
+    }
+    free(expr);
+}
+
+static void free_pred(struct pred *pred) {
+    if (pred == NULL)
+        return;
+
+    for (int i=0; i < pred->nexpr; i++) {
+        free_expr(pred->exprs[i]);
+    }
+    free(pred->exprs);
+    free(pred);
+}
+
+static void free_locpath(struct locpath *locpath) {
+    if (locpath == NULL)
+        return;
+    while (locpath->steps != NULL) {
+        struct step *step = locpath->steps;
+        locpath->steps = step->next;
+        free(step->name);
+        free_pred(step->predicates);
+        free(step);
+    }
+    free(locpath);
+}
+
+static void free_step(struct step *step) {
+    while (step != NULL) {
+        struct step *del = step;
+        step = del->next;
+        free(del->name);
+        free_pred(del->predicates);
+        free(del);
+    }
+}
+
+static void free_state(struct state *state) {
+    for(int i=0; i < state->exprs_used; i++)
+        free_expr(state->exprs[i]);
+    free(state->exprs);
+
+    for(int i=0; i < state->value_pool_used; i++) {
+        struct value *v = state->value_pool + i;
+        switch (v->tag) {
+        case T_LOCPATH:
+            free_locpath(v->locpath);
+            break;
+        case T_STRING:
+            free(v->string);
+            break;
+        case T_BOOLEAN:
+        case T_NUMBER:
+            break;
+        default:
+            assert(0);
+        }
+    }
+    free(state->value_pool);
+    free(state->values);
+    free(state);
+}
+
+void free_pathx(struct pathx *pathx) {
+    if (pathx == NULL)
+        return;
+    free_state(pathx->state);
+    free(pathx);
+}
+
+/*
+ * Handling values
+ */
+static value_ind_t make_value(enum type tag, struct state *state) {
+    assert(tag != T_BOOLEAN);
+
+    if (state->value_pool_used >= state->value_pool_size) {
+        value_ind_t new_size = 2*state->value_pool_size;
+        if (new_size <= state->value_pool_size) {
+            STATE_ENOMEM;
+            return 0;
+        }
+        if (REALLOC_N(state->value_pool, new_size) < 0) {
+            STATE_ENOMEM;
+            return 0;
+        }
+        state->value_pool_size = new_size;
+    }
+    state->value_pool[state->value_pool_used].tag = tag;
+    return state->value_pool_used++;
+}
+
+static value_ind_t pop_value_ind(struct state *state) {
+    if (state->values_used > 0) {
+        state->values_used -= 1;
+        return state->values[state->values_used];
     } else {
-        result = val;
+        STATE_ERROR(state, PATHX_EINTERNAL);
+        assert(0);
+        return 0;
     }
-    if (endptr)
-        *endptr = end;
-    return result;
 }
 
-static int sibling_index(struct tree *tree) {
-    int indx = 0;
+static struct value *pop_value(struct state *state) {
+    value_ind_t vind = pop_value_ind(state);
+    if (HAS_ERROR(state))
+        return NULL;
+    return state->value_pool + vind;
+}
 
-    list_for_each(t, tree->parent->children) {
-        if (streqv(t->label, tree->label))
-            indx += 1;
-        if (t == tree)
-            return indx;
+static void push_value(value_ind_t vind, struct state *state) {
+    if (state->values_used >= state->values_size) {
+        size_t new_size = 2*state->values_size;
+        if (new_size == 0) new_size = 8;
+        if (REALLOC_N(state->values, new_size) < 0) {
+            STATE_ENOMEM;
+            return;
+        }
+        state->values_size = new_size;
     }
-    return indx;
+    state->values[state->values_used++] = vind;
+}
+
+static void push_boolean_value(int b, struct state *state) {
+    push_value(b != 0, state);
+}
+
+ATTRIBUTE_PURE
+static struct value *expr_value(struct expr *expr, struct state *state) {
+    return state->value_pool + expr->value_ind;
 }
 
-void free_pathx(struct pathx *path) {
-    if (path == NULL)
+/*************************************************************************
+ * Evaluation
+ ************************************************************************/
+static void eval_expr(struct expr *expr, struct state *state);
+
+static void func_last(struct state *state) {
+    int count = 0;
+    value_ind_t vind;
+    struct step *step = state->step;
+    struct tree *cur = step->cur;
+
+    for (struct tree *t = step_first(step, step->ctx);
+         t != NULL;
+         t = step_next(step))
+        count += 1;
+    state->step->cur = cur;
+
+    vind = make_value(T_NUMBER, state);
+    CHECK_ERROR;
+    state->value_pool[vind].number = count;
+    push_value(vind, state);
+}
+
+static void func_value(struct state *state) {
+    value_ind_t vind = make_value(T_STRING, state);
+    const char *s = state->step->cur->value;
+    char *v;
+
+    if (s == NULL) s = "";
+
+    v = strdup(s);
+    if (v == NULL) {
+        STATE_ENOMEM;
         return;
+    }
+    state->value_pool[vind].string = v;
+    push_value(vind, state);
+}
+
+static int calc_eq_locpath_locpath(struct locpath *ns1, struct locpath *ns2,
+                                   int neq, struct state *state) {
+    for_each_node(t1, ns1, state) {
+        for_each_node(t2, ns2, state) {
+            if (neq) {
+                if (!streqv(t1->value, t2->value))
+                    return 1;
+            } else {
+                if (streqv(t1->value, t2->value))
+                    return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+static int calc_eq_locpath_string(struct locpath *lp, const char *s,
+                                  int neq, struct state *state) {
+    lp->ctx = state->step->cur;
+    for_each_node(t, lp, state) {
+        if (neq) {
+            if (!streqv(t->value, s))
+                return 1;
+        } else {
+            if (streqv(t->value, s))
+                return 1;
+        }
+    }
+    return 0;
+}
+
+static void eval_eq(struct state *state, int neq) {
+    struct value *r = pop_value(state);
+    struct value *l = pop_value(state);
+    int res;
+
+    if (l->tag == T_LOCPATH && r->tag == T_LOCPATH) {
+        res = calc_eq_locpath_locpath(l->locpath, r->locpath, neq, state);
+    } else if (l->tag == T_LOCPATH) {
+        res = calc_eq_locpath_string(l->locpath, r->string, neq, state);
+    } else if (r->tag == T_LOCPATH) {
+        res = calc_eq_locpath_string(r->locpath, l->string, neq, state);
+    } else {
+        assert(l->tag == T_STRING);
+        assert(r->tag == T_STRING);
+        res = streqv(l->string, r->string);
+    }
+    CHECK_ERROR;
+
+    push_boolean_value(res, state);
+}
+
+static void eval_arith(struct state *state, enum binary_op op) {
+    struct value *r = pop_value(state);
+    struct value *l = pop_value(state);
+    value_ind_t vind;
+    int res;
+
+    assert(l->tag == T_NUMBER);
+    assert(r->tag == T_NUMBER);
+
+    vind = make_value(T_NUMBER, state);
+    CHECK_ERROR;
+
+    if (op == OP_PLUS)
+        res = l->number + r->number;
+    else if (op == OP_MINUS)
+        res = l->number - r->number;
+    else if (op == OP_STAR)
+        res = l->number * r->number;
+    else
+        assert(0);
+
+    state->value_pool[vind].number = res;
+    push_value(vind, state);
+}
+
+static void eval_binary(struct expr *expr, struct state *state) {
+    eval_expr(expr->left, state);
+    eval_expr(expr->right, state);
+    CHECK_ERROR;
+
+    switch (expr->op) {
+    case OP_EQ:
+        eval_eq(state, 0);
+        break;
+    case OP_NEQ:
+        eval_eq(state, 1);
+        break;
+    case OP_MINUS:
+    case OP_PLUS:
+    case OP_STAR:
+        eval_arith(state, expr->op);
+        break;
+    default:
+        assert(0);
+    }
+}
+
+static void eval_app(struct expr *expr, struct state *state) {
+    assert(expr->tag == E_APP);
+
+    for (int i=0; i < expr->func->arity; i++) {
+        eval_expr(expr->args[i], state);
+        CHECK_ERROR;
+    }
+    expr->func->impl(state);
+}
+
+static void eval_expr(struct expr *expr, struct state *state) {
+    CHECK_ERROR;
+    switch (expr->tag) {
+    case E_BINARY:
+        eval_binary(expr, state);
+        break;
+    case E_VALUE:
+        push_value(expr->value_ind, state);
+        break;
+    case E_APP:
+        eval_app(expr, state);
+        break;
+    default:
+        assert(0);
+    }
+}
 
-    if (path->segments != NULL) {
-        for (int i=0; i < path->nsegments; i++) {
-            free(path->segments[i].label);
+/*************************************************************************
+ * Typechecker
+ *************************************************************************/
+
+static void check_expr(struct expr *expr, struct state *state);
+
+/* Typecheck a list of predicates. A predicate is a function of
+ * one of the following types:
+ *
+ * T_LOCPATH -> T_BOOLEAN
+ * T_NUMBER  -> T_BOOLEAN  (position test)
+ * T_BOOLEAN -> T_BOOLEAN
+ */
+static void check_preds(struct pred *pred, struct state *state) {
+    for (int i=0; i < pred->nexpr; i++) {
+        struct expr *e = pred->exprs[i];
+        check_expr(e, state);
+        CHECK_ERROR;
+        if (e->type != T_LOCPATH && e->type != T_NUMBER &&
+            e->type != T_BOOLEAN) {
+            STATE_ERROR(state, PATHX_ETYPE);
+            return;
+        }
+    }
+}
+
+/* Typecheck a value; the type of the expression is the type of the value.
+ * We also need to make sure that the expressions inside predicates are
+ * typechecked for locpaths.
+ */
+static void check_value(struct expr *expr, struct state *state) {
+    assert(expr->tag == E_VALUE);
+    struct value *v = expr_value(expr, state);
+
+    if (v->tag == T_LOCPATH) {
+        struct locpath *locpath = v->locpath;
+        list_for_each(s, locpath->steps) {
+            if (s->predicates != NULL) {
+                check_preds(s->predicates, state);
+                CHECK_ERROR;
+            }
+        }
+    }
+    expr->type = v->tag;
+}
+
+static void check_app(struct expr *expr, struct state *state) {
+    assert(expr->tag == E_APP);
+    for (int i=0; i < expr->func->arity; i++) {
+        check_expr(expr->args[i], state);
+        CHECK_ERROR;
+        if (expr->args[i]->type != expr->func->arg_types[i]) {
+            STATE_ERROR(state, PATHX_ETYPE);
+            return;
         }
-        free(path->segments);
     }
-    free(path);
+    expr->type = expr->func->type;
 }
 
-/* Take a path expression PATH and turn it into a path structure usable for
- * search. The TREE components of the PATH segments will be NULL. Call
- * PATH_FIRST to initialize them to the first match.
+/* Check the binary operators. Type rules:
+ *
+ * '=', '!='  : T_LOCPATH -> T_LOCPATH -> T_BOOLEAN
+ *              T_STRING  -> T_LOCPATH -> T_BOOLEAN
+ *              T_LOCPATH -> T_STRING  -> T_BOOLEAN
+ *
+ * '|' :        T_LOCPATH -> T_LOCPATH -> T_LOCPATH
+ *
+ * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
  *
- * A path expression follows the grammar
- * PATH = '/' ? SEGMENT ( '/' SEGMENT )* '/' ?
- * SEGMENT = STRING ('[' N ']') ? | '*'
- * where STRING is any string not containing '/' and N is a positive number
  */
-int pathx_parse(const struct tree *root, const char *path,
-                struct pathx **px) {
+static void check_binary(struct expr *expr, struct state *state) {
+    check_expr(expr->left, state);
+    check_expr(expr->right, state);
+    CHECK_ERROR;
 
-    if (ALLOC(*px) < 0)
-        return -1;
-    (*px)->root = (struct tree *) root;
-
-    if (*path != SEP)
-        (*px)->nsegments = 1;
-    for (const char *p = path; *p != '\0'; p++) {
-        if (*p == SEP) {
-            while (*p == SEP) p++;
-            if (*p == '\0')
+    enum type l = expr->left->type;
+    enum type r = expr->right->type;
+    int  ok = 1;
+    enum type res;
+
+    switch(expr->op) {
+    case OP_EQ:
+    case OP_NEQ:
+        ok = ((l == T_LOCPATH || l == T_STRING)
+              && (r == T_LOCPATH || r == T_STRING));
+        res = T_BOOLEAN;
+        break;
+    case OP_PLUS:
+    case OP_MINUS:
+    case OP_STAR:
+        ok =  (l == T_NUMBER && r == T_NUMBER);
+        res = T_NUMBER;
+        break;
+    default:
+        assert(0);
+    }
+    if (! ok) {
+        STATE_ERROR(state, PATHX_ETYPE);
+    } else {
+        expr->type = res;
+    }
+}
+
+/* Typecheck an expression */
+static void check_expr(struct expr *expr, struct state *state) {
+    CHECK_ERROR;
+    switch(expr->tag) {
+    case E_BINARY:
+        check_binary(expr, state);
+        break;
+    case E_VALUE:
+        check_value(expr, state);
+        break;
+    case E_APP:
+        check_app(expr, state);
+        break;
+    default:
+        assert(0);
+    }
+}
+
+static void append_step(struct locpath *locpath, struct step *tail) {
+    tail->prev = locpath->last;
+    if (locpath->last != NULL)
+        locpath->last->next = tail;
+    locpath->last = tail;
+    if (locpath->steps == NULL)
+        locpath->steps = tail;
+}
+
+static void prepend_step(struct locpath *locpath, struct step *head) {
+    if (locpath->steps != NULL)
+        locpath->steps->prev = head;
+    head->next = locpath->steps;
+    locpath->steps = head;
+    if (locpath->last == NULL)
+        locpath->last = head;
+}
+
+/*
+ * Utility functions for the parser
+ */
+
+static void skipws(struct state *state) {
+    while (isspace(*state->pos)) state->pos += 1;
+}
+
+static int match(struct state *state, char m) {
+    skipws(state);
+
+    if (*state->pos == '\0')
+        return 0;
+    if (*state->pos == m) {
+        state->pos += 1;
+        return 1;
+    }
+    return 0;
+}
+
+static int peek(struct state *state, const char *chars) {
+    return strchr(chars, *state->pos) != NULL;
+}
+
+/* Return 1 if STATE->POS starts with TOKEN, followed by optional
+ * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
+ * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
+ * unchanged.
+ */
+static int looking_at(struct state *state, const char *token,
+                      const char *follow) {
+    if (STREQLEN(state->pos, token, strlen(token))) {
+        const char *p = state->pos + strlen(token);
+        while (isspace(*p)) p++;
+        if (STREQLEN(p, follow, strlen(follow))) {
+            state->pos = p + strlen(follow);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*************************************************************************
+ * The parser
+ *************************************************************************/
+
+static void parse_expr(struct state *state);
+
+static struct expr* pop_expr(struct state *state) {
+    if (state->exprs_used > 0) {
+        state->exprs_used -= 1;
+        return state->exprs[state->exprs_used];
+    } else {
+        STATE_ERROR(state, PATHX_EINTERNAL);
+        assert(0);
+        return NULL;
+    }
+}
+
+static void push_expr(struct expr *expr, struct state *state) {
+    if (state->exprs_used >= state->exprs_size) {
+        size_t new_size = 2*state->exprs_size;
+        if (new_size == 0) new_size = 8;
+        if (REALLOC_N(state->exprs, new_size) < 0) {
+            STATE_ENOMEM;
+            return;
+        }
+        state->exprs_size = new_size;
+    }
+    state->exprs[state->exprs_used++] = expr;
+}
+
+static void push_new_binary_op(enum binary_op op, struct state *state) {
+    struct expr *expr = NULL;
+    if (ALLOC(expr) < 0) {
+        STATE_ENOMEM;
+        return;
+    }
+
+    expr->tag = E_BINARY;
+    expr->op  = op;
+    expr->right = pop_expr(state);
+    expr->left = pop_expr(state);
+    push_expr(expr, state);
+}
+
+/*
+ * Name ::= /[^/\[ \t\n]+/
+ */
+static char *parse_name(struct state *state) {
+    const char *s = state->pos;
+    char *result;
+
+    while (*state->pos != '\0' &&
+           *state->pos != L_BRACK && *state->pos != SEP &&
+           *state->pos != R_BRACK &&
+           !isspace(*state->pos))
+        state->pos += 1;
+
+    if (state->pos == s) {
+        STATE_ERROR(state, PATHX_ENAME);
+        return NULL;
+    }
+
+    result = strndup(s, state->pos - s);
+    if (result == NULL) {
+        STATE_ENOMEM;
+        return NULL;
+    }
+    return result;
+}
+
+/*
+ * Predicate    ::= "[" Expr "]" *
+ */
+static struct pred *parse_predicates(struct state *state) {
+    struct pred *pred = NULL;
+    int nexpr = 0;
+
+    while (match(state, L_BRACK)) {
+        parse_expr(state);
+        nexpr += 1;
+        CHECK_ERROR_RET0;
+
+        if (! match(state, R_BRACK)) {
+            STATE_ERROR(state, PATHX_EPRED);
+            return NULL;
+        }
+        skipws(state);
+    }
+
+    if (nexpr == 0)
+        return NULL;
+
+    if (ALLOC(pred) < 0) {
+        STATE_ENOMEM;
+        return NULL;
+    }
+    pred->nexpr = nexpr;
+
+    if (ALLOC_N(pred->exprs, nexpr) < 0) {
+        free_pred(pred);
+        STATE_ENOMEM;
+        return NULL;
+    }
+
+    for (int i = nexpr - 1; i >= 0; i--)
+        pred->exprs[i] = pop_expr(state);
+
+    return pred;
+}
+
+/*
+ * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
+ * AxisSpecifier ::= AxisName '::' | <epsilon>
+ * AxisName ::= 'ancestor'
+ *            | 'ancestor-or-self'
+ *            | 'child'
+ *            | 'descendant'
+ *            | 'descendant-or-self'
+ *            | 'parent'
+ *            | 'self'
+ *            | 'root'
+ */
+static struct step *parse_step(struct state *state) {
+    struct step *step;
+
+    if (ALLOC(step) < 0) {
+        STATE_ENOMEM;
+        return NULL;
+    }
+
+    if (*state->pos == '.' && state->pos[1] == '.') {
+        state->pos += 2;
+        step->axis = PARENT;
+    } else if (match(state, '.')) {
+        step->axis = SELF;
+    } else {
+        step->axis = CHILD;
+        for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
+            if (looking_at(state, axis_names[i], "::")) {
+                step->axis = i;
                 break;
-            (*px)->nsegments++;
+            }
+        }
+
+        if (! match(state, '*')) {
+            step->name = parse_name(state);
+            if (HAS_ERROR(state))
+                goto error;
         }
+
+        step->predicates = parse_predicates(state);
+        if (HAS_ERROR(state))
+            goto error;
     }
-    if ((*px)->nsegments == 0)
-        goto error;
+    return step;
 
-    CALLOC((*px)->segments, (*px)->nsegments);
-    if ((*px)->segments == NULL)
+ error:
+    free_step(step);
+    return NULL;
+}
+
+static struct step *make_step(enum axis axis, struct state *state) {
+    struct step *result = NULL;
+
+    if (ALLOC(result) < 0) {
+        STATE_ENOMEM;
+        return NULL;
+    }
+    result->axis = axis;
+    return result;
+}
+
+/*
+ * RelativeLocationPath ::= Step
+ *                        | RelativeLocationPath '/' Step
+ *                        | AbbreviatedRelativeLocationPath
+ * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
+ *
+ * The above is the same as
+ * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
+ */
+static struct locpath *
+parse_relative_location_path(struct state *state) {
+    struct step *step = NULL;
+    struct locpath *locpath = NULL;
+
+    step = parse_step(state);
+    CHECK_ERROR_RET0;
+
+    if (ALLOC(locpath) < 0) {
+        STATE_ENOMEM;
         goto error;
+    }
+    append_step(locpath, step);
+    step = NULL;
 
-    for_each_segment(seg, *px) {
-        const char *next, *brack;
+    while (match(state, '/')) {
+        if (*state->pos == '/') {
+            state->pos += 1;
+            step = make_step(DESCENDANT_OR_SELF, state);
+            ENOMEM_ON_NULL(state, step);
+            append_step(locpath, step);
+        }
+        step = parse_step(state);
+        if (HAS_ERROR(state))
+            goto error;
+        append_step(locpath, step);
+        step = NULL;
+    }
+    return locpath;
 
-        while (*path == SEP)
-            path += 1;
-        assert(*path);
+ error:
+    free_step(step);
+    free_locpath(locpath);
+    return NULL;
+}
 
-        brack = strchr(path, L_BRACK);
-        next = strchr(path, SEP);
-        if (next == NULL)
-            next = path + strlen(path);
+/*
+ * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+ * AbsoluteLocationPath ::= '/' RelativeLocationPath?
+ *                        | AbbreviatedAbsoluteLocationPath
+ * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
+ *
+ */
+static void parse_location_path(struct state *state) {
+    struct expr *expr = NULL;
+    struct locpath *locpath = NULL;
 
-        if (path[0] == '*') {
-            seg->any = 1;
-        } else if (brack == NULL || brack >= next) {
-            seg->index = 0;
-            seg->label = strndup(path, next - path);
+    if (match(state, '/')) {
+        if (*state->pos == '/') {
+            state->pos += 1;
+            locpath = parse_relative_location_path(state);
+            if (HAS_ERROR(state))
+                return;
+            struct step *step = make_step(DESCENDANT_OR_SELF, state);
+            if (HAS_ERROR(state))
+                goto error;
+            prepend_step(locpath, step);
         } else {
-            seg->label = strndup(path, brack - path);
-            if (STREQLEN(brack, last_func, strlen(last_func))) {
-                seg->last = 1;
-                seg->fixed = 1;
+            if (*state->pos != '\0') {
+                locpath = parse_relative_location_path(state);
             } else {
-                char *end;
-                seg->index = xstrtoul(brack + 1, &end, 10);
-                seg->fixed = 1;
-                if (seg->index <= 0)
-                    goto error;
-                if (*end != R_BRACK)
-                    goto error;
+                if (ALLOC(locpath) < 0)
+                    goto err_nomem;
             }
+            struct step *step = make_step(ROOT, state);
+            if (HAS_ERROR(state))
+                goto error;
+            prepend_step(locpath, step);
         }
-        path = next;
-        while (*path == SEP) path += 1;
+    } else {
+        locpath = parse_relative_location_path(state);
     }
-    return 0;
 
+    if (ALLOC(expr) < 0)
+        goto err_nomem;
+    expr->tag = E_VALUE;
+    expr->value_ind = make_value(T_LOCPATH, state);
+    CHECK_ERROR;
+    expr_value(expr, state)->locpath = locpath;
+    push_expr(expr, state);
+    return;
+
+ err_nomem:
+    STATE_ENOMEM;
  error:
-    free_pathx(*px);
-    return -1;
+    free_expr(expr);
+    free_locpath(locpath);
+    return;
 }
 
-static void calc_last_index(struct segment *seg, struct tree *tree) {
-    if (seg->last) {
-        seg->index = 0;
-        list_for_each(t, tree->parent->children) {
-            if (streqv(t->label, seg->label))
-                seg->index += 1;
+/*
+ * Number       ::= /[0-9]+/
+ */
+static void parse_number(struct state *state) {
+    struct expr *expr = NULL;
+    unsigned long val;
+    char *end;
+
+    errno = 0;
+    val = strtoul(state->pos, &end, 10);
+    if (errno || end == state->pos || (int) val != val) {
+        STATE_ERROR(state, PATHX_ENUMBER);
+        return;
+    }
+
+    state->pos = end;
+
+    if (ALLOC(expr) < 0)
+        goto err_nomem;
+    expr->tag = E_VALUE;
+    expr->value_ind = make_value(T_NUMBER, state);
+    if (HAS_ERROR(state))
+        goto error;
+    expr_value(expr, state)->number = val;
+
+    push_expr(expr, state);
+    return;
+
+ err_nomem:
+    STATE_ENOMEM;
+ error:
+    free_expr(expr);
+    return;
+}
+
+/*
+ * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
+ */
+static void parse_literal(struct state *state) {
+    char delim;
+    const char *s;
+    struct expr *expr = NULL;
+
+    if (*state->pos == '"')
+        delim = '"';
+    else if (*state->pos == '\'')
+        delim = '\'';
+    else {
+        STATE_ERROR(state, PATHX_ESTRING);
+        return;
+    }
+    state->pos += 1;
+
+    s = state->pos;
+    while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
+
+    if (*state->pos != delim) {
+        STATE_ERROR(state, PATHX_EDELIM);
+        return;
+    }
+    state->pos += 1;
+
+    if (ALLOC(expr) < 0)
+        goto err_nomem;
+    expr->tag = E_VALUE;
+    expr->value_ind = make_value(T_STRING, state);
+    if (HAS_ERROR(state))
+        goto error;
+    expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
+    if (expr_value(expr, state)->string == NULL)
+        goto err_nomem;
+
+    push_expr(expr, state);
+    return;
+
+ err_nomem:
+    STATE_ENOMEM;
+ error:
+    free_expr(expr);
+    return;
+}
+
+/*
+ * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
+ */
+static void parse_function_call(struct state *state) {
+    const struct func *func = NULL;
+    struct expr *expr = NULL;
+    int nargs = 0;
+
+    for (int i=0; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
+        if (looking_at(state, builtin_funcs[i].name, "("))
+            func = builtin_funcs + i;
+    }
+    if (func == NULL) {
+        STATE_ERROR(state, PATHX_ENAME);
+        return;
+    }
+
+    if (! match(state, ')')) {
+        do {
+            nargs += 1;
+            parse_expr(state);
+            CHECK_ERROR;
+        } while (match(state, ','));
+
+        if (! match(state, ')')) {
+            STATE_ERROR(state, PATHX_EDELIM);
+            return;
         }
     }
+
+    if (nargs != func->arity) {
+        STATE_ERROR(state, PATHX_EDELIM);
+        return;
+    }
+
+    if (ALLOC(expr) < 0) {
+        STATE_ENOMEM;
+        return;
+    }
+    expr->tag = E_APP;
+    if (ALLOC_N(expr->args, nargs) < 0) {
+        free_expr(expr);
+        STATE_ENOMEM;
+        return;
+    }
+    expr->func = func;
+    for (int i = nargs - 1; i >= 0; i--)
+        expr->args[i] = pop_expr(state);
+
+    push_expr(expr, state);
 }
 
-/* Assumes that for SEG->LAST == 1, SEG->INDEX has been calculated
-   amongst the list of TREE's siblings */
-static int seg_match(struct segment *seg, struct tree *tree) {
-    if (seg->any || streqv(tree->label, seg->label)) {
-        int indx = sibling_index(tree);
-        if (!seg->fixed || indx == seg->index) {
-            seg->index = indx;
-            return 1;
+/*
+ * PrimaryExpr ::= Literal
+ *               | Number
+ *               | FunctionCall
+ *
+ */
+static void parse_primary_expr(struct state *state) {
+    if (peek(state, "'\"")) {
+        parse_literal(state);
+    } else if (peek(state, "0123456789")) {
+        parse_number(state);
+    } else {
+        parse_function_call(state);
+    }
+}
+
+static int looking_at_primary_expr(struct state *state) {
+    const char *s = state->pos;
+    /* Is it a Number or Literal ? */
+    if (peek(state, "'\"0123456789"))
+        return 1;
+
+    /* Or maybe a function call, i.e. a word followed by a '(' ?
+     * Note that our function names are only [a-zA-Z]+
+     */
+    while (*s != '\0' && isalpha(*s)) s++;
+    while (*s != '\0' && isspace(*s)) s++;
+    return *s == '(';
+}
+
+/*
+ * PathExpr ::= LocationPath | PrimaryExpr
+ *
+ * The grammar is ambiguous here: the expression '42' can either be the
+ * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
+ * reason for this ambiguity is that we allow node names like '42' in the
+ * tree; rather than forbid them, we resolve the ambiguity by always
+ * parsing '42' as a number, and requiring that the user write the
+ * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
+ */
+static void parse_path_expr(struct state *state) {
+    if (looking_at_primary_expr(state)) {
+        parse_primary_expr(state);
+    } else {
+        parse_location_path(state);
+    }
+}
+
+/*
+ * MultiplicativeExpr ::= PathExpr ('*' PathExpr)*
+ */
+static void parse_multiplicative_expr(struct state *state) {
+    parse_path_expr(state);
+    while (match(state, '*')) {
+        parse_path_expr(state);
+        push_new_binary_op(OP_STAR, state);
+    }
+}
+
+/*
+ * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
+ * AdditiveOp   ::= '+' | '-'
+ */
+static void parse_additive_expr(struct state *state) {
+    parse_multiplicative_expr(state);
+    while (*state->pos == '+' || *state->pos == '-') {
+        enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
+        state->pos += 1;
+        skipws(state);
+        parse_multiplicative_expr(state);
+        push_new_binary_op(op, state);
+    }
+}
+
+/*
+ * EqualityExpr ::= AdditiveExpr (EqualityOp AdditiveExpr)?
+ * EqualityOp ::= "=" | "!="
+ */
+static void parse_equality_expr(struct state *state) {
+    parse_additive_expr(state);
+    if (*state->pos == '=' ||
+        (*state->pos == '!' && state->pos[1] == '=')) {
+        enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
+        state->pos += (op == OP_EQ) ? 1 : 2;
+        skipws(state);
+        parse_additive_expr(state);
+        push_new_binary_op(op, state);
+    }
+}
+
+/*
+ * Expr ::= EqualityExpr
+ */
+static void parse_expr(struct state *state) {
+    skipws(state);
+    parse_equality_expr(state);
+}
+
+int pathx_parse(const struct tree *tree, const char *txt,
+                struct pathx **pathx) {
+    struct state *state = NULL;
+
+    *pathx = NULL;
+
+    if (ALLOC(*pathx) < 0)
+        return PATHX_ENOMEM;
+
+    if (tree != NULL)
+        (*pathx)->origin = (struct tree *) tree->parent;
+
+    /* Set up state */
+    if (ALLOC((*pathx)->state) < 0) {
+        free_pathx(*pathx);
+        *pathx = NULL;
+        return PATHX_ENOMEM;
+    }
+    state = (*pathx)->state;
+
+    state->errcode = PATHX_NOERROR;
+    state->txt = txt;
+    state->pos = txt;
+
+    if (ALLOC_N(state->value_pool, 8) < 0) {
+        STATE_ENOMEM;
+        goto done;
+    }
+    state->value_pool_size = 8;
+    state->value_pool[0].tag = T_BOOLEAN;
+    state->value_pool[0].bool = 0;
+    state->value_pool[1].tag = T_BOOLEAN;
+    state->value_pool[1].bool = 1;
+    state->value_pool_used = 2;
+
+    /* Parse */
+    parse_expr(state);
+    if (HAS_ERROR(state))
+        goto done;
+
+    if (state->exprs_used != 1) {
+        STATE_ERROR(state, PATHX_EINTERNAL);
+        goto done;
+    }
+
+    /* Typecheck */
+    check_expr(state->exprs[0], state);
+    if (HAS_ERROR(state))
+        goto done;
+
+    if (state->exprs[0]->type != T_LOCPATH) {
+        STATE_ERROR(state, PATHX_ETYPE);
+        goto done;
+    }
+
+    /* Evaluate */
+    eval_expr(state->exprs[0], state);
+    if (HAS_ERROR(state))
+        goto done;
+
+    if (state->values_used != 1) {
+        STATE_ERROR(state, PATHX_EINTERNAL);
+        goto done;
+    }
+
+    (*pathx)->locpath = pop_value(state)->locpath;
+
+ done:
+    return state->errcode;
+}
+
+/*************************************************************************
+ * Searching in the tree
+ *************************************************************************/
+
+/* Return the 1-based position of STEP->CUR amongst its siblings */
+static int position(struct step *step) {
+    int pos = 0;
+    struct tree *cur = step->cur;
+
+    for (struct tree *t = step_first(step, step->ctx);
+         t != NULL;
+         t = step_next(step)) {
+        if (streqv(t->label, cur->label)) {
+            pos += 1;
+            if (t == cur) {
+                step->cur = cur;
+                return pos;
+            }
         }
     }
-    return 0;
+    assert(0);
 }
 
-static int tree_next(struct tree **tree, int *depth, int mindepth) {
-    while ((*tree)->next == NULL && *depth >= mindepth) {
-        *depth -= 1;
-        *tree = (*tree)->parent;
+static int pred_matches(struct step *step, struct state *state) {
+    struct pred *pred = step->predicates;
+
+    if (step->cur == NULL)
+        return 0;
+
+    if (pred == NULL)
+        return 1;
+
+    for (int i=0; i < pred->nexpr; i++) {
+        state->step = step;
+        eval_expr(pred->exprs[i], state);
+        struct value *v = pop_value(state);
+        switch(v->tag) {
+        case T_BOOLEAN:
+            if (! v->bool)
+                return 0;
+            break;
+        case T_NUMBER:
+            if (position(state->step) != v->number)
+                return 0;
+            break;
+        case T_LOCPATH:
+            v->locpath->ctx = state->step->cur;
+            if (locpath_first(v->locpath, state) == NULL)
+                return 0;
+            break;
+        default:
+            assert(0);
+        }
     }
-    if (*depth < mindepth)
-        return -1;
-    *tree = (*tree)->next;
-    return 0;
+    return 1;
+}
+
+static struct tree *step_first(struct step *step, struct tree *ctx) {
+    step->ctx = ctx;
+    switch (step->axis) {
+    case SELF:
+        step->cur = ctx;
+        break;
+    case CHILD:
+        step->cur = ctx->children;
+        break;
+    case DESCENDANT:
+        step->cur = ctx->children;
+        break;
+    case DESCENDANT_OR_SELF:
+        step->cur = ctx;
+        break;
+    case PARENT:
+    case ANCESTOR:
+        step->cur = ctx->parent;
+        break;
+    case ROOT:
+        while (ctx->parent != ctx)
+            ctx = ctx->parent;
+        step->cur = ctx;
+        break;
+    default:
+        assert(0);
+    }
+    if (step->cur == NULL)
+        return NULL;
+    if (step->name != NULL && ! streqv(step->cur->label, step->name))
+        step_next(step);
+    return step->cur;
 }
 
-/* Given a node TREE that should match the segment SEGNR in PATH,
-   find the next node that matches all of PATH, or return NULL */
-static struct tree *complete_path(struct pathx *path, int segnr,
-                                  struct tree *tree) {
-    int cur = segnr;
+static struct tree *step_next(struct step *step) {
+    while (step->cur != NULL) {
+        switch (step->axis) {
+        case SELF:
+            step->cur = NULL;
+            break;
+        case CHILD:
+            step->cur = step->cur->next;
+            break;
+        case DESCENDANT:
+        case DESCENDANT_OR_SELF:
+            if (step->cur->children != NULL) {
+                step->cur = step->cur->children;
+            } else {
+                while (step->cur->next == NULL && step->cur != step->ctx)
+                    step->cur = step->cur->parent;
+                if (step->cur == step->ctx)
+                    step->cur = NULL;
+                else
+                    step->cur = step->cur->next;
+            }
+            break;
+        case PARENT:
+            step->cur = NULL;
+            break;
+        case ANCESTOR:
+            if (step->cur->parent == step->cur)
+                step->cur = NULL;
+            else
+                step->cur = step->cur->parent;
+            break;
+        case ROOT:
+            step->cur = NULL;
+            break;
+        default:
+            assert(0);
+        }
+        if (step->cur != NULL)
+            if (step->name == NULL || streqv(step->cur->label, step->name))
+                break;
+    }
+    return step->cur;
+}
 
-    if (segnr >= path->nsegments || tree == NULL)
+static struct tree *complete_path(struct step *step, struct state *state) {
+    if (step == NULL)
         return NULL;
 
-    calc_last_index(path->segments + cur, tree->parent->children);
     while (1) {
-        int found = seg_match(path->segments + cur, tree);
-
-        if (found && cur == path->nsegments - 1) {
-            return tree;
-        } else if (found && cur + 1 < path->nsegments &&
-                   tree->children != NULL) {
-            cur += 1;
-            tree = tree->children;
-            calc_last_index(path->segments + cur, tree);
+        int found = pred_matches(step, state);
+
+        if (found && step->next == NULL) {
+            return step->cur;
+        } else if (found && step_first(step->next, step->cur) != NULL) {
+            step = step->next;
         } else {
-            if (tree_next(&tree, &cur, segnr) < 0)
-                return NULL;
+            do {
+                step_next(step);
+                if (step->cur == NULL) {
+                    step = step->prev;
+                    if (step == NULL)
+                        return NULL;
+                    step_next(step);
+                }
+            } while (step->cur == NULL);
         }
     }
 }
 
-struct tree *pathx_next(struct pathx *path, struct tree *cur) {
-    int segnr = path->nsegments - 1;
+static struct tree *locpath_next(struct locpath *lp, struct state *state) {
+    step_next(lp->last);
+    return complete_path(lp->last, state);
+}
 
-    if (tree_next(&cur, &segnr, -1) < 0)
-        return NULL;
+struct tree *pathx_next(struct pathx *pathx,
+                        ATTRIBUTE_UNUSED struct tree *cur) {
+    return locpath_next(pathx->locpath, pathx->state);
+}
 
-    return complete_path(path, segnr, cur);
+static struct tree *locpath_first(struct locpath *lp, struct state *state) {
+    step_first(lp->steps, lp->ctx);
+    return complete_path(lp->steps, state);
 }
 
 /* Find the first node in TREE matching PATH. */
-struct tree *pathx_first(struct pathx *path) {
-    return complete_path(path, 0, path->root);
+struct tree *pathx_first(struct pathx *pathx) {
+    if (pathx->origin == NULL)
+        return NULL;
+    pathx->locpath->ctx = pathx->origin;
+    return locpath_first(pathx->locpath, pathx->state);
 }
 
 /* Find a node in the tree that matches the longest prefix of PATH.
@@ -248,65 +1505,80 @@ struct tree *pathx_first(struct pathx *path) {
  * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
  * prefix matches, and -1 if more than one node in the tree match.
  *
- * MATCH is set to the tree node that matches, and SEGNR to the
- * number of the segment in PATH where MATCH matched. If no node matches,
- * MATCH will be NULL, and SEGNR -1
+ * TMATCH is set to the tree node that matches, and SMATCH to the next step
+ * after the one where TMATCH matched. If no node matches or multiple nodes
+ * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
+ * one node matches, TMATCH will be that node, and SMATCH will be NULL.
  */
-static int path_search(struct pathx *path, struct tree **match, int *segnr) {
+static int locpath_search(struct locpath *lp, struct state *state,
+                          struct tree **tmatch, struct step **smatch) {
     struct tree **matches = NULL;
-    int *nmatches = NULL;
-    int result;
+    struct step *step;
 
-    if (ALLOC_N(matches, path->nsegments) < 0)
-        return -1;
+    int *nmatches = NULL, nsteps = 0;
+    int result, level;
 
-    if (ALLOC_N(nmatches, path->nsegments) < 0) {
-        free(matches);
+    lp->ctx = *tmatch;
+    *smatch = lp->steps;
+
+    if (lp->ctx == NULL)
         return -1;
-    }
 
-    if (match)
-        *match = NULL;
-    if (segnr)
-        *segnr = -1;
+    list_for_each(s, lp->steps) nsteps++;
 
-    if (path->root == NULL)
+    if (ALLOC_N(matches, nsteps) < 0)
         return -1;
 
-    struct tree *tree = path->root;
-    int cur = 0;
-    calc_last_index(path->segments, tree);
+    if (ALLOC_N(nmatches, nsteps) < 0) {
+        free(matches);
+        return -1;
+    }
 
-    while (1) {
-        int found = seg_match(path->segments + cur, tree);
+    step = lp->steps;
+    if (step_first(step, lp->ctx) == NULL) {
+        *smatch = lp->steps;
+        return 0;
+    }
+
+    level = 0;
+    while (step != NULL) {
+        int found = pred_matches(step, state);
 
         if (found) {
-            if (nmatches[cur] == 0)
-                matches[cur] = tree;
-            nmatches[cur]++;
+            if (nmatches[level] == 0)
+                matches[level] = step->cur;
+            nmatches[level]++;
         }
 
-        if (found && cur + 1 < path->nsegments && tree->children != NULL) {
-            cur += 1;
-            tree = tree->children;
-            calc_last_index(path->segments + cur, tree);
+        if (found && step->next == NULL) {
+            break;
+        } else if (found && step_first(step->next, step->cur) != NULL) {
+            step = step->next;
+            level += 1;
         } else {
-            if (tree_next(&tree, &cur, 0) < 0)
-                break;
+            do {
+                step_next(step);
+                if (step->cur == NULL) {
+                    step = step->prev;
+                    level -= 1;
+                    if (step != NULL)
+                        step_next(step);
+                }
+            } while (step != NULL && step->cur == NULL);
         }
     }
 
-    result = nmatches[path->nsegments - 1] == 1;
-    for (cur = path->nsegments-1; cur >=0; cur--) {
-        if (nmatches[cur] > 1) {
+    result = nmatches[nsteps - 1] == 1;
+    for (level = nsteps-1, step = lp->last;
+         level >=0;
+         level--, step = step->prev) {
+        if (nmatches[level] > 1) {
             result = -1;
             break;
         }
-        if (nmatches[cur] == 1) {
-            if (match)
-                *match = matches[cur];
-            if (segnr)
-                *segnr = cur;
+        if (nmatches[level] == 1) {
+            *tmatch = matches[level];
+            *smatch = step->next;
             break;
         }
     }
@@ -323,25 +1595,27 @@ static int path_search(struct pathx *path, struct tree **match, int *segnr) {
  * error.
  */
 int pathx_expand_tree(struct pathx *path, struct tree **tree) {
-    int r, segnr;
+    int r;
+    struct step *step;
 
-    *tree = path->root;
-    r = path_search(path, tree, &segnr);
+    *tree = path->origin;
+    r = locpath_search(path->locpath, path->state, tree, &step);
     if (r == -1)
         return -1;
 
-    if (segnr == path->nsegments - 1)
+    if (step == NULL)
         return 0;
 
     struct tree *first_child = NULL;
     struct tree *parent = *tree;
-    struct segment *seg = path->segments + segnr + 1;
 
     if (parent == NULL)
-        parent = path->root->parent;
+        parent = path->origin;
 
-    for (struct segment *s = seg ; s <= last_segment(path); s++) {
-        struct tree *t = make_tree(strdup(s->label), NULL, parent, NULL);
+    list_for_each(s, step) {
+        if (s->name == NULL || s->axis != CHILD)
+            goto error;
+        struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
         if (first_child == NULL)
             first_child = t;
         if (t == NULL || t->label == NULL)
@@ -357,8 +1631,10 @@ int pathx_expand_tree(struct pathx *path, struct tree **tree) {
     return 0;
 
  error:
-    list_remove(first_child, first_child->parent->children);
-    free_tree(first_child);
+    if (first_child != NULL) {
+        list_remove(first_child, first_child->parent->children);
+        free_tree(first_child);
+    }
     *tree = NULL;
     return -1;
 }
@@ -374,6 +1650,27 @@ int pathx_find_one(struct pathx *path, struct tree **tree) {
     }
     return 1;
 }
+
+const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
+    int errcode = PATHX_ENOMEM;
+
+    if (path != NULL) {
+        if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
+            errcode = path->state->errcode;
+        else
+            errcode = PATHX_EINTERNAL;
+    }
+
+    if (txt)
+        *txt = path->state->txt;
+
+    if (pos)
+        *pos = path->state->pos - path->state->txt;
+
+    return errcodes[errcode];
+}
+
+
 /*
  * Local variables:
  *  indent-tabs-mode: nil
diff --git a/tests/Makefile.am b/tests/Makefile.am
index bd29917..20229c8 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -15,11 +15,11 @@ check_SCRIPTS=test-lenses.sh test-interpreter.sh test-get.sh \
 	      test-events-saved.sh test-save-mode.sh
 
 EXTRA_DIST=augtest $(AUGTESTS) root \
-	   $(check_SCRIPTS) $(wildcard modules/*.aug)
+	   $(check_SCRIPTS) $(wildcard modules/*.aug) xpath.tests
 
 noinst_SCRIPTS = $(check_SCRIPTS)
 
-check_PROGRAMS = fatest
+check_PROGRAMS = fatest test-xpath
 
 TESTS_ENVIRONMENT = \
   PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH" \
@@ -32,3 +32,6 @@ INCLUDES = -I$(top_srcdir)/src
 
 fatest_SOURCES = fatest.c cutest.c cutest.h
 fatest_LDADD = $(top_builddir)/src/libaugeas.la $(GNULIB)
+
+test_xpath_SOURCES = test-xpath.c
+test_xpath_LDADD = $(top_builddir)/src/libaugeas.la $(GNULIB)
diff --git a/tests/test-xpath.c b/tests/test-xpath.c
new file mode 100644
index 0000000..0293a31
--- /dev/null
+++ b/tests/test-xpath.c
@@ -0,0 +1,239 @@
+/*
+ * test-xpath.c: check that XPath expressions yield the expected result
+ *
+ * Copyright (C) 2007 Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Author: David Lutterkort <dlutter at redhat.com>
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <augeas.h>
+#include <internal.h>
+#include <memory.h>
+
+static const char *abs_top_srcdir;
+static char *root;
+
+#define KW_TEST "test"
+
+struct entry {
+    struct entry *next;
+    char *path;
+    char *value;
+};
+
+struct test {
+    struct test *next;
+    char *name;
+    char *match;
+    struct entry *entries;
+};
+
+#define die(msg)                                                    \
+    do {                                                            \
+        fprintf(stderr, "%d: Fatal error: %s\n", __LINE__, msg);    \
+        exit(EXIT_FAILURE);                                         \
+    } while(0)
+
+static char *skipws(char *s) {
+    while (isspace(*s)) s++;
+    return s;
+}
+
+static char *findws(char *s) {
+    while (*s && ! isspace(*s)) s++;
+    return s;
+}
+
+static char *token(char *s, char **tok) {
+    char *t = skipws(s);
+    s = findws(t);
+    *tok = strndup(t, s - t);
+    return s;
+}
+
+static char *token_to_eol(char *s, char **tok) {
+    char *t = skipws(s);
+    while (*s && *s != '\n') s++;
+    *tok = strndup(t, s - t);
+    return s;
+}
+
+static struct test *read_tests(void) {
+    char *fname;
+    FILE *fp;
+    char line[BUFSIZ];
+    struct test *result = NULL, *t = NULL;
+    int lc = 0;
+
+    if (asprintf(&fname, "%s/tests/xpath.tests", abs_top_srcdir) < 0)
+        die("asprintf fname");
+
+    if ((fp = fopen(fname, "r")) == NULL)
+        die("fopen xpath.tests");
+
+    while (fgets(line, BUFSIZ, fp) != NULL) {
+        lc += 1;
+        char *s = skipws(line);
+        if (*s == '#' || *s == '\0')
+            continue;
+        if (STREQLEN(s, KW_TEST, strlen(KW_TEST))) {
+            if (ALLOC(t) < 0)
+                die("out of memory");
+            list_append(result, t);
+            s = token(s + strlen(KW_TEST), &(t->name));
+            s = token_to_eol(s, &(t->match));
+        } else {
+            struct entry *e = NULL;
+            if (ALLOC(e) < 0)
+                die("out of memory");
+            list_append(t->entries, e);
+            s = token(s, &(e->path));
+            s = skipws(s);
+            if (*s) {
+                if (*s != '=') {
+                    fprintf(stderr,
+                     "line %d: either list only a path or path = value\n", lc);
+                    die("xpath.tests has incorrect format");
+                }
+                s = token_to_eol(s + 1, &(e->value));
+            }
+        }
+        s = skipws(s);
+        if (*s != '\0') {
+            fprintf(stderr, "line %d: junk at end of line\n", lc);
+            die("xpath.tests has incorrect format");
+        }
+    }
+    return result;
+}
+
+static void print_pv(const char *path, const char *value) {
+    if (value)
+        printf("    %s = %s\n", path, value);
+    else
+        printf("    %s\n", path);
+}
+
+static int run_one_test(struct augeas *aug, struct test *t) {
+    int nexp = 0, nact;
+    char **matches;
+    int result = 0;
+
+    printf("%-30s ... ", t->name);
+    list_for_each(e, t->entries)
+        nexp++;
+    nact = aug_match(aug, t->match, &matches);
+    if (nact != nexp) {
+        result = -1;
+    } else {
+        struct entry *e;
+        int i;
+        const char *val;
+
+        for (e = t->entries, i = 0; e != NULL; e = e->next, i++) {
+            if (!STREQ(e->path, matches[i]))
+                result = -1;
+            aug_get(aug, e->path, &val);
+            if (!streqv(e->value, val))
+                result = -1;
+        }
+    }
+    if (result == 0) {
+        printf("PASS\n");
+    } else {
+        printf("FAIL\n");
+
+        printf("  Match: %s\n", t->match);
+        printf("  Expected: %d entries\n", nexp);
+        list_for_each(e, t->entries) {
+            print_pv(e->path, e->value);
+        }
+        if (nact < 0) {
+            printf("  Actual: aug_match failed\n");
+            } else {
+            printf("  Actual: %d entries\n", nact);
+        }
+        for (int i=0; i < nact; i++) {
+            const char *val;
+            aug_get(aug, matches[i], &val);
+            print_pv(matches[i], val);
+        }
+    }
+    return result;
+}
+
+static int run_tests(struct test *tests) {
+    char *lensdir;
+    struct augeas *aug = NULL;
+    int result = EXIT_SUCCESS;
+
+    if (asprintf(&lensdir, "%s/lenses", abs_top_srcdir) < 0)
+        die("asprintf lensdir failed");
+
+    aug = aug_init(root, lensdir, AUG_NO_STDINC|AUG_SAVE_NEWFILE);
+    if (aug == NULL)
+        die("aug_init");
+    list_for_each(t, tests) {
+        if (run_one_test(aug, t) < 0)
+            result = EXIT_FAILURE;
+    }
+    aug_close(aug);
+
+    return result;
+}
+
+int main(void) {
+    struct test *tests;
+
+    abs_top_srcdir = getenv("abs_top_srcdir");
+    if (abs_top_srcdir == NULL)
+        die("env var abs_top_srcdir must be set");
+
+    if (asprintf(&root, "%s/tests/root", abs_top_srcdir) < 0) {
+        die("failed to set root");
+    }
+
+    tests = read_tests();
+    return run_tests(tests);
+    /*
+    list_for_each(t, tests) {
+        printf("Test %s\n", t->name);
+        printf("match %s\n", t->match);
+        list_for_each(e, t->entries) {
+            if (e->value)
+                printf("    %s = %s\n", e->path, e->value);
+            else
+                printf("    %s\n", e->path);
+        }
+    }
+    */
+}
+
+/*
+ * Local variables:
+ *  indent-tabs-mode: nil
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ * End:
+ */
diff --git a/tests/xpath.tests b/tests/xpath.tests
new file mode 100644
index 0000000..1eda449
--- /dev/null
+++ b/tests/xpath.tests
@@ -0,0 +1,99 @@
+# Tests of the XPath matching
+
+# Blank lines and lines starting with '#' are ignored
+#
+# Each test consists of one line declaring the test followed by a complete
+# list of the expected result of the match.
+#
+# The test is declared with a line 'test NAME MATCH'. A result is either
+# just a path (meaning that the value associated with that node must be
+# NULL) or of the form PATH = VALUE, meaning that the value for the node at
+# PATH must be VALUE
+#
+# The MATCH XPath expression is matched against a fixed tree (the one from the
+# root/ subdirectory) and the result of the aug_match is compared with the
+# results listed in the test
+
+# Very simple to warm up
+test wildcard /files/etc/hosts/*/ipaddr
+     /files/etc/hosts/1/ipaddr = 127.0.0.1
+     /files/etc/hosts/2/ipaddr = 172.31.122.14
+
+# Compare the value of the current node with a constant
+test self-value /files/etc/hosts/*/ipaddr[ value() = '127.0.0.1' ]
+     /files/etc/hosts/1/ipaddr = 127.0.0.1
+
+# Find nodes that have a child named 'ipaddr' with a fixed value
+test child-value /files/etc/hosts/*[ipaddr = '127.0.0.1']
+     /files/etc/hosts/1
+
+# Find nodes that have a child 'ipaddr' that has no value
+test child-nil-value /files/etc/hosts/*[ipaddr = '']
+
+# Find nodes that have no value
+test self-nil-value /files/etc/hosts/*[value() = '']
+     /files/etc/hosts/1
+     /files/etc/hosts/2
+
+# Match over two levels of the tree
+test two-wildcards /files/etc/*/*[ipaddr = '127.0.0.1']
+     /files/etc/hosts/1
+
+test pam-system-auth /files/etc/pam.d/*/*[module = 'system-auth']
+     /files/etc/pam.d/login/2
+     /files/etc/pam.d/login/4
+     /files/etc/pam.d/login/5
+     /files/etc/pam.d/login/8
+     /files/etc/pam.d/postgresql/1
+     /files/etc/pam.d/postgresql/2
+     /files/etc/pam.d/newrole/1
+     /files/etc/pam.d/newrole/2
+     /files/etc/pam.d/newrole/3
+
+# Multiple predicates are treated with 'and'
+test pam-two-preds /files/etc/pam.d/*/*[module = 'system-auth'][type = 'account']
+     /files/etc/pam.d/login/4
+     /files/etc/pam.d/postgresql/2
+     /files/etc/pam.d/newrole/2
+
+# Find nodes that have siblings with a given value
+test pam-two-preds-control /files/etc/pam.d/*/*[module = 'system-auth'][type = 'account']/control
+     /files/etc/pam.d/login/4/control = include
+     /files/etc/pam.d/postgresql/2/control = include
+     /files/etc/pam.d/newrole/2/control = include
+
+# last() gives the last node with a certain name
+test last /files/etc/hosts/*[ipaddr = "127.0.0.1"]/alias[last()]
+     /files/etc/hosts/1/alias[3] = galia
+
+# We can get nodes counting from the right with 'last()-N'
+test last-minus-one /files/etc/hosts/*[ipaddr = "127.0.0.1"]/alias[ last() - 1 ]
+     /files/etc/hosts/1/alias[2] = galia.watzmann.net
+
+# Make sure we look at all nodes with a given label (ticket #23)
+test transparent-multi-node /files/etc/ssh/sshd_config/AcceptEnv/10
+     /files/etc/ssh/sshd_config/AcceptEnv[2]/10 = LC_ADDRESS
+
+test abbrev-descendants /files/etc//1
+     /files/etc/apt/sources.list/1
+     /files/etc/ssh/sshd_config/AcceptEnv[1]/1 = LANG
+     /files/etc/aliases/1
+     /files/etc/fstab/1
+     /files/etc/pam.d/login/1
+     /files/etc/pam.d/postgresql/1
+     /files/etc/pam.d/newrole/1
+     /files/etc/hosts/1
+     /files/etc/inittab/1
+
+test descendant-or-self /files/descendant-or-self :: 4
+     /files/etc/ssh/sshd_config/AcceptEnv[1]/4 = LC_TIME
+     /files/etc/aliases/4
+     /files/etc/fstab/4
+     /files/etc/pam.d/login/4
+     /files/etc/pam.d/newrole/4
+     /files/etc/inittab/4
+
+test descendant /files/etc/aliases/4/descendant::4
+
+test descendant-or-self-2 /files/etc/aliases/4/descendant-or-self::4
+     /files/etc/aliases/4
-- 
1.6.0.6




More information about the augeas-devel mailing list