[augeas-devel] Prototype of xml lens

Francis Giraldeau francis.giraldeau at revolutionlinux.com
Wed Jul 21 02:06:52 UTC 2010


Hi, 

For those who may want to try the XML lens, here it is. The latest
square2 branch is needed. A lot of work remains to be done, but it
already shows what is possible.

Cheers,

Francis
-------------- next part --------------
(* XML lens for Augeas
   Author: Francis Giraldeau <francis.giraldeau at usherbrooke.ca>

   Reference: http://www.w3.org/TR/2006/REC-xml11-20060816/
*)

module Xml = 

(************************************************************************
 *                           Utility lenses
 *************************************************************************)

(* dels: delete the literal string s when reading, and write s back
   unchanged when the text has to be created. *)
let dels (s:string)     = del s s 
(* sep_spc: mandatory whitespace (spaces, tabs, newlines); a single
   space is written when the separator has to be created. *)
let sep_spc           = del /[ \t\n]+/ " "
(* sep_osp: optional whitespace (spaces, tabs, newlines); nothing is
   written when the separator has to be created. *)
let sep_osp           = del /[ \t\n]*/ ""
(* sep_eq: an equals sign with optional spaces or tabs on either side;
   a bare equals sign is written when it has to be created. *)
let sep_eq            = del /[ \t]*=[ \t]*/ "="

(* nmtoken: a name starting with a letter, colon or underscore, followed
   by letters, digits, colons, underscores, dots or dashes. In the
   visible source it is only used to build pi. *)
let nmtoken             = /[a-zA-Z:_][a-zA-Z0-9:_\.-]*/
(* word: a name starting with a letter, followed by letters, digits,
   dots, underscores or dashes (no colon). Used as the tag name in
   element. *)
let word                = /[a-zA-Z][a-zA-Z0-9\._\-]*/ 
(* sto_dquote: store the text between a pair of double quotes; the
   quotes themselves are deleted and the value may be empty. *)
let sto_dquote          = dels "\"" . store /[^"]*/ . dels "\""
(* sto_squote: same as sto_dquote but for single quotes. Not referenced
   anywhere else in the visible source. *)
let sto_squote          = dels "'" . store /[^']*/ . dels "'"
(* comment: an XML comment, exposed as a #comment node whose value is
   the text between the opening and closing markers.

   The body is a sequence of units, each being either one non-dash
   character or a dash followed by one non-dash character. This
     - lets the body span several lines (a negated character class
       matches newline, whereas the dot does not),
     - rejects a double dash inside the body, as the XML specification
       requires, so a comment can never run past its own terminator,
     - rejects a body ending in a dash (the closing marker would then
       read as three dashes), and
     - accepts the empty comment. *)
let comment             = [ label "#comment"
                          . dels "<!--"
                          . store /([^-]|-[^-])*/
                          . dels "-->" ]
(* pi: every nmtoken except the three-letter name xml in any letter
   case. Presumably intended as the target name of a processing
   instruction; it is not referenced anywhere else in the visible
   source. *)
let pi                  = nmtoken - /[Xx][Mm][Ll]/ 
(* vers_info: the version pseudo-attribute of the XML declaration, as a
   node keyed version holding the double-quoted value. Leading
   whitespace is mandatory. Single-quoted values are not accepted. *)
let vers_info           = [ sep_spc . key "version" . sep_eq . sto_dquote  ]
(* enc_info: the encoding pseudo-attribute of the XML declaration, as a
   node keyed encoding holding the double-quoted value. Leading
   whitespace is mandatory. Single-quoted values are not accepted. *)
let enc_info            = [ sep_spc . key "encoding" . sep_eq . sto_dquote ] 
(* prolog: the XML declaration at the top of a document, mapped to a
   #declaration node. The version child is required, the encoding child
   is optional, and whitespace is allowed before the closing marker. *)
let prolog =
    [ label "#declaration" .
      dels "<?xml" .
      vers_info .
      enc_info? .
      sep_osp .
      dels "?>" ]


(************************************************************************
 *                            Tags
 *************************************************************************)

(* text: a non-empty run of character data, stored verbatim as the value
   of a #text node. It may contain anything except angle brackets, so
   whitespace-only runs (including newlines) also become #text nodes. *)
let text = [ label "#text" . store /[^<>]+/ ]

(* element: zero or more consecutive XML elements, each becoming a tree
   node whose children are produced by body (applied zero or more times
   between the tags).

   The tag name must match word. The square primitive comes from the
   square2 branch; presumably it uses the opening tag name as the node
   key and requires the same name again after the inner lens, i.e. in
   the closing tag -- confirm against that branch.

   The closing angle bracket of the end tag swallows one optional
   newline; a newline is written after it when the element is created.
   Attributes and self-closing tags are not handled. *)
let element (body:lens) =
    let inner = dels ">" . body* . dels "</" in
        [ dels "<" . square word inner . del />[\n]?/ ">\n" ] *

(* content: recursive element content. Each element may contain nested
   elements, #text runs and #comment nodes, in any order. Because
   element is itself iterated, content also matches a sequence of
   sibling elements (possibly none). *)
let rec content = element (content|text|comment)

(* doc: a whole document -- an optional XML declaration, then the
   element content, with optional whitespace before, between and after.
   Comments and text outside the top-level elements are not accepted. *)
let doc = (sep_osp . prolog)? . sep_osp . content . sep_osp

-------------- next part --------------
module Test_xml = 

(* knode: a tree node whose key is the text matching r. Not referenced
   by any test in the visible source. *)
let knode (r:regexp) = [ key r ]

(************************************************************************
 *                           Utilities lens
 *************************************************************************)

(* A comment may contain angle brackets and ampersands; the text between
   the markers is stored verbatim, including surrounding spaces. *)
test Xml.comment get "<!-- declarations for <head> & <body> -->" = 
    { "#comment" = " declarations for <head> & <body> " } 
(* A comment whose body ends in a dash must be rejected: the star on the
   right-hand side asserts that the get fails. *)
test Xml.comment get "<!-- B+, B, or B--->" = * 

(* The XML declaration yields a #declaration node with one child per
   pseudo-attribute. *)
test Xml.prolog get "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" = 
  { "#declaration"
    { "version" = "1.0" }
    { "encoding" = "UTF-8" }
  }

(* input1: sample document with an XML declaration, nested elements and
   an inline comment after the second list item. Whitespace is
   significant here: note the trailing space after the closing p tag,
   which shows up in the expected tree of the get test. *)
let input1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<html>
    <head>
        <title>Wiki</title>
    </head>
    <body>
        <h1>Augeas</h1>
        <p>Augeas is now able to parse XML files!</p> 
        <ul>
            <li>Translate from XML to a tree syntax</li>
            <li>Translate from the tree back to XML</li> <!-- this is some comment -->
            <li>this</li>
        </ul>
    </body>
</html>
"

(* Full-document get. Indentation between tags appears as #text nodes.
   The newline directly after a closing tag is consumed by the element
   lens, so the #text that follows such a tag holds only the indentation
   of the next line, while the #text after an opening tag starts with a
   newline. *)
test Xml.doc get input1 =  
  { "#declaration"
    { "version" = "1.0" }
    { "encoding" = "UTF-8" }
  }
  { "html"
    { "#text" = "
    " }
    { "head"
      { "#text" = "
        " }
      { "title"
        { "#text" = "Wiki" }
      }
      { "#text" = "    " }
    }
    { "#text" = "    " }
    { "body"
      { "#text" = "
        " }
      { "h1"
        { "#text" = "Augeas" }
      }
      { "#text" = "        " }
      { "p"
        { "#text" = "Augeas is now able to parse XML files!" }
      }
      { "#text" = " 
        " }
      { "ul"
        { "#text" = "
            " }
        { "li"
          { "#text" = "Translate from XML to a tree syntax" }
        }
        { "#text" = "            " }
        { "li"
          { "#text" = "Translate from the tree back to XML" }
        }
        { "#text" = " " }
        { "#comment" = " this is some comment " }
        { "#text" = "
            " }
        { "li"
          { "#text" = "this" }
        }
        { "#text" = "        " }
      }
      { "#text" = "    " }
    }
  }


(* input2: small list document without an XML declaration, starting
   with a blank line; used by the put tests. *)
let input2 = "
<ul>
    <li>test1</li>
    <li>test2</li>
</ul>
"

(* Setting a path under a third li, which does not exist in input2,
   creates a new element after the existing ones. The new element is
   written without indentation and is followed by a newline, while the
   existing items keep their original formatting. *)
test Xml.doc put input2 after set "/ul/li[3]/#text" "test3" = "
<ul>
    <li>test1</li>
    <li>test2</li>
<li>test3</li>
</ul>
"

(* Removing the second li deletes only that element. The indentation
   #text node that preceded it is kept, so it now ends up in front of
   the closing ul tag. *)
test Xml.doc put input2 after rm "/ul/li[2]" = "
<ul>
    <li>test1</li>
    </ul>
"



More information about the augeas-devel mailing list