[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: XML Encoding of XPath: Examples
- From: Paul T <paul@pault.com>
- To: Charles Reitzel <creitzel@mediaone.net>, Evan Lenz <elenz@xyzfind.com>
- Date: Thu, 15 Mar 2001 21:41:46 -0800
Hi,
I don't think I really understand what you are doing or
why, but just in case, I'm attaching my JavaCC grammar
for XPath (it also takes care of those 'div' things).
I tested it about a year ago ...
Just in case. Public domain.
Rgds.Paul.
---------- xpath.jj
/*
XPath grammar definition for use with JavaCC. v 0.5
S I M P L I F I C A T I O N S.
o Meaning
NCNAME ( CombiningChar , Extender )
NUMBER
'div' 'or' 'and' 'mod' as QNAME ( should be NCNAME )
o Implementation
FUNCTION_NAME
AXIS_NAME
sep()
*/
/* JavaCC generator settings for this grammar. */
options {
/* Generate instance (non-static) parser members; main() builds a new parser per input line. */
STATIC = false;
/* Default lookahead at choice points; individual productions override it with LOOKAHEAD(n). */
LOOKAHEAD = 1;
/* Do not emit parser trace code. */
DEBUG_PARSER = false;
/* Have the token manager call CommonTokenAction(Token), declared in TOKEN_MGR_DECLS, for each token. */
COMMON_TOKEN_ACTION = true;
}
PARSER_BEGIN(XPathParser_Grammar)
package com.pault.xpath;
import java.io.*;
public class XPathParser_Grammar {
  /**
   * Command-line driver: reads standard input line by line, echoes each line
   * to standard output and parses it with a freshly created parser.
   *
   * @param args ignored
   * @throws Exception on an I/O failure or when a line cannot be parsed
   */
  public static void main(String args[]) throws Exception {
    BufferedReader tests =
      new BufferedReader( new InputStreamReader(System.in) );
    String str;
    while ( (str = tests.readLine()) != null ) {
      System.out.println( str );
      // Hand the characters to the parser directly. Encoding the line to
      // bytes and decoding it again goes through the platform charset and
      // can corrupt characters that charset cannot represent.
      XPathParser_Grammar parser =
        new XPathParser_Grammar( new StringReader( str ) );
      parser.parse();
    }
  }
}
PARSER_END(XPathParser_Grammar)
TOKEN_MGR_DECLS: {
  /* Kind of the token produced most recently; -1 while no token has been seen. */
  int last_tok_kind = -1;

  /* Token-manager hook (enabled by COMMON_TOKEN_ACTION): remembers the kind of each token. */
  void CommonTokenAction(Token t) {
    last_tok_kind = t.kind;
  }

  /*
   * Chooses the kind for a token whose meaning depends on what came before.
   * Returns a when the previous token is "@", an axis name, "(" , "[",
   * a token that already contains "(" (function name or node-type test),
   * an operator, or when there is no previous token; returns b otherwise.
   *
   * NOTE(review): the "," token used by args() is anonymous and has no named
   * kind, so it is not listed here -- confirm whether a name right after a
   * comma should also select a.
   */
  int sep( int a, int b ) {
    boolean pickFirst;
    switch ( last_tok_kind ) {
      /* no previous token */
      case -1:
      /* "@", "axis::", "(" and "[" */
      case AT: case AXIS_NAME: case LBR: case LSQBR:
      /* tokens that end with "(" */
      case FUNCTION_NAME: case PI: case COMMENT: case TEXT: case NODE:
      /* comparison operators */
      case EQUALS: case NOTEQUALS: case GT: case LT: case GTE: case LTE:
      /* arithmetic, union and path operators */
      case PLUS: case MINUS: case MULTIPLY: case VBAR:
      case SLASH_SLASH: case SLASH:
      /* operator names */
      case DIV: case OR: case AND: case MOD:
        pickFirst = true;
        break;
      default:
        pickFirst = false;
        break;
    }
    return pickFirst ? a : b;
  }
}
/* Characters discarded between tokens: blank, carriage return, line feed, tab. */
SKIP : { " " | "\r" | "\n" | "\t" }
/*
 * Lexical tokens. The order of the definitions matters: when two definitions
 * match text of the same length, the one listed first wins.
 */
TOKEN :
{
  /* Quoted string, double- or single-quoted, without any escape mechanism. */
  < LITERAL: ( "\"" (~["\""])* "\"" ) | ( "\'" (~["\'"])* "\'" ) >

  /* Digits with an optional fractional part; a leading "." is not accepted. */
| < NUMBER: (["0"-"9"])+ (".")? (["0"-"9"])* >

  /*
   * Operator names and the star. Written with a leading blank they always get
   * the operator kind. Written without one, sep() inspects the previous token
   * and assigns either the operand kind (first argument) or the operator kind
   * (second argument).
   */
| < DIV: " div" >
| "div" { matchedToken.kind = sep( QNAME, DIV ); }
| < OR: " or" >
| "or" { matchedToken.kind = sep( QNAME, OR ); }
| < AND: " and" >
| "and" { matchedToken.kind = sep( QNAME, AND ); }
| < MOD: " mod" >
| "mod" { matchedToken.kind = sep( QNAME, MOD ); }
| < MULTIPLY: " *" >
| "*" { matchedToken.kind = sep( STAR, MULTIPLY ); }

  /* Empty character class: declares the STAR kind, which is only assigned by the action above. */
| < STAR: [] >

  /* Private helper: optional blanks or tabs inside the compound tokens below. */
| < #WSPACE: ([" ","\t"])* >

  /* Axis name together with its "::" separator. */
| < AXIS_NAME: "ancestor" <WSPACE> "::" |
               "ancestor-or-self" <WSPACE> "::" |
               "attribute" <WSPACE> "::" |
               "child" <WSPACE> "::" |
               "descendant" <WSPACE> "::" |
               "descendant-or-self" <WSPACE> "::" |
               "following" <WSPACE> "::" |
               "following-sibling" <WSPACE> "::" |
               "namespace" <WSPACE> "::" |
               "parent" <WSPACE> "::" |
               "preceding" <WSPACE> "::" |
               "preceding-sibling" <WSPACE> "::" |
               "self" <WSPACE> "::" >

  /* Function name together with its opening "("; the parser only matches the closing ")". */
| < FUNCTION_NAME:
               "last" <WSPACE> "(" |
               "position" <WSPACE> "(" |
               "count" <WSPACE> "(" |
               "id" <WSPACE> "(" |
               "local-name" <WSPACE> "(" |
               "namespace-uri" <WSPACE> "(" |
               "name" <WSPACE> "(" |
               "string" <WSPACE> "(" |
               "concat" <WSPACE> "(" |
               "starts-with" <WSPACE> "(" |
               "contains" <WSPACE> "(" |
               "substring-before" <WSPACE> "(" |
               "substring-after" <WSPACE> "(" |
               "substring" <WSPACE> "(" |
               "string-length" <WSPACE> "(" |
               "normalize-space" <WSPACE> "(" |
               "translate" <WSPACE> "(" |
               "boolean" <WSPACE> "(" |
               "not" <WSPACE> "(" |
               "true" <WSPACE> "(" |
               "false" <WSPACE> "(" |
               "lang" <WSPACE> "(" |
               "number" <WSPACE> "(" |
               "sum" <WSPACE> "(" |
               "floor" <WSPACE> "(" |
               "ceiling" <WSPACE> "(" |
               "round" <WSPACE> "(" >

  /* Node-type tests, each including the opening "(". */
| < PI: "processing-instruction" <WSPACE> "(" >
| < COMMENT: "comment" <WSPACE> "(" >
| < TEXT: "text" <WSPACE> "(" >
| < NODE: "node" <WSPACE> "(" >

  /* Names. NAME_COLON_STAR is a prefix followed by a colon and a star. */
| < NAME_COLON_STAR: <NCNAME> ":*" >
| < QNAME: ( <NCNAME> ":" ) ? <NCNAME> >

  /*
   * A name must start with a letter or underscore. Allowing a leading digit
   * made input such as "1-2" lex as one name (longest match) instead of
   * NUMBER MINUS NUMBER.
   */
| < NCNAME: ["a"-"z","A"-"Z","_"] (["a"-"z","A"-"Z","0"-"9",".","_","-"])* >

  /* Comparison operators; the blank-padded spellings are accepted as well. */
| < EQUALS: "=" >
| < NOTEQUALS: "!=" >
| < GT: " > " | ">" >
| < LT: " < " | "<" >
| < GTE: " >= " | ">=" >
| < LTE: " <= " | "<=" >

  /* Arithmetic, union and path punctuation. */
| < PLUS: "+" >
| < MINUS: "-" >
| < VBAR: "|" >
| < SLASH_SLASH: "//" >
| < SLASH: "/" >
| < DOT_DOT: ".." >
| < DOT: "." >
| < AT: "@" >
| < LBR: "(" >
| < LSQBR: "[" >

  /* Variable reference: "$" immediately followed by a QNAME. */
| < DOLLAR_QNAME: "$" <QNAME> >
}
/* Entry point: one Xpath() followed by end of input. */
void parse() :
{}
{
  Xpath()
  <EOF>
}
/* Any number of consecutive expr(), including none at all. */
void Xpath() :
{}
{
  (
    expr()
  )*
}
/* An expression is an or_expr(), the lowest-precedence level. */
void expr() :
{}
{
  or_expr()
}
/* One or more and_expr() joined by <OR>. */
void or_expr() :
{}
{
  and_expr()
  (
    <OR> and_expr()
  )*
}
/* One or more equality_expr() joined by <AND>. */
void and_expr() :
{}
{
  equality_expr()
  (
    <AND> equality_expr()
  )*
}
/* One or more relational_expr() joined by <EQUALS> or <NOTEQUALS>. */
void equality_expr() :
{}
{
  relational_expr()
  (
    ( <EQUALS> | <NOTEQUALS> )
    relational_expr()
  )*
}
/* One or more additive_expr() joined by <LT>, <GT>, <LTE> or <GTE>. */
void relational_expr() :
{}
{
  additive_expr()
  (
    ( <LT> | <GT> | <LTE> | <GTE> )
    additive_expr()
  )*
}
/*
 * One or more multiplicative_expr() joined by <PLUS> or <MINUS>.
 * The repetition is entered only when a three-token lookahead matches.
 */
void additive_expr() :
{}
{
  multiplicative_expr()
  (
    LOOKAHEAD(3)
    ( <PLUS> | <MINUS> )
    multiplicative_expr()
  )*
}
/* One or more unary_expr() joined by <MULTIPLY>, <DIV> or <MOD>. */
void multiplicative_expr() :
{}
{
  unary_expr()
  (
    ( <MULTIPLY> | <DIV> | <MOD> )
    unary_expr()
  )*
}
/*
 * Any number of leading <MINUS> tokens followed by a union_expr().
 * XPath 1.0 defines UnaryExpr recursively ('-' UnaryExpr), so stacked
 * negations such as "- - 1" are legal; accepting at most one minus
 * rejected them.
 */
void unary_expr() :
{}
{
  ( <MINUS> )* union_expr()
}
/* One or more path_expr() joined by <VBAR>. */
void union_expr() :
{}
{
  path_expr()
  (
    <VBAR> path_expr()
  )*
}
/*
 * Either a location_path(), or a filter_expr() optionally continued by
 * <SLASH> or <SLASH_SLASH> and a relative_location_path(). The continuation
 * is taken only when a two-token lookahead matches.
 */
void path_expr() :
{}
{
    location_path()
  |
    filter_expr()
    (
      LOOKAHEAD(2)
      ( <SLASH> | <SLASH_SLASH> )
      relative_location_path()
    )?
}
/* A primary_expr() followed by its predicates() (possibly none). */
void filter_expr() :
{}
{
  primary_expr()
  predicates()
}
/* A location path is either relative or absolute. */
void location_path() :
{}
{
    relative_location_path()
  | absolute_location_path()
}
/*
 * <SLASH> alone or followed by a relative_location_path() (chosen with a
 * two-token lookahead), or <SLASH_SLASH> followed by a mandatory
 * relative_location_path().
 */
void absolute_location_path() :
{}
{
    <SLASH>
    (
      LOOKAHEAD(2)
      relative_location_path()
    )?
  |
    <SLASH_SLASH>
    relative_location_path()
}
/*
 * One or more step() joined by <SLASH> or <SLASH_SLASH>; each further step
 * is taken only when a two-token lookahead matches.
 */
void relative_location_path() :
{}
{
  step()
  (
    LOOKAHEAD(2)
    ( <SLASH> | <SLASH_SLASH> )
    step()
  )*
}
/*
 * A full step (axis_specifier(), node_test(), predicates()) or one of the
 * abbreviated steps <DOT> and <DOT_DOT>.
 */
void step() :
{}
{
    axis_specifier() node_test() predicates()
  | <DOT>
  | <DOT_DOT>
}
/* Optional axis: <AXIS_NAME>, <AT>, or nothing. */
void axis_specifier() :
{}
{
  ( <AXIS_NAME> | <AT> )?
}
/* Zero or more bracketed expressions: <LSQBR> expr() "]". */
void predicates() :
{}
{
  (
    <LSQBR>
    expr()
    "]"
  )*
}
/*
 * A variable reference, a parenthesised expr(), a literal, a number,
 * or a function_call().
 */
void primary_expr() :
{}
{
    <DOLLAR_QNAME>
  |
    <LBR> expr() ")"
  |
    <LITERAL>
  |
    <NUMBER>
  |
    function_call()
}
/* <FUNCTION_NAME> (which already includes the "("), optional arguments, then ")". */
void function_call() :
{}
{
  <FUNCTION_NAME>
  opt_args()
  ")"
}
/* An optional args() list. */
void opt_args() :
{}
{
  ( args() )?
}
/* One or more expr() separated by ",". */
void args() :
{}
{
  expr()
  (
    "," expr()
  )*
}
/*
 * A name test (<QNAME>, <STAR>, <NAME_COLON_STAR>) or a node-type test.
 * The node-type tokens already include the "(", so only ")" is matched
 * here; the processing-instruction test may carry an optional literal.
 */
void node_test() :
{}
{
    <QNAME>
  |
    <STAR>
  |
    <NAME_COLON_STAR>
  |
    <PI> opt_literal() ")"
  |
    <COMMENT> ")"
  |
    <TEXT> ")"
  |
    <NODE> ")"
}
/* An optional <LITERAL>. */
void opt_literal() :
{}
{
  ( <LITERAL> )?
}