File: System\Xml\Xsl\XPath\XPathParser.cs
Project: ndp\fx\src\XmlUtils\System.Data.SqlXml.csproj (System.Data.SqlXml)
//------------------------------------------------------------------------------
// <copyright file="XPathParser.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------
 
using System.Collections.Generic;
using System.Diagnostics;
 
namespace System.Xml.Xsl.XPath {
    using Res           = System.Xml.Utils.Res;
    using XPathNodeType = System.Xml.XPath.XPathNodeType;
 
    internal class XPathParser<Node> {
        private XPathScanner        scanner;
        private IXPathBuilder<Node> builder;
        private Stack<int>          posInfo = new Stack<int>();
 
        // Six possible causes of exceptions in the builder:
        // 1. Undefined prefix in a node test.
        // 2. Undefined prefix in a variable reference, or unknown variable.
        // 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments.
        // 4. Argument of Union operator is not a node-set.
        // 5. First argument of Predicate is not a node-set.
        // 6. Argument of Axis is not a node-set.
 
        public Node Parse(XPathScanner scanner, IXPathBuilder<Node> builder, LexKind endLex) {
            Debug.Assert(this.scanner == null && this.builder == null);
            Debug.Assert(scanner != null && builder != null);
 
            Node result     = default(Node);
            this.scanner    = scanner;
            this.builder    = builder;
            this.posInfo.Clear();
 
            try {
                builder.StartBuild();
                result = ParseExpr();
                scanner.CheckToken(endLex);
            }
            catch (XPathCompileException e) {
                if (e.queryString == null) {
                    e.queryString = scanner.Source;
                    PopPosInfo(out e.startChar, out e.endChar);
                }
                throw;
            }
            finally {
                result = builder.EndBuild(result);
#if DEBUG
                this.builder = null;
                this.scanner = null;
#endif
            }
            Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced");
            return result;
        }
 
        #region Location paths and node tests
        /**************************************************************************************************/
        /*  Location paths and node tests                                                                 */
        /**************************************************************************************************/
 
        internal static bool IsStep(LexKind lexKind) {
            return (
                lexKind == LexKind.Dot    ||
                lexKind == LexKind.DotDot ||
                lexKind == LexKind.At     ||
                lexKind == LexKind.Axis   ||
                lexKind == LexKind.Star   ||
                lexKind == LexKind.Name   // NodeTest is also Name
            );
        }
 
        /*
        *   LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath
        */
        private Node ParseLocationPath() {
            if (scanner.Kind == LexKind.Slash) {
                scanner.NextLex();
                Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null);
 
                if (IsStep(scanner.Kind)) {
                    opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
                }
                return opnd;
            } else if (scanner.Kind == LexKind.SlashSlash) {
                scanner.NextLex();
                return builder.JoinStep(
                    builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null),
                    builder.JoinStep(
                        builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
                        ParseRelativeLocationPath()
                    )
                );
            } else {
                return ParseRelativeLocationPath();
            }
        }
 
        /*
        *   RelativeLocationPath ::= Step (('/' | '//') Step)*
        */
        //Max depth to avoid StackOverflow
        const int MaxParseRelativePathDepth = 1024;
        private int parseRelativePath = 0;
        private Node ParseRelativeLocationPath() {
            if (++parseRelativePath > MaxParseRelativePathDepth) {
                if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
                    throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
                }
            }
            Node opnd = ParseStep();
            if (scanner.Kind == LexKind.Slash) {
                scanner.NextLex();
                opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
            } else if (scanner.Kind == LexKind.SlashSlash) {
                scanner.NextLex();
                opnd = builder.JoinStep(opnd,
                    builder.JoinStep(
                        builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
                        ParseRelativeLocationPath()
                    )
                );
            }
            --parseRelativePath;
            return opnd;
        }
 
        /*
        *   Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate*
        */
        private Node ParseStep() {
            Node opnd;
            if (LexKind.Dot == scanner.Kind) {                  // '.'
                scanner.NextLex();
                opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null);
                if (LexKind.LBracket == scanner.Kind) {
                    throw scanner.CreateException(Res.XPath_PredicateAfterDot);
                }
            } else if (LexKind.DotDot == scanner.Kind) {        // '..'
                scanner.NextLex();
                opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null);
                if (LexKind.LBracket == scanner.Kind) {
                    throw scanner.CreateException(Res.XPath_PredicateAfterDotDot);
                }
            } else {                                            // (AxisName '::' | '@')? NodeTest Predicate*
                XPathAxis axis;
                switch (scanner.Kind) {
                case LexKind.Axis:                              // AxisName '::'
                    axis = scanner.Axis;
                    scanner.NextLex();
                    scanner.NextLex();
                    break;
                case LexKind.At:                                // '@'
                    axis = XPathAxis.Attribute;
                    scanner.NextLex();
                    break;
                case LexKind.Name:
                case LexKind.Star:
                    // NodeTest must start with Name or '*'
                    axis = XPathAxis.Child;
                    break;
                default:
                    throw scanner.CreateException(Res.XPath_UnexpectedToken, scanner.RawValue);
                }
 
                opnd = ParseNodeTest(axis);
 
                while (LexKind.LBracket == scanner.Kind) {
                    opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis));
                }
            }
            return opnd;
        }
 
        private static bool IsReverseAxis(XPathAxis axis) {
            return (
                axis == XPathAxis.Ancestor       || axis == XPathAxis.Preceding ||
                axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling
            );
        }
 
        /*
        *   NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '('  Literal? ')'
        *   NameTest ::= '*' | NCName ':' '*' | QName
        */
        private Node ParseNodeTest(XPathAxis axis) {
            XPathNodeType nodeType;
            string        nodePrefix, nodeName;
 
            int startChar = scanner.LexStart;
            InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName);
            PushPosInfo(startChar, scanner.PrevLexEnd);
            Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName);
            PopPosInfo();
            return result;
        }
 
        private static bool IsNodeType(XPathScanner scanner) {
            return scanner.Prefix.Length == 0 && (
                scanner.Name == "node"                   ||
                scanner.Name == "text"                   ||
                scanner.Name == "processing-instruction" ||
                scanner.Name == "comment"
            );
        }
 
        private static XPathNodeType PrincipalNodeType(XPathAxis axis) {
            return (
                axis == XPathAxis.Attribute ? XPathNodeType.Attribute :
                axis == XPathAxis.Namespace ? XPathNodeType.Namespace :
                /*else*/                      XPathNodeType.Element
            );
        }
 
        internal static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) {
            switch (scanner.Kind) {
            case LexKind.Name :
                if (scanner.CanBeFunction && IsNodeType(scanner)) {
                    nodePrefix = null;
                    nodeName   = null;
                    switch (scanner.Name) {
                    case "comment": nodeType = XPathNodeType.Comment; break;
                    case "text"   : nodeType = XPathNodeType.Text;    break;
                    case "node"   : nodeType = XPathNodeType.All;     break;
                    default:
                        Debug.Assert(scanner.Name == "processing-instruction");
                        nodeType = XPathNodeType.ProcessingInstruction;
                        break;
                    }
 
                    scanner.NextLex();
                    scanner.PassToken(LexKind.LParens);
 
                    if (nodeType == XPathNodeType.ProcessingInstruction) {
                        if (scanner.Kind != LexKind.RParens) {  // 'processing-instruction' '(' Literal ')'
                            scanner.CheckToken(LexKind.String);
                            // It is not needed to set nodePrefix here, but for our current implementation
                            // comparing whole QNames is faster than comparing just local names
                            nodePrefix = string.Empty;
                            nodeName   = scanner.StringValue;
                            scanner.NextLex();
                        }
                    }
 
                    scanner.PassToken(LexKind.RParens);
                } else {
                    nodePrefix = scanner.Prefix;
                    nodeName   = scanner.Name;
                    nodeType   = PrincipalNodeType(axis);
                    scanner.NextLex();
                    if (nodeName == "*") {
                        nodeName = null;
                    }
                }
                break;
            case LexKind.Star :
                nodePrefix = null;
                nodeName   = null;
                nodeType   = PrincipalNodeType(axis);
                scanner.NextLex();
                break;
            default :
                throw scanner.CreateException(Res.XPath_NodeTestExpected, scanner.RawValue);
            }
        }
 
        /*
        *   Predicate ::= '[' Expr ']'
        */
        private Node ParsePredicate() {
            scanner.PassToken(LexKind.LBracket);
            Node opnd = ParseExpr();
            scanner.PassToken(LexKind.RBracket);
            return opnd;
        }
        #endregion
 
        #region Expressions
        /**************************************************************************************************/
        /*  Expressions                                                                                   */
        /**************************************************************************************************/
 
        /*
        *   Expr   ::= OrExpr
        *   OrExpr ::= AndExpr ('or' AndExpr)*
        *   AndExpr ::= EqualityExpr ('and' EqualityExpr)*
        *   EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)*
        *   RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)*
        *   AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)*
        *   MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)*
        *   UnaryExpr ::= ('-')* UnionExpr
        */
        private Node ParseExpr() {
            return ParseSubExpr(/*callerPrec:*/0);
        }
 
        //Max depth to avoid StackOverflow
        //limit the recursive call from ParseSubExpr -> ParseSubExpr
        //and also ParseSubExpr->ParseUnionExpr->ParsePathExpr->...->ParseExpr->ParseSubExpr
        const int MaxParseSubExprDepth = 1024;
        private int parseSubExprDepth = 0;
        private Node ParseSubExpr(int callerPrec) {
            if (++parseSubExprDepth > MaxParseSubExprDepth) {
                if (System.Xml.XmlConfiguration.XsltConfigSection.LimitXPathComplexity) {
                    throw scanner.CreateException(System.Xml.Utils.Res.Xslt_InputTooComplex);
                }
            }
 
            XPathOperator op;
            Node opnd;
 
            // Check for unary operators
            if (scanner.Kind == LexKind.Minus) {
                op = XPathOperator.UnaryMinus;
                int opPrec = XPathOperatorPrecedence[(int)op];
                scanner.NextLex();
                opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node));
            } else {
                opnd = ParseUnionExpr();
            }
 
            // Process binary operators
            while (true) {
                op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown;
                int opPrec = XPathOperatorPrecedence[(int)op];
                if (opPrec <= callerPrec) {
                    break;
                }
 
                // Operator's precedence is greater than the one of our caller, so process it here
                scanner.NextLex();
                opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec));
            }
            --parseSubExprDepth;
            return opnd;
        }
 
        private static int[] XPathOperatorPrecedence = {
            /*Unknown    */ 0,
            /*Or         */ 1,
            /*And        */ 2,
            /*Eq         */ 3,
            /*Ne         */ 3,
            /*Lt         */ 4,
            /*Le         */ 4,
            /*Gt         */ 4,
            /*Ge         */ 4,
            /*Plus       */ 5,
            /*Minus      */ 5,
            /*Multiply   */ 6,
            /*Divide     */ 6,
            /*Modulo     */ 6,
            /*UnaryMinus */ 7,
            /*Union      */ 8,  // Not used
        };
 
        /*
        *   UnionExpr ::= PathExpr ('|' PathExpr)*
        */
        private Node ParseUnionExpr() {
            int startChar = scanner.LexStart;
            Node opnd1 = ParsePathExpr();
 
            if (scanner.Kind == LexKind.Union) {
                PushPosInfo(startChar, scanner.PrevLexEnd);
                opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1);
                PopPosInfo();
 
                while (scanner.Kind == LexKind.Union) {
                    scanner.NextLex();
                    startChar = scanner.LexStart;
                    Node opnd2 = ParsePathExpr();
                    PushPosInfo(startChar, scanner.PrevLexEnd);
                    opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2);
                    PopPosInfo();
                }
            }
            return opnd1;
        }
 
        /*
        *   PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )?
        */
        private Node ParsePathExpr() {
            // Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr
            if (IsPrimaryExpr()) {
                int startChar = scanner.LexStart;
                Node opnd = ParseFilterExpr();
                int endChar = scanner.PrevLexEnd;
 
                if (scanner.Kind == LexKind.Slash) {
                    scanner.NextLex();
                    PushPosInfo(startChar, endChar);
                    opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
                    PopPosInfo();
                } else if (scanner.Kind == LexKind.SlashSlash) {
                    scanner.NextLex();
                    PushPosInfo(startChar, endChar);
                    opnd = builder.JoinStep(opnd,
                        builder.JoinStep(
                            builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
                            ParseRelativeLocationPath()
                        )
                    );
                    PopPosInfo();
                }
                return opnd;
            } else {
                return ParseLocationPath();
            }
        }
 
        /*
        *   FilterExpr ::= PrimaryExpr Predicate*
        */
        private Node ParseFilterExpr() {
            int startChar = scanner.LexStart;
            Node opnd = ParsePrimaryExpr();
            int endChar = scanner.PrevLexEnd;
 
            while (scanner.Kind == LexKind.LBracket) {
                PushPosInfo(startChar, endChar);
                opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false);
                PopPosInfo();
            }
            return opnd;
        }
 
        private bool IsPrimaryExpr() {
            return (
                scanner.Kind == LexKind.String  ||
                scanner.Kind == LexKind.Number  ||
                scanner.Kind == LexKind.Dollar  ||
                scanner.Kind == LexKind.LParens ||
                scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner)
            );
        }
 
        /*
        *   PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall
        */
        private Node ParsePrimaryExpr() {
            Debug.Assert(IsPrimaryExpr());
            Node opnd;
            switch (scanner.Kind) {
            case LexKind.String:
                opnd = builder.String(scanner.StringValue);
                scanner.NextLex();
                break;
            case LexKind.Number:
                opnd = builder.Number(XPathConvert.StringToDouble(scanner.RawValue));
                scanner.NextLex();
                break;
            case LexKind.Dollar:
                int startChar = scanner.LexStart;
                scanner.NextLex();
                scanner.CheckToken(LexKind.Name);
                PushPosInfo(startChar, scanner.LexStart + scanner.LexSize);
                opnd = builder.Variable(scanner.Prefix, scanner.Name);
                PopPosInfo();
                scanner.NextLex();
                break;
            case LexKind.LParens:
                scanner.NextLex();
                opnd = ParseExpr();
                scanner.PassToken(LexKind.RParens);
                break;
            default:
                Debug.Assert(
                    scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner),
                    "IsPrimaryExpr() returned true, but the lexeme is not recognized"
                );
                opnd = ParseFunctionCall();
                break;
            }
            return opnd;
        }
 
        /*
        *   FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')'
        */
        private Node ParseFunctionCall() {
            List<Node> argList = new List<Node>();
            string name   = scanner.Name;
            string prefix = scanner.Prefix;
            int startChar = scanner.LexStart;
 
            scanner.PassToken(LexKind.Name);
            scanner.PassToken(LexKind.LParens);
 
            if (scanner.Kind != LexKind.RParens) {
                while (true) {
                    argList.Add(ParseExpr());
                    if (scanner.Kind != LexKind.Comma) {
                        scanner.CheckToken(LexKind.RParens);
                        break;
                    }
                    scanner.NextLex();  // move off the ','
                }
            }
 
            scanner.NextLex();          // move off the ')'
            PushPosInfo(startChar, scanner.PrevLexEnd);
            Node result = builder.Function(prefix, name, argList);
            PopPosInfo();
            return result;
        }
        #endregion
 
        /**************************************************************************************************/
        /*  Helper methods                                                                                */
        /**************************************************************************************************/
 
        private void PushPosInfo(int startChar, int endChar) {
            posInfo.Push(startChar);
            posInfo.Push(endChar);
        }
 
        private void PopPosInfo() {
            posInfo.Pop();
            posInfo.Pop();
        }
 
        private void PopPosInfo(out int startChar, out int endChar) {
            endChar   = posInfo.Pop();
            startChar = posInfo.Pop();
        }
    }
}