File: System\Data\Services\Parsing\ExpressionLexer.cs
Project: ndp\fx\src\DataWeb\Server\System.Data.Services.csproj (System.Data.Services)
//---------------------------------------------------------------------
// <copyright file="ExpressionLexer.cs" company="Microsoft">
//      Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <summary>
//      Provides a type to tokenize text.
// </summary>
//
// @owner  Microsoft
//---------------------------------------------------------------------
 
namespace System.Data.Services.Parsing
{
    using System;
    using System.Diagnostics;
    using System.Text;
 
    /// <summary>Use this class to parse an expression in the Astoria URI format.</summary>
    /// <remarks>
    /// Literals (non-normative "handy" reference - see spec for correct expression):
    /// Null        null
    /// Boolean     true | false
    /// Int32       (digit+)
    /// Int64       (digit+)(L|l)
    /// Decimal     (digit+ ['.' digit+])(M|m)
    /// Float       (digit+ ['.' digit+] [(e|E) [+|-] digit+])(f|F)
    /// Double      (digit+ ['.' digit+] [(e|E) [+|-] digit+])[d|D]
    /// String      "'" .* "'"
    /// DateTime    datetime"'"dddd-dd-dd[T|' ']dd:mm[ss[.fffffff]]"'"
    /// Binary      (binary|X)'digit*'
    /// GUID        guid'digit*'
    /// </remarks>
    [DebuggerDisplay("ExpressionLexer ({text} @ {textPos} [{token}]")]
    internal class ExpressionLexer
    {
        #region Private fields.
 
        /// <summary>Lower-case suffix character that marks a single-precision literal (for example INFf or NaNf).</summary>
        private const char SingleSuffixLower = 'f';

        /// <summary>Upper-case suffix character that marks a single-precision literal (for example INFF or NaNF).</summary>
        private const char SingleSuffixUpper = 'F';

        /// <summary>Text being parsed; assigned once in the constructor.</summary>
        private readonly string text;

        /// <summary>Length of <see cref="text"/>, cached in the constructor.</summary>
        private readonly int textLen;

        /// <summary>
        /// Index into <see cref="text"/> of the character being processed; 
        /// equals <see cref="textLen"/> once the input is exhausted.
        /// </summary>
        private int textPos;

        /// <summary>
        /// Character at <see cref="textPos"/>, or '\0' when the position is at 
        /// or past the end of the text.
        /// </summary>
        private char ch;

        /// <summary>Token most recently produced by <see cref="NextToken"/>.</summary>
        private Token token;
 
        #endregion Private fields.
 
        #region Constructors.
 
        /// <summary>
        /// Creates a new <see cref="ExpressionLexer"/> over the specified text 
        /// and reads its first token.
        /// </summary>
        /// <param name="expression">Expression to tokenize; expected to be non-null.</param>
        internal ExpressionLexer(string expression)
        {
            Debug.Assert(expression != null, "expression != null");

            // The text and its length must be in place before the position is set,
            // because positioning reads the current character from the text.
            this.text = expression;
            this.textLen = expression.Length;

            // Start on the first character and make the first token available.
            this.SetTextPos(0);
            this.NextToken();
        }
 
        #endregion Constructors.
 
        #region Internal properties.
 
        /// <summary>Gets or sets the token the lexer is currently positioned on.</summary>
        internal Token CurrentToken
        {
            get
            {
                return this.token;
            }

            set
            {
                this.token = value;
            }
        }
 
        /// <summary>Gets the complete expression text this lexer was created with.</summary>
        internal string ExpressionText
        {
            get
            {
                return this.text;
            }
        }
 
        /// <summary>Gets the position recorded on the current token (where it starts in the text).</summary>
        internal int Position
        {
            get
            {
                return this.token.Position;
            }
        }
 
        #endregion Internal properties.
 
        #region Internal methods.
 
        /// <summary>Determines whether a token identifier denotes a numeric literal.</summary>
        /// <param name="id">Token identifier to classify.</param>
        /// <returns>
        /// true for integer, Int64, decimal, double and single literals; false otherwise.
        /// </returns>
        internal static bool IsNumeric(TokenId id)
        {
            // Integral literals.
            if (id == TokenId.IntegerLiteral || id == TokenId.Int64Literal)
            {
                return true;
            }

            // Fixed-point literal.
            if (id == TokenId.DecimalLiteral)
            {
                return true;
            }

            // Floating-point literals.
            return id == TokenId.DoubleLiteral || id == TokenId.SingleLiteral;
        }
 
        /// <summary>Reads the next token, skipping whitespace as necessary.</summary>
        /// <remarks>
        /// On return the token field describes the token just read (identifier, text and 
        /// start position) and the lexer is positioned on the first character after it.
        /// When the end of the text is reached the token identifier is TokenId.End.
        /// An unterminated string literal or an unrecognized character raises the 
        /// exception created by ParseError.
        /// </remarks>
        internal void NextToken()
        {
            // Skip leading whitespace; the end-of-text sentinel '\0' is not whitespace, so this stops at the end.
            while (Char.IsWhiteSpace(this.ch))
            {
                this.NextChar();
            }

            TokenId t;

            // Remember where the token starts; used for the token text, its position and for rewinding.
            int tokenPos = this.textPos;
            switch (this.ch)
            {
                case '(':
                    this.NextChar();
                    t = TokenId.OpenParen;
                    break;
                case ')':
                    this.NextChar();
                    t = TokenId.CloseParen;
                    break;
                case ',':
                    this.NextChar();
                    t = TokenId.Comma;
                    break;
                case '-':
                    // A '-' may start a negative numeric literal or a negative infinity literal;
                    // in those cases the sign becomes part of the token text because tokenPos 
                    // still points at the '-'.
                    bool hasNext = this.textPos + 1 < this.textLen;
                    if (hasNext && Char.IsDigit(this.text[this.textPos + 1]))
                    {
                        this.NextChar();
                        t = this.ParseFromDigit();
                        if (IsNumeric(t))
                        {
                            break;
                        }

                        // If it looked like a numeric but wasn't (because it was a binary 0x... value for example), 
                        // we'll rewind and fall through to a simple '-' token.
                        this.SetTextPos(tokenPos);
                    }
                    else if (hasNext && this.text[tokenPos + 1] == XmlConstants.XmlInfinityLiteral[0])
                    {
                        this.NextChar();
                        this.ParseIdentifier();

                        // Text of the identifier that follows the '-' (the sign itself is excluded).
                        string currentIdentifier = this.text.Substring(tokenPos + 1, this.textPos - tokenPos - 1);

                        if (IsInfinityLiteralDouble(currentIdentifier))
                        {
                            t = TokenId.DoubleLiteral;
                            break;
                        }
                        else if (IsInfinityLiteralSingle(currentIdentifier))
                        {
                            t = TokenId.SingleLiteral;
                            break;
                        }

                        // If it looked like '-INF' but wasn't we'll rewind and fall through to a simple '-' token.
                        this.SetTextPos(tokenPos);
                    }

                    // Plain minus: consume just the '-' character.
                    this.NextChar();
                    t = TokenId.Minus;
                    break;
                case '=':
                    this.NextChar();
                    t = TokenId.Equal;
                    break;
                case '/':
                    this.NextChar();
                    t = TokenId.Slash;
                    break;
                case '?':
                    this.NextChar();
                    t = TokenId.Question;
                    break;
                case '.':
                    this.NextChar();
                    t = TokenId.Dot;
                    break;
                case '\'':
                    // String literal. Each pass of the outer loop consumes text up to and including 
                    // a closing quote; if another quote follows immediately (a doubled quote), the 
                    // literal continues and the loop runs again.
                    char quote = this.ch;
                    do
                    {
                        this.NextChar();
                        while (this.textPos < this.textLen && this.ch != quote)
                        {
                            this.NextChar();
                        }

                        // Ran off the end of the text without finding the closing quote.
                        if (this.textPos == this.textLen)
                        {
                            throw ParseError(Strings.RequestQueryParser_UnterminatedStringLiteral(this.textPos, this.text));
                        }

                        this.NextChar();
                    } 
                    while (this.ch == quote);
                    t = TokenId.StringLiteral;
                    break;
                case '*':
                    this.NextChar();
                    t = TokenId.Star;
                    break;
                default:
                    if (Char.IsLetter(this.ch) || this.ch == '_')
                    {
                        this.ParseIdentifier();
                        t = TokenId.Identifier;
                        break;
                    }

                    if (Char.IsDigit(this.ch))
                    {
                        t = this.ParseFromDigit();
                        break;
                    }

                    // Nothing left to read: report the end of the expression.
                    if (this.textPos == this.textLen)
                    {
                        t = TokenId.End;
                        break;
                    }

                    throw ParseError(Strings.RequestQueryParser_InvalidCharacter(this.ch, this.textPos));
            }

            // Publish the token: everything from its start up to (not including) the current position.
            this.token.Id = t;
            this.token.Text = this.text.Substring(tokenPos, this.textPos - tokenPos);
            this.token.Position = tokenPos;

            // Handle type-prefixed literals such as binary, datetime or guid.
            this.HandleTypePrefixedLiterals();

            // Handle keywords.
            // Identifiers that spell a special floating-point value, a boolean keyword or the 
            // null keyword are re-classified as the corresponding literal.
            if (this.token.Id == TokenId.Identifier)
            {
                if (IsInfinityOrNaNDouble(this.token.Text))
                {
                    this.token.Id = TokenId.DoubleLiteral;
                }
                else if (IsInfinityOrNanSingle(this.token.Text))
                {
                    this.token.Id = TokenId.SingleLiteral;
                }
                else if (this.token.Text == ExpressionConstants.KeywordTrue || this.token.Text == ExpressionConstants.KeywordFalse)
                {
                    this.token.Id = TokenId.BooleanLiteral;
                }
                else if (this.token.Text == ExpressionConstants.KeywordNull)
                {
                    this.token.Id = TokenId.NullLiteral;
                }
            }
        }
 
        /// <summary>
        /// Reads an identifier optionally followed by any number of 
        /// '.'-separated identifiers, and returns them joined with dots. 
        /// Whitespace between the parts is skipped by the lexer and does 
        /// not appear in the result.
        /// </summary>
        /// <returns>The dotted identifier starting at the current identifier.</returns>
        /// <remarks>
        /// The current token must be an identifier, and every dot must be 
        /// followed by one; otherwise a syntax error is raised. On return the 
        /// lexer is positioned on the first token after the dotted identifier.
        /// </remarks>
        internal string ReadDottedIdentifier()
        {
            this.ValidateToken(TokenId.Identifier);
            string firstSegment = this.CurrentToken.Text;
            this.NextToken();

            // Common case: a plain identifier with no dots needs no string building.
            if (this.CurrentToken.Id != TokenId.Dot)
            {
                return firstSegment;
            }

            StringBuilder dotted = new StringBuilder(firstSegment);
            do
            {
                // Step over the dot; an identifier must follow it.
                this.NextToken();
                this.ValidateToken(TokenId.Identifier);

                dotted.Append('.');
                dotted.Append(this.CurrentToken.Text);
                this.NextToken();
            }
            while (this.CurrentToken.Id == TokenId.Dot);

            return dotted.ToString();
        }
 
        /// <summary>Reads the token that follows the current one, then restores the lexer state.</summary>
        /// <returns>The token that the next call to <see cref="NextToken"/> would produce.</returns>
        internal Token PeekNextToken()
        {
            // Snapshot the fields that are restored after the look-ahead.
            Token tokenBeforePeek = this.token;
            char charBeforePeek = this.ch;
            int posBeforePeek = this.textPos;

            this.NextToken();
            Token peeked = this.token;

            // Put the lexer back exactly where it was.
            this.token = tokenBeforePeek;
            this.ch = charBeforePeek;
            this.textPos = posBeforePeek;

            return peeked;
        }
 
        /// <summary>Ensures the current token is of the expected kind.</summary>
        /// <param name="t">Expected token kind.</param>
        /// <remarks>Raises a syntax error reporting the current text position when the kinds differ.</remarks>
        internal void ValidateToken(TokenId t)
        {
            if (this.token.Id == t)
            {
                return;
            }

            throw ParseError(Strings.RequestQueryParser_SyntaxError(this.textPos));
        }
 
        #endregion Internal methods.
 
        #region Private methods.
 
        /// <summary>Checks whether <paramref name="tokenText"/> is the double infinity or NaN literal.</summary>
        /// <param name="tokenText">Input token.</param>
        /// <returns>true if match found, false otherwise.</returns>
        private static bool IsInfinityOrNaNDouble(string tokenText)
        {
            // Only three-character tokens are candidates.
            if (tokenText.Length != 3)
            {
                return false;
            }

            // Use the first character to decide which literal to compare against.
            char leading = tokenText[0];
            if (leading == XmlConstants.XmlInfinityLiteral[0])
            {
                return IsInfinityLiteralDouble(tokenText);
            }

            if (leading == XmlConstants.XmlNaNLiteral[0])
            {
                int comparison = String.CompareOrdinal(tokenText, 0, XmlConstants.XmlNaNLiteral, 0, 3);
                return comparison == 0;
            }

            return false;
        }
 
        /// <summary>
        /// Checks whether <paramref name="text"/> is exactly the infinity 
        /// literal ('INF', as defined by XmlConstants.XmlInfinityLiteral).
        /// </summary>
        /// <param name="text">Text to compare.</param>
        /// <returns>
        /// true if the whole of <paramref name="text"/> equals the infinity 
        /// literal using an ordinal comparison; false otherwise.
        /// </returns>
        private static bool IsInfinityLiteralDouble(string text)
        {
            Debug.Assert(text != null, "text != null");

            // Compare the complete strings. Limiting the comparison to text.Length 
            // characters would also accept proper prefixes of the literal (and the 
            // empty string), so input such as "-I" or "-IN" would be lexed as a 
            // double literal.
            return String.CompareOrdinal(text, XmlConstants.XmlInfinityLiteral) == 0;
        }
 
        /// <summary>
        /// Checks whether <paramref name="tokenText"/> is the infinity or NaN literal 
        /// followed by the single suffix (INFf/INFF or NaNf/NaNF).
        /// </summary>
        /// <param name="tokenText">Input token.</param>
        /// <returns>true if match found, false otherwise.</returns>
        private static bool IsInfinityOrNanSingle(string tokenText)
        {
            // Three literal characters plus the one-character suffix.
            if (tokenText.Length != 4)
            {
                return false;
            }

            char leading = tokenText[0];
            if (leading == XmlConstants.XmlInfinityLiteral[0])
            {
                return IsInfinityLiteralSingle(tokenText);
            }

            if (leading != XmlConstants.XmlNaNLiteral[0])
            {
                return false;
            }

            // NaN candidate: check the suffix first, then the three literal characters.
            char suffix = tokenText[3];
            if (suffix != ExpressionLexer.SingleSuffixLower && suffix != ExpressionLexer.SingleSuffixUpper)
            {
                return false;
            }

            return String.CompareOrdinal(tokenText, 0, XmlConstants.XmlNaNLiteral, 0, 3) == 0;
        }
 
        /// <summary>
        /// Checks whether <paramref name="text"/> equals 'INFf' or 'INFF'.
        /// </summary>
        /// <param name="text">Text to compare.</param>
        /// <returns>
        /// true if the text is four characters long, ends with the single suffix and 
        /// its first three characters match the infinity literal using an ordinal 
        /// comparison; false otherwise.
        /// </returns>
        private static bool IsInfinityLiteralSingle(string text)
        {
            Debug.Assert(text != null, "text != null");

            if (text.Length != 4)
            {
                return false;
            }

            char suffix = text[3];
            if (suffix != ExpressionLexer.SingleSuffixLower && suffix != ExpressionLexer.SingleSuffixUpper)
            {
                return false;
            }

            return String.CompareOrdinal(text, 0, XmlConstants.XmlInfinityLiteral, 0, 3) == 0;
        }
 
        /// <summary>Builds the exception used to report a parse error.</summary>
        /// <param name="message">Message text.</param>
        /// <returns>The exception created by DataServiceException.CreateSyntaxError for the message.</returns>
        private static Exception ParseError(string message)
        {
            Exception syntaxError = DataServiceException.CreateSyntaxError(message);
            return syntaxError;
        }
 
        /// <summary>
        /// Recognizes literals written as a type prefix immediately followed by a quoted 
        /// value (datetime'...', guid'...', binary'...' or X'...').
        /// </summary>
        /// <remarks>
        /// When the current token is such a prefix and a quote follows it, the token is 
        /// extended through the closing quote and re-classified; otherwise the token is 
        /// left untouched. A missing closing quote raises a parse error.
        /// </remarks>
        private void HandleTypePrefixedLiterals()
        {
            // Only an identifier directly followed by a quote can be a type prefix.
            if (this.token.Id != TokenId.Identifier || this.ch != '\'')
            {
                return;
            }

            string prefix = this.token.Text;
            TokenId literalId;
            if (String.Equals(prefix, "datetime", StringComparison.OrdinalIgnoreCase))
            {
                literalId = TokenId.DateTimeLiteral;
            }
            else if (String.Equals(prefix, "guid", StringComparison.OrdinalIgnoreCase))
            {
                literalId = TokenId.GuidLiteral;
            }
            else if (String.Equals(prefix, "binary", StringComparison.OrdinalIgnoreCase) || prefix == "X" || prefix == "x")
            {
                literalId = TokenId.BinaryLiteral;
            }
            else
            {
                // Some other identifier that merely precedes a string literal.
                return;
            }

            int literalStart = this.token.Position;

            // Step past the opening quote, then scan to the closing quote or the end of the text.
            this.NextChar();
            while (this.ch != '\0' && this.ch != '\'')
            {
                this.NextChar();
            }

            if (this.ch == '\0')
            {
                throw ParseError(Strings.RequestQueryParser_UnterminatedLiteral(this.textPos, this.text));
            }

            // Consume the closing quote and widen the token to cover prefix and quoted value.
            this.NextChar();
            this.token.Id = literalId;
            this.token.Text = this.text.Substring(literalStart, this.textPos - literalStart);
        }
 
        /// <summary>
        /// Advances to the next character; once the end of the text is reached 
        /// the position stays there and the current character becomes '\0'.
        /// </summary>
        private void NextChar()
        {
            // Never move past the end of the text.
            if (this.textPos < this.textLen)
            {
                this.textPos++;
            }

            if (this.textPos < this.textLen)
            {
                this.ch = this.text[this.textPos];
            }
            else
            {
                this.ch = '\0';
            }
        }
 
        /// <summary>Parses a token that starts with a digit.</summary>
        /// <remarks>
        /// Recognizes either a hexadecimal binary literal ("0x" or "0X" followed by hex 
        /// digits) or a run of decimal digits with an optional fraction, an optional 
        /// exponent and an optional type suffix (M/m decimal, D/d double, L/l Int64, 
        /// F/f single). On entry the current character must be the first digit; on 
        /// return the lexer is positioned on the first character after the token. 
        /// A '.' or exponent that is not followed by a digit raises a parse error.
        /// </remarks>
        /// <returns>The kind of token recognized.</returns>
        private TokenId ParseFromDigit()
        {
            Debug.Assert(Char.IsDigit(this.ch), "Char.IsDigit(this.ch)");
            TokenId result;
            char startChar = this.ch;
            this.NextChar();

            // The hex prefix is "0x" or "0X". The parentheses are required: '&&' binds 
            // tighter than '||', so without them any digit followed by 'X' (for example 
            // "1X") would be treated as a binary literal.
            if (startChar == '0' && (this.ch == 'x' || this.ch == 'X'))
            {
                result = TokenId.BinaryLiteral;

                // Skip the 'x' and then every hex digit that follows.
                do
                {
                    this.NextChar();
                }
                while (WebConvert.IsCharHexDigit(this.ch));
            }
            else
            {
                result = TokenId.IntegerLiteral;
                while (Char.IsDigit(this.ch))
                {
                    this.NextChar();
                }

                // Optional fraction: at least one digit must follow the '.'.
                if (this.ch == '.')
                {
                    result = TokenId.DoubleLiteral;
                    this.NextChar();
                    this.ValidateDigit();

                    do
                    {
                        this.NextChar();
                    }
                    while (Char.IsDigit(this.ch));
                }

                // Optional exponent: 'e' or 'E', an optional sign, then at least one digit.
                if (this.ch == 'E' || this.ch == 'e')
                {
                    result = TokenId.DoubleLiteral;
                    this.NextChar();
                    if (this.ch == '+' || this.ch == '-')
                    {
                        this.NextChar();
                    }

                    this.ValidateDigit();
                    do
                    {
                        this.NextChar();
                    }
                    while (Char.IsDigit(this.ch));
                }

                // Optional type suffix; it overrides the kind inferred so far.
                if (this.ch == 'M' || this.ch == 'm')
                {
                    result = TokenId.DecimalLiteral;
                    this.NextChar();
                }
                else if (this.ch == 'd' || this.ch == 'D')
                {
                    result = TokenId.DoubleLiteral;
                    this.NextChar();
                }
                else if (this.ch == 'L' || this.ch == 'l')
                {
                    result = TokenId.Int64Literal;
                    this.NextChar();
                }
                else if (this.ch == 'f' || this.ch == 'F')
                {
                    result = TokenId.SingleLiteral;
                    this.NextChar();
                }
            }

            return result;
        }
 
        /// <summary>
        /// Consumes an identifier: the current character plus every letter, digit 
        /// or underscore that follows it.
        /// </summary>
        private void ParseIdentifier()
        {
            Debug.Assert(Char.IsLetter(this.ch) || this.ch == '_', "Char.IsLetter(this.ch) || this.ch == '_'");

            // The first character is already known to start an identifier; step over it.
            this.NextChar();
            while (Char.IsLetterOrDigit(this.ch) || this.ch == '_')
            {
                this.NextChar();
            }
        }
 
        /// <summary>Moves the lexer to the given position and loads the character found there.</summary>
        /// <param name="pos">New text position.</param>
        private void SetTextPos(int pos)
        {
            this.textPos = pos;

            // At or past the end of the text, the current character is the '\0' sentinel.
            if (this.textPos < this.textLen)
            {
                this.ch = this.text[this.textPos];
            }
            else
            {
                this.ch = '\0';
            }
        }
 
        /// <summary>Ensures the current character is a digit, raising a parse error otherwise.</summary>
        private void ValidateDigit()
        {
            if (Char.IsDigit(this.ch))
            {
                return;
            }

            throw ParseError(Strings.RequestQueryParser_DigitExpected(this.textPos));
        }
 
        #endregion Private methods.
    }
}