// RegexParser.cs

//------------------------------------------------------------------------------
// <copyright file="RegexParser.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>                                                                
//------------------------------------------------------------------------------
 
// This RegexParser class is internal to the Regex package.
// It builds a tree of RegexNodes from a regular expression
 
// Implementation notes:
//
// It would be nice to get rid of the comment modes, since the
// ScanBlank() calls are just kind of duct-taped in.
 
 
namespace System.Text.RegularExpressions {
 
    using System.Collections;
    using System.Collections.Generic;
    using System.Globalization;
        
    internal sealed class RegexParser {
        // Nodes of the tree currently being built. Reset() clears _stack and
        // ScanReplacement() assigns _concatenation directly; the remaining slots are
        // presumably maintained by the StartGroup/PushGroup/PopGroup/Add* helpers
        // defined elsewhere in this class -- confirm there.
        internal RegexNode _stack;
        internal RegexNode _group;
        internal RegexNode _alternation;
        internal RegexNode _concatenation;
        internal RegexNode _unit;
 
        // The text being parsed and the current scan offset into it; both are
        // (re)initialised by SetPattern().
        internal String _pattern;
        internal int _currentPos;
        // Culture supplied to the constructor; handed to RegexCharClass.AddLowercase()
        // when a case-insensitive character class is finished.
        internal CultureInfo _culture;
        
        // Capture bookkeeping. _autocap is the number given to the next unnamed
        // capturing group: Reset() starts it at 1 and ScanGroupOpen() post-increments it.
        // _captop is passed through to the RegexTree built by Parse(). The other counters
        // are presumably maintained by CountCaptures()/NoteCaptures(), which are outside
        // this view -- confirm there.
        internal int _autocap;
        internal int _capcount;
        internal int _captop;
        internal int _capsize;
        // Capture tables. _caps is created by the constructor; both tables are handed
        // to the RegexTree result of Parse(). The Silverlight build uses typed
        // dictionaries where the desktop build uses Hashtable.
#if SILVERLIGHT
        internal Dictionary<Int32, Int32> _caps;
        internal Dictionary<String, Int32> _capnames;
#else
        internal Hashtable _caps;
        internal Hashtable _capnames;
#endif
        // Parse() passes _capnumlist to RegexTree as-is and converts _capnamelist to an
        // array (or null when the list was never created).
        internal Int32[] _capnumlist;
        internal List<String> _capnamelist;
 
        // Options currently in effect while scanning, plus the stack of saved option
        // sets (created by the constructor, emptied by Reset()).
        internal RegexOptions _options;
        internal List<RegexOptions> _optionsStack;
 
        // Set by ScanGroupOpen() when a conditional "(?(" has an expression as its test,
        // so that the following plain "(" yields a non-capturing Group node.
        internal bool _ignoreNextParen = false;
        
        // Int32.MaxValue split into quotient and remainder by 10. Presumably used for
        // overflow detection while accumulating decimal digits -- the use site is
        // outside this view, confirm there.
        internal const int MaxValueDiv10 = Int32.MaxValue / 10;
        internal const int MaxValueMod10 = Int32.MaxValue % 10;
 
        /*
         * This static call constructs a RegexTree from a regular expression
         * pattern string and a set of RegexOptions.
         *
         * The method creates, drives, and drops a parser instance.
         */
        internal static RegexTree Parse(String re, RegexOptions op) {
            // RegexOptions.CultureInvariant selects the invariant culture; otherwise the
            // parser works under the caller's current culture.
            bool invariant = (op & RegexOptions.CultureInvariant) != 0;
            RegexParser parser = new RegexParser(invariant ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);
            parser._options = op;

            // CountCaptures() runs over the pattern first; the parser is then rewound
            // with Reset() and the pattern is scanned again to build the node tree.
            parser.SetPattern(re);
            parser.CountCaptures();
            parser.Reset(op);
            RegexNode tree = parser.ScanRegex();

            // The name list is optional: pass null through when it was never created.
            String[] names = (parser._capnamelist != null) ? parser._capnamelist.ToArray() : null;

            return new RegexTree(tree, parser._caps, parser._capnumlist, parser._captop, parser._capnames, names, op);
        }
 
        /*
         * This static call constructs a flat concatenation node given
         * a replacement pattern.
         */
#if SILVERLIGHT
        internal static RegexReplacement ParseReplacement(String rep, Dictionary<Int32, Int32> caps, int capsize, Dictionary<String, Int32> capnames, RegexOptions op) {
#else
        internal static RegexReplacement ParseReplacement(String rep, Hashtable caps, int capsize, Hashtable capnames, RegexOptions op) {
#endif
            // Same culture rule as Parse(): invariant only when explicitly requested.
            CultureInfo culture = ((op & RegexOptions.CultureInvariant) == 0)
                ? CultureInfo.CurrentCulture
                : CultureInfo.InvariantCulture;

            RegexParser parser = new RegexParser(culture);
            parser._options = op;

            // The capture tables of the already-parsed regex are handed to the parser
            // before the replacement text is scanned.
            parser.NoteCaptures(caps, capsize, capnames);
            parser.SetPattern(rep);

            RegexNode concat = parser.ScanReplacement();
            return new RegexReplacement(rep, concat, caps);
        }
 
        /*
         * Escapes all metacharacters (including |,(,),[,{,|,^,$,*,+,?,\, spaces and #)
         */
        internal static String Escape(String input) {
            // Locate the first metacharacter. If there is none, the input needs no
            // escaping and the very same string instance is returned.
            int pos = 0;
            while (pos < input.Length && !IsMetachar(input[pos]))
                pos++;

            if (pos == input.Length)
                return input;

            StringBuilder escaped = new StringBuilder();
            escaped.Append(input, 0, pos);

            // Invariant at the top of each pass: input[pos] is a metacharacter.
            while (pos < input.Length) {
                char meta = input[pos];
                pos++;

                // Control characters are written as their letter escapes (\n, \r, \t, \f);
                // every other metacharacter is simply prefixed with a backslash.
                if (meta == '\n')
                    meta = 'n';
                else if (meta == '\r')
                    meta = 'r';
                else if (meta == '\t')
                    meta = 't';
                else if (meta == '\f')
                    meta = 'f';

                escaped.Append('\\');
                escaped.Append(meta);

                // Copy the run of ordinary characters up to the next metacharacter.
                int runStart = pos;
                while (pos < input.Length && !IsMetachar(input[pos]))
                    pos++;

                escaped.Append(input, runStart, pos - runStart);
            }

            return escaped.ToString();
        }
 
        /*
         * Replaces each backslash escape sequence in the input with the character it denotes.
         */
        internal static String Unescape(String input) {
            // Without a backslash there is nothing to translate: return the same instance.
            int pos = input.IndexOf('\\');
            if (pos < 0)
                return input;

            StringBuilder result = new StringBuilder();

            // A throw-away parser over the input is used only for its escape scanner.
            RegexParser parser = new RegexParser(CultureInfo.InvariantCulture);
            parser.SetPattern(input);

            result.Append(input, 0, pos);

            for (;;) {
                // pos sits on a backslash: step past it and let the parser decode the
                // escape that follows (a trailing lone backslash contributes nothing).
                pos++;
                parser.Textto(pos);
                if (pos < input.Length)
                    result.Append(parser.ScanCharEscape());
                pos = parser.Textpos();

                // Copy literal text up to the next backslash or the end of the input.
                int literalStart = pos;
                for (; pos < input.Length; pos++) {
                    if (input[pos] == '\\')
                        break;
                }
                result.Append(input, literalStart, pos - literalStart);

                if (pos >= input.Length)
                    break;
            }

            return result.ToString();
        }
 
        /*
         * Private constructor.
         */
        private RegexParser(CultureInfo culture) {
            // Start with an empty capture table and an empty options stack; the
            // culture is only remembered here.
#if SILVERLIGHT
            this._caps = new Dictionary<Int32,Int32>();
#else
            this._caps = new Hashtable();
#endif
            this._optionsStack = new List<RegexOptions>();
            this._culture = culture;
        }
 
        /*
         * Drops a string into the pattern buffer.
         */
        internal void SetPattern(String Re) {
            // A null pattern is treated as the empty pattern; scanning restarts at offset 0.
            _pattern = (Re != null) ? Re : String.Empty;
            _currentPos = 0;
        }
 
        /*
         * Resets parsing to the beginning of the pattern.
         */
        internal void Reset(RegexOptions topopts) {
            // Rewind to the start of the pattern and restart automatic capture numbering.
            _currentPos = 0;
            _autocap = 1;
            _ignoreNextParen = false;

            // Discard every saved options frame. The earlier
            // RemoveRange(0, Count - 1) removed all but the newest frame, so an
            // entry left behind by a previous pass over an unbalanced pattern
            // survived the reset.
            _optionsStack.Clear();

            // topopts becomes the active option set, and no group is open.
            _options = topopts;
            _stack = null;
        }
 
        /*
         * The main parsing function.
         */
        internal RegexNode ScanRegex() {
            // Scans the pattern from the current position to its end and returns the
            // finished unit (Unit()) after the outermost group has been closed.
            // Errors are raised through MakeException(): too many / not enough
            // parentheses, a quantifier with nothing before it, a quantifier directly
            // after another quantifier, and a {min,max} range with min > max.
            //
            // ch doubles as a small state code: '@' = at beginning, '!' = at end of
            // pattern, ' ' = stopped on an ordinary character; any other value is the
            // special character that was just consumed.
            char ch = '@'; // nonspecial ch, means at beginning
            bool isQuantifier = false;

            // The whole pattern is wrapped in an implicit Capture node numbered 0.
            StartGroup(new RegexNode(RegexNode.Capture, _options, 0, -1));

            while (CharsRight() > 0) {
                // Remember whether the previous pass ended on a quantifier; this only
                // selects which error message is used further down.
                bool wasPrevQuantifier = isQuantifier;
                isQuantifier = false;

                ScanBlank();

                int startpos = Textpos();

                // move past all of the normal characters.  We'll stop when we hit some kind of control character, 
                // or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace. 
                // A '{' that does not begin a real quantifier is treated as a normal character.
                if (UseOptionX())
                    while (CharsRight() > 0 && (!IsStopperX(ch = RightChar()) || ch == '{' && !IsTrueQuantifier()))
                        MoveRight();
                else
                    while (CharsRight() > 0 && (!IsSpecial(ch = RightChar()) || ch == '{' && !IsTrueQuantifier()))
                        MoveRight();

                int endpos = Textpos();

                ScanBlank();

                // Classify what stopped the run and consume it if it is a special character.
                if (CharsRight() == 0)
                    ch = '!'; // nonspecial, means at end
                else if (IsSpecial(ch = RightChar())) {
                    isQuantifier = IsQuantifier(ch);
                    MoveRight();
                } else
                    ch = ' '; // nonspecial, means at ordinary char

                // Emit the literal run [startpos, endpos). When a quantifier follows, the
                // last character of the run is split off as its own unit so that the
                // quantifier applies to that character only.
                if (startpos < endpos) {
                    int cchUnquantified = endpos - startpos - (isQuantifier ? 1 : 0);

                    wasPrevQuantifier = false;

                    if (cchUnquantified > 0)
                        AddConcatenate(startpos, cchUnquantified, false);

                    if (isQuantifier)
                        AddUnitOne(CharAt(endpos - 1));
                }

                // Dispatch on the special character (or state code). Cases that `break`
                // fall into the quantifier handling below; the others jump straight to
                // the next pass or out of the loop.
                switch (ch) {
                    case '!':
                        goto BreakOuterScan;

                    case ' ':
                        goto ContinueOuterScan;

                    case '[':
                        AddUnitSet(ScanCharClass(UseOptionI()).ToStringClass());
                        break;

                    case '(': {
                            RegexNode grouper;

                            // Options are saved before the group header is scanned because
                            // ScanGroupOpen() may modify _options.
                            PushOptions();

                            if (null == (grouper = ScanGroupOpen())) {
                                // null means the "group" only changed options (or was a
                                // comment): no node is opened.
                                PopKeepOptions();
                            }
                            else {
                                PushGroup();
                                StartGroup(grouper);
                            }
                        }
                        // Skip the quantifier handling below and start the next pass.
                        continue;

                    case '|':
                        AddAlternate();
                        goto ContinueOuterScan;

                    case ')':
                        if (EmptyStack())
                            throw MakeException(SR.GetString(SR.TooManyParens));

                        AddGroup();
                        PopGroup();
                        PopOptions();

                        // No unit after closing the group: nothing for a quantifier to apply to.
                        if (Unit() == null)
                            goto ContinueOuterScan;
                        break;

                    case '\\':
                        AddUnitNode(ScanBackslash());
                        break;

                    case '^':
                        AddUnitType(UseOptionM() ? RegexNode.Bol : RegexNode.Beginning);
                        break;

                    case '$':
                        AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ);
                        break;

                    case '.':
                        if (UseOptionS())
                            AddUnitSet(RegexCharClass.AnyClass);
                        else
                            AddUnitNotone('\n');
                        break;

                    case '{':
                    case '*':
                    case '+':
                    case '?':
                        if (Unit() == null)
                            throw MakeException(wasPrevQuantifier ?
                                                SR.GetString(SR.NestedQuantify, ch.ToString()) :
                                                SR.GetString(SR.QuantifyAfterNothing));
                        // Un-consume the quantifier character so that the quantifier
                        // handling below reads it again.
                        MoveLeft();
                        break;

                    default:
                        throw MakeException(SR.GetString(SR.InternalError));
                }

                ScanBlank();

                // No quantifier follows the unit: commit it as-is and move on.
                if (CharsRight() == 0 || !(isQuantifier = IsTrueQuantifier())) {
                    AddConcatenate();
                    goto ContinueOuterScan;
                }

                ch = MoveRightGetChar();

                // Handle quantifiers
                // NOTE(review): the loop runs while Unit() is non-null; presumably
                // AddConcatenate(lazy, min, max) clears the unit so the body executes
                // once -- confirm against that helper.
                while (Unit() != null) {
                    int min;
                    int max;
                    bool lazy;

                    switch (ch) {
                        case '*':
                            min = 0;
                            max = Int32.MaxValue;
                            break;

                        case '?':
                            min = 0;
                            max = 1;
                            break;

                        case '+':
                            min = 1;
                            max = Int32.MaxValue;
                            break;

                        case '{': {
                                // Accepts {n}, {n,} and {n,m}. startpos is the position
                                // just after the '{' that was consumed above.
                                startpos = Textpos();
                                max = min = ScanDecimal();
                                if (startpos < Textpos()) {
                                    if (CharsRight() > 0 && RightChar() == ',') {
                                        MoveRight();
                                        if (CharsRight() == 0 || RightChar() == '}')
                                            max = Int32.MaxValue;
                                        else
                                            max = ScanDecimal();
                                    }
                                }

                                // Not a well-formed quantifier (no digits, or no closing '}'):
                                // commit the unit unquantified and rewind to the '{' so the
                                // next pass re-reads it.
                                if (startpos == Textpos() || CharsRight() == 0 || MoveRightGetChar() != '}') {
                                    AddConcatenate();
                                    Textto(startpos - 1);
                                    goto ContinueOuterScan;
                                }
                            }

                            break;

                        default:
                            throw MakeException(SR.GetString(SR.InternalError));
                    }

                    ScanBlank();

                    // A '?' directly after the quantifier makes it lazy.
                    if (CharsRight() == 0 || RightChar() != '?')
                        lazy = false;
                    else {
                        MoveRight();
                        lazy = true;
                    }

                    if (min > max)
                        throw MakeException(SR.GetString(SR.IllegalRange));

                    AddConcatenate(lazy, min, max);
                }

                ContinueOuterScan:
                ;
            }

            BreakOuterScan: 
            ;

            // A group that is still open at the end of the pattern is an error.
            if (!EmptyStack())
                throw MakeException(SR.GetString(SR.NotEnoughParens));

            // Close the implicit outermost group opened at the top.
            AddGroup();

            return Unit();
        }
 
        /*
         * Simple parsing for replacement patterns
         */
        internal RegexNode ScanReplacement() {
            // Builds one flat Concatenate node: literal runs are added as text and each
            // "$" construct is delegated to ScanDollar().
            _concatenation = new RegexNode(RegexNode.Concatenate, _options);

            int remaining;
            while ((remaining = CharsRight()) != 0) {
                int literalStart = Textpos();

                // Advance over the literal run that ends at the next '$' or at the end
                // of the replacement text.
                for (; remaining > 0 && RightChar() != '$'; remaining--)
                    MoveRight();

                AddConcatenate(literalStart, Textpos() - literalStart, true);

                // Nothing left: the outer loop condition ends the scan.
                if (!(remaining > 0))
                    continue;

                // Stopped on a '$': consume it and let ScanDollar() interpret what follows.
                if (MoveRightGetChar() == '$')
                    AddUnitNode(ScanDollar());
                AddConcatenate();
            }

            return _concatenation;
        }
 
        /*
         * Scans contents of [] (not including []'s), and converts to a
         * RegexCharClass.
         */
        internal RegexCharClass ScanCharClass(bool caseInsensitive) {
            // Convenience overload: always builds the class (scan-only mode is off).
            const bool scanOnly = false;
            return ScanCharClass(caseInsensitive, scanOnly);
        }
 
        /*
         * Scans contents of [] (not including []'s), and converts to a
         * RegexCharClass.
         */
        internal RegexCharClass ScanCharClass(bool caseInsensitive, bool scanOnly) {
            // Scans a character class body, starting just after the opening '[' and
            // consuming through the closing ']'.
            //
            //   caseInsensitive - when true (and not scanOnly) the finished class gets
            //                     AddLowercase(_culture) applied.
            //   scanOnly        - when true no RegexCharClass is built: cc stays null, only
            //                     the scan position advances, and null is returned.
            //
            // Throws (via MakeException) for an unterminated bracket and, when building
            // the class, for a class escape used as a range endpoint, a reversed range,
            // or a subtraction that is not the last element.
            char    ch = '\0';
            char    chPrev = '\0';       // pending low end of a range while inRange is set
            bool inRange = false;        // true after "x-" has been seen and the high end is awaited
            bool firstChar = true;       // a ']' in first position is literal, not the terminator
            bool closed = false;

            RegexCharClass cc;

            cc = scanOnly ? null : new RegexCharClass();

            // A leading '^' negates the class. firstChar is left untouched here, so a
            // ']' directly after the '^' is still treated as a literal.
            if (CharsRight() > 0 && RightChar() == '^') {
                MoveRight();
                if (!scanOnly)
                    cc.Negate = true;
            }

            for ( ; CharsRight() > 0; firstChar = false) {
                // Set when ch was produced by an escape sequence, so that a resulting
                // '[' or '-' is not mistaken for subtraction syntax below.
                bool fTranslatedChar = false;
                ch = MoveRightGetChar();
                if (ch == ']') {
                    if (!firstChar) {
                        closed = true;
                        break;
                    }
                    // else: first-position ']' falls through and is handled as a literal
                }
                else if (ch == '\\' && CharsRight() > 0) {

                    // Class escapes (\d \s \w \p and their negations) add a whole set and
                    // `continue` to the next element; they are rejected as the high end
                    // of a range.
                    switch (ch = MoveRightGetChar()) {
                        case 'D':
                        case 'd':
                            if (!scanOnly) {
                                if (inRange)
                                    throw MakeException(SR.GetString(SR.BadClassInCharRange, ch.ToString()));
                                cc.AddDigit(UseOptionE(), ch == 'D', _pattern);
                            }
                            continue;

                        case 'S':
                        case 's':
                            if (!scanOnly) {
                                if (inRange)
                                    throw MakeException(SR.GetString(SR.BadClassInCharRange, ch.ToString()));
                                cc.AddSpace(UseOptionE(), ch == 'S');
                            }
                            continue;

                        case 'W':
                        case 'w':
                            if (!scanOnly) {
                                if (inRange)
                                    throw MakeException(SR.GetString(SR.BadClassInCharRange, ch.ToString()));

                                cc.AddWord(UseOptionE(), ch == 'W');
                            }
                            continue;

                        case 'p':
                        case 'P':
                            if (!scanOnly) {
                                if (inRange)
                                    throw MakeException(SR.GetString(SR.BadClassInCharRange, ch.ToString()));
                                cc.AddCategoryFromName(ParseProperty(), (ch != 'p'), caseInsensitive, _pattern);
                            }
                            else 
                                // still has to be called in scan-only mode so the
                                // property name is consumed
                                ParseProperty();

                            continue;
                            
                        case '-':
                            // "\-" is a literal hyphen
                            if (!scanOnly)
                                cc.AddRange(ch, ch);
                            continue;
                            
                        default:
                            // Any other escape: back up onto the escape character and let
                            // ScanCharEscape() translate it to a single char.
                            MoveLeft();
                            ch = ScanCharEscape(); // non-literal character
                            fTranslatedChar = true;
                            break;          // this break will only break out of the switch
                    }
                }
                else if (ch == '[') {
                    // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
                    // It currently doesn't do anything other than skip the whole thing!
                    if (CharsRight() > 0 && RightChar() == ':' && !inRange) {
                        String name;
                        int savePos = Textpos();

                        MoveRight();
                        // name itself is unused; the call is needed to advance past it
                        name = ScanCapname();
                        // Not a well-formed "[:name:]": rewind to just after the '['.
                        if (CharsRight() < 2 || MoveRightGetChar() != ':' || MoveRightGetChar() != ']')
                            Textto(savePos);
                        // else lookup name (nyi)
                    }
                }

                
                if (inRange) {
                    // ch is the element following "chPrev-". In scan-only mode nothing is
                    // recorded for either form below.
                    inRange = false;
                    if (!scanOnly) {
                        if (ch == '[' && !fTranslatedChar && !firstChar) {
                            // We thought we were in a range, but we're actually starting a subtraction. 
                            // In that case, we'll add chPrev to our char class, skip the opening [, and
                            // scan the new character class recursively. 
                            cc.AddChar(chPrev);
                            cc.AddSubtraction(ScanCharClass(caseInsensitive, false));

                            if (CharsRight() > 0 && RightChar() != ']')
                                throw MakeException(SR.GetString(SR.SubtractionMustBeLast));
                        } 
                        else {
                            // a regular range, like a-z
                            if (chPrev > ch)
                                throw MakeException(SR.GetString(SR.ReversedCharRange));
                            cc.AddRange(chPrev, ch);
                        }
                    }
                }
                else if (CharsRight() >= 2 && RightChar() == '-' && RightChar(1) != ']') {
                    // this could be the start of a range
                    // (a '-' immediately before the closing ']' is excluded by the test above)
                    chPrev = ch;
                    inRange = true;
                    MoveRight();
                }
                else if (CharsRight() >= 1 && ch == '-' && !fTranslatedChar && RightChar() == '[' && !firstChar) {
                    // we aren't in a range, and now there is a subtraction.  Usually this happens
                    // only when a subtraction follows a range, like [a-z-[b]]
                    if (!scanOnly) {
                        MoveRight(1);
                        cc.AddSubtraction(ScanCharClass(caseInsensitive, false));

                        if (CharsRight() > 0 && RightChar() != ']')
                            throw MakeException(SR.GetString(SR.SubtractionMustBeLast));
                    }
                    else {
                        // scan-only: skip the nested class without building anything
                        MoveRight(1);
                        ScanCharClass(caseInsensitive, true);
                    }
                }
                else {
                    // plain single character
                    if (!scanOnly)
                        cc.AddRange(ch, ch);
                }
            }

            // The loop ended by running out of input rather than by finding ']'.
            if (!closed)
                throw MakeException(SR.GetString(SR.UnterminatedBracket));

            if (!scanOnly && caseInsensitive)
                cc.AddLowercase(_culture);
            
            return cc;
        }
 
        /*
         * Scans chars following a '(' (not counting the '('), and returns
         * a RegexNode for the type of group scanned, or null if the group
         * simply changed options (?cimsx-cimsx) or was a comment (#...).
         */
        internal RegexNode ScanGroupOpen() {
            // Interprets what follows an already-consumed '(' and returns the node that
            // opens the group. Returns null when the construct ends at ')' right after
            // its options (see the default case below). Anything that cannot be
            // recognised ends at BreakRecognize and throws UnrecognizedGrouping; several
            // more specific errors are thrown inline.
            char ch = '\0';
            int NodeType;
            // Expected terminator of a group name: '>' for (?<name>...), switched to
            // '\'' for the (?'name'...) form.
            char close = '>';


            // just return a RegexNode if we have:
            // 1. "(" followed by nothing
            // 2. "(x" where x != ?
            // 3. "(?)"
            // With option N, or when _ignoreNextParen was set by a conditional, the
            // group is non-capturing; otherwise it takes the next automatic number.
            if (CharsRight() == 0 || RightChar() != '?' || (RightChar() == '?' && (CharsRight() > 1 && RightChar(1) == ')'))) {
                if (UseOptionN() || _ignoreNextParen) {
                    _ignoreNextParen = false;
                    return new RegexNode(RegexNode.Group, _options);
                }
                else
                    return new RegexNode(RegexNode.Capture, _options, _autocap++, -1);
            }

            // consume the '?'
            MoveRight();

            // This "loop" body runs at most once: every path through the switch returns,
            // throws, jumps to BreakRecognize, or breaks to the return that follows the
            // switch. The loop form only provides the `break` used when input runs out.
            for (;;) {
                if (CharsRight() == 0)
                    break;

                switch (ch = MoveRightGetChar()) {
                    case ':':
                        // (?:...)
                        NodeType = RegexNode.Group;
                        break;

                    case '=':
                        // (?=...) - RightToLeft is switched off for the group's contents
                        _options &= ~(RegexOptions.RightToLeft);
                        NodeType = RegexNode.Require;
                        break;

                    case '!':
                        // (?!...) - RightToLeft is switched off for the group's contents
                        _options &= ~(RegexOptions.RightToLeft);
                        NodeType = RegexNode.Prevent;
                        break;

                    case '>':
                        // (?>...)
                        NodeType = RegexNode.Greedy;
                        break;

                    case '\'':
                        close = '\'';
                        goto case '<';
                        // fallthrough

                    case '<':
                        if (CharsRight() == 0)
                            goto BreakRecognize;

                        switch (ch = MoveRightGetChar()) {
                            case '=':
                                // (?<=...) - not valid in the quote form
                                if (close == '\'')
                                    goto BreakRecognize;

                                _options |= RegexOptions.RightToLeft;
                                NodeType = RegexNode.Require;
                                break;

                            case '!':
                                // (?<!...) - not valid in the quote form
                                if (close == '\'')
                                    goto BreakRecognize;

                                _options |= RegexOptions.RightToLeft;
                                NodeType = RegexNode.Prevent;
                                break;

                            default:
                                // Named or numbered capture, optionally followed by
                                // "-name2": (?<name1>...), (?<name1-name2>...), (?<-name2>...).
                                // capnum is the group being captured into, uncapnum the
                                // group named after the '-'; -1 means "none".
                                MoveLeft();
                                int capnum = -1;
                                int uncapnum = -1;
                                bool proceed = false;   // set when the name starts with '-' (no first part)

                                // grab part before -

                                if (ch >= '0' && ch <= '9') {
                                    capnum = ScanDecimal();

                                    if (!IsCaptureSlot(capnum))
                                        capnum = -1;

                                    // check if we have bogus characters after the number
                                    if (CharsRight() > 0 && !(RightChar() == close || RightChar() == '-'))
                                        throw MakeException(SR.GetString(SR.InvalidGroupName));
                                    if (capnum == 0)
                                        throw MakeException(SR.GetString(SR.CapnumNotZero));
                                }
                                else if (RegexCharClass.IsWordChar(ch)) {
                                    String capname = ScanCapname();

                                    if (IsCaptureName(capname))
                                        capnum = CaptureSlotFromName(capname);

                                    // check if we have bogus character after the name
                                    if (CharsRight() > 0 && !(RightChar() == close || RightChar() == '-'))
                                        throw MakeException(SR.GetString(SR.InvalidGroupName));
                                }
                                else if (ch == '-') {
                                    proceed = true;
                                }
                                else {
                                    // bad group name - starts with something other than a word character and isn't a number
                                    throw MakeException(SR.GetString(SR.InvalidGroupName));
                                }

                                // grab part after - if any

                                if ((capnum != -1 || proceed == true) && CharsRight() > 0 && RightChar() == '-') {
                                    MoveRight();
                                    // NOTE(review): RightChar() is read here without a
                                    // CharsRight() check; if the pattern ends right after
                                    // the '-' this may read past the end -- confirm against
                                    // RightChar()'s implementation.
                                    ch = RightChar();

                                    if (ch >= '0' && ch <= '9') {
                                        uncapnum = ScanDecimal();
                                        
                                        if (!IsCaptureSlot(uncapnum))
                                            throw MakeException(SR.GetString(SR.UndefinedBackref, uncapnum));
                                        
                                        // check if we have bogus characters after the number
                                        if (CharsRight() > 0 && RightChar() != close)
                                            throw MakeException(SR.GetString(SR.InvalidGroupName));
                                    }
                                    else if (RegexCharClass.IsWordChar(ch)) {
                                        String uncapname = ScanCapname();

                                        if (IsCaptureName(uncapname))
                                            uncapnum = CaptureSlotFromName(uncapname);
                                        else
                                            throw MakeException(SR.GetString(SR.UndefinedNameRef, uncapname));

                                        // check if we have bogus character after the name
                                        if (CharsRight() > 0 && RightChar() != close)
                                            throw MakeException(SR.GetString(SR.InvalidGroupName));
                                    }
                                    else {
                                        // bad group name - starts with something other than a word character and isn't a number
                                        throw MakeException(SR.GetString(SR.InvalidGroupName));
                                    }
                                }

                                // actually make the node
                                // (requires at least one of the two parts and the closing delimiter)

                                if ((capnum != -1 || uncapnum != -1) && CharsRight() > 0 && MoveRightGetChar() == close) {
                                    return new RegexNode(RegexNode.Capture, _options, capnum, uncapnum);
                                }
                                goto BreakRecognize;
                        }
                        break;

                    case '(': 
                        // alternation construct (?(...) | )
                 
                        // position just after the condition's '('
                        int parenPos = Textpos();
                        if (CharsRight() > 0)   	
                        {
                            ch = RightChar();
    
                            // check if the alternation condition is a backref
                            if (ch >= '0' && ch <= '9') {
                                int capnum = ScanDecimal();
                                if (CharsRight() > 0 && MoveRightGetChar() == ')') {
                                    if (IsCaptureSlot(capnum))
                                        return new RegexNode(RegexNode.Testref, _options, capnum);
                                    else
                                        throw MakeException(SR.GetString(SR.UndefinedReference, capnum.ToString(CultureInfo.CurrentCulture)));
                                }
                                else
                                    throw MakeException(SR.GetString(SR.MalformedReference, capnum.ToString(CultureInfo.CurrentCulture)));
    
                            }
                            else if (RegexCharClass.IsWordChar(ch)) {
                                String capname = ScanCapname();
    
                                // A word that is not a known capture name (or is not
                                // followed by ')') is not an error: it falls through and
                                // is re-read as an expression condition below.
                                if (IsCaptureName(capname) && CharsRight() > 0 && MoveRightGetChar() == ')')
                                    return new RegexNode(RegexNode.Testref, _options, CaptureSlotFromName(capname));
                            }
                        }
                        // not a backref
                        NodeType = RegexNode.Testgroup;
                        Textto(parenPos - 1);       // jump to the start of the parentheses
                        _ignoreNextParen = true;    // but make sure we don't try to capture the insides

                        // Peek at the condition group's header, "(?x": RightChar() is the
                        // '(' itself, so offsets 1..3 address the characters after it.
                        int charsRight = CharsRight();
                        if (charsRight >= 3 && RightChar(1) == '?') {
                            char rightchar2 = RightChar(2);
                            // disallow comments in the condition
                            if (rightchar2 == '#')
                                throw MakeException(SR.GetString(SR.AlternationCantHaveComment));

                            // disallow named capture group (?<..>..) in the condition
                            if (rightchar2 == '\'' ) 
                                throw MakeException(SR.GetString(SR.AlternationCantCapture));
                            else {
                                // "(?<" is allowed only when it continues as "(?<!" or "(?<="
                                if (charsRight >=4 && (rightchar2 == '<' && RightChar(3) != '!' && RightChar(3) != '='))
                                    throw MakeException(SR.GetString(SR.AlternationCantCapture));
                            }
                        }
                            
                        break;


                    default:
                        // Inline options: "(?opts)" changes options only and returns null;
                        // "(?opts:...)" opens a Group under the options ScanOptions() applied.
                        MoveLeft();

                        NodeType = RegexNode.Group;
                        ScanOptions();
                        if (CharsRight() == 0)
                            goto BreakRecognize;

                        if ((ch = MoveRightGetChar()) == ')')
                            return null;

                        if (ch != ':')
                            goto BreakRecognize;
                        break;
                }

                return new RegexNode(NodeType, _options);
            }

            BreakRecognize: 
            ;
            // break Recognize comes here

            throw MakeException(SR.GetString(SR.UnrecognizedGrouping));
        }
 
        /*
         * Scans whitespace or x-mode comments.
         */
        internal void ScanBlank() {
            // With the x option on, whitespace and '#' end-of-line comments are
            // skipped in addition to inline (?#...) comments. With it off, only
            // inline (?#...) comments are skipped.
            bool extended = UseOptionX();

            for (;;) {
                if (extended) {
                    while (CharsRight() > 0 && IsSpace(RightChar()))
                        MoveRight();

                    // '#' comment: skip up to, but not including, the newline
                    if (CharsRight() > 0 && RightChar() == '#') {
                        while (CharsRight() > 0 && RightChar() != '\n')
                            MoveRight();
                        continue;
                    }
                }

                // anything other than the start of "(?#" ends the blank run
                if (CharsRight() < 3 || RightChar() != '(' ||
                    RightChar(1) != '?' || RightChar(2) != '#')
                    return;

                // inline comment: skip through the closing ')'
                while (CharsRight() > 0 && RightChar() != ')')
                    MoveRight();
                if (CharsRight() == 0)
                    throw MakeException(SR.GetString(SR.UnterminatedComment));
                MoveRight();
            }
        }
 
        /*
         * Scans chars following a '\' (not counting the '\'), and returns
         * a RegexNode for the type of atom scanned.
         */
        internal RegexNode ScanBackslash() {
            // the pattern may not end immediately after the '\'
            if (CharsRight() == 0)
                throw MakeException(SR.GetString(SR.IllegalEndEscape));

            char code = RightChar();

            switch (code) {
                // zero-width assertions: node type comes from TypeFromCode
                case 'A':
                case 'G':
                case 'Z':
                case 'z':
                case 'b':
                case 'B':
                    MoveRight();
                    return new RegexNode(TypeFromCode(code), _options);

                // shorthand classes: the ECMAScript option selects the ECMA variant
                case 'w':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);

                case 'W':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);

                case 's':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);

                case 'S':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);

                case 'd':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);

                case 'D':
                    MoveRight();
                    return new RegexNode(RegexNode.Set, _options,
                        UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);

                // \p{X} / \P{X}: the upper-case form is passed as the negated flag
                case 'p':
                case 'P': {
                    MoveRight();
                    RegexCharClass category = new RegexCharClass();
                    category.AddCategoryFromName(ParseProperty(), (code != 'p'), UseOptionI(), _pattern);
                    if (UseOptionI())
                        category.AddLowercase(_culture);

                    return new RegexNode(RegexNode.Set, _options, category.ToStringClass());
                }

                // everything else: backreferences and single-character escapes
                default:
                    return ScanBasicBackslash();
            }
        }
 
        /*
         * Scans \-style backreferences and character escapes
         */
        internal RegexNode ScanBasicBackslash() {
            // Called with the position just after the '\'. Returns a Ref node when
            // the escape is a recognized backreference (\1, \k<name>, \k'name', or
            // the deprecated \<name> / \'name' forms). Otherwise the position is
            // rewound to where this method started and the text is re-read as a
            // single-character escape, producing a One node.
            //
            // Throws (via MakeException) for: end of pattern, a malformed \k, an
            // undefined numbered or named reference in a delimited form, and an
            // undefined single-digit \N outside ECMAScript mode.
            if (CharsRight() == 0)
                throw MakeException(SR.GetString(SR.IllegalEndEscape));

            char ch;
            bool angled = false;    // true once an opening '<' or '\'' delimiter has been consumed
            char close = '\0';      // the closing delimiter that matches the opener
            int backpos;            // position to rewind to if this is not a backreference

            backpos = Textpos();
            ch = RightChar();

            // allow \k<foo> instead of \<foo>, which is now deprecated

            if (ch == 'k') {
                if (CharsRight() >= 2) {
                    MoveRight();
                    ch = MoveRightGetChar();

                    if (ch == '<' || ch == '\'') {
                        angled = true;
                        close = (ch == '\'') ? '\'' : '>';
                    }
                }

                // \k must be followed by a delimiter and at least one more character
                if (!angled || CharsRight() <= 0)
                    throw MakeException(SR.GetString(SR.MalformedNameRef));

                ch = RightChar();
            }

            // Deprecated form without the 'k': \<foo> or \'foo'

            else if ((ch == '<' || ch == '\'') && CharsRight() > 1) {
                angled = true;
                close = (ch == '\'') ? '\'' : '>';

                MoveRight();
                ch = RightChar();
            }

            // Try to parse backreference: \<1> or \<cap>

            if (angled && ch >= '0' && ch <= '9') {
                int capnum = ScanDecimal();

                // without the closing delimiter this falls through to the char-escape path
                if (CharsRight() > 0 && MoveRightGetChar() == close) {
                    if (IsCaptureSlot(capnum))
                        return new RegexNode(RegexNode.Ref, _options, capnum);
                    else
                        throw MakeException(SR.GetString(SR.UndefinedBackref, capnum.ToString(CultureInfo.CurrentCulture)));
                }
            }

            // Try to parse backreference or octal: \1

            else if (!angled && ch >= '1' && ch <= '9') {
                if (UseOptionE()) {
                    // ECMAScript mode: keep extending the number one digit at a time
                    // while it does not exceed _captop, remembering the last value
                    // that names a capture slot. When _caps is available, the slot's
                    // recorded pattern position must also be less than pos.
                    // NOTE(review): unlike ScanDollar, the position is not rewound to
                    // the end of the digits that formed capnum; digits examined after
                    // the last successful match stay consumed - confirm this is intended.
                    int capnum = -1;
                    int newcapnum = (int)(ch - '0');
                    int pos = Textpos() - 1;
                    while (newcapnum <= _captop) {
                        if (IsCaptureSlot(newcapnum) && (_caps == null || (int)_caps[newcapnum] < pos))
                            capnum = newcapnum;
                        MoveRight();
                        if (CharsRight() == 0 || (ch = RightChar()) < '0' || ch > '9')
                            break;
                        newcapnum = newcapnum * 10 + (int)(ch - '0');
                    }
                    if (capnum >= 0)
                        return new RegexNode(RegexNode.Ref, _options, capnum);
                } else
                {

                  // Non-ECMAScript mode: the whole digit run is the group number.
                  // An undefined single-digit reference is an error; larger
                  // undefined numbers fall through to the char-escape path.
                  int capnum = ScanDecimal();
                  if (IsCaptureSlot(capnum))
                      return new RegexNode(RegexNode.Ref, _options, capnum);
                  else if (capnum <= 9)
                      throw MakeException(SR.GetString(SR.UndefinedBackref, capnum.ToString(CultureInfo.CurrentCulture)));
                }
            }

            // Named reference in a delimited form: \k<name>, \k'name', \<name>, \'name'

            else if (angled && RegexCharClass.IsWordChar(ch)) {
                String capname = ScanCapname();

                if (CharsRight() > 0 && MoveRightGetChar() == close) {
                    if (IsCaptureName(capname))
                        return new RegexNode(RegexNode.Ref, _options, CaptureSlotFromName(capname));
                    else
                        throw MakeException(SR.GetString(SR.UndefinedNameRef, capname));
                }
            }

            // Not backreference: must be char code

            // rewind so that ScanCharEscape sees the character right after the '\'
            Textto(backpos);
            ch = ScanCharEscape();

            // with IgnoreCase on, the literal is stored lower-cased using the parser's culture
            if (UseOptionI())
                ch = Char.ToLower(ch, _culture);

            return new RegexNode(RegexNode.One, _options, ch);
        }
 
        /*
         * Scans $ patterns recognized within replacement patterns
         */
        internal RegexNode ScanDollar() {
            // Called with the position just after a '$'. Returns a Ref node for a
            // recognized substitution ($1, ${1}, ${name}, $&, $`, $', $+, $_) or a
            // One node holding a literal '$' for "$$" and for anything unrecognized.
            // In the unrecognized case the position is rewound to just after the '$'.
            if (CharsRight() == 0)
                return new RegexNode(RegexNode.One, _options, '$');

            char ch = RightChar();
            bool angled;                // true for the braced form ${...}
            int backpos = Textpos();    // rewind target when the '$' turns out to be literal
            int lastEndPos = backpos;   // end of the longest digit prefix that names a capture slot

            // Note the opening brace of ${...}

            if (ch == '{' && CharsRight() > 1) {
                angled = true;
                MoveRight();
                ch = RightChar();
            }
            else {
                angled = false;
            }

            // Try to parse backreference: $1 or ${1} or ${cap}

            if (ch >= '0' && ch <= '9') {
                if (!angled && UseOptionE()) {
                    // ECMAScript mode, unbraced: use the longest prefix of the digit
                    // run that names a capture slot, then rewind to the end of that
                    // prefix so the remaining digits are left unconsumed.
                    int capnum = -1;
                    int newcapnum = (int)(ch - '0');
                    MoveRight();
                    if (IsCaptureSlot(newcapnum)) {
                        capnum = newcapnum;
                        lastEndPos = Textpos();
                    }

                    while (CharsRight() > 0 && (ch = RightChar()) >= '0' && ch <= '9') {
                        int digit = (int)(ch - '0');
                        // reject the digit before it would overflow Int32
                        if (newcapnum > (MaxValueDiv10) || (newcapnum == (MaxValueDiv10) && digit > (MaxValueMod10)))
                            throw MakeException(SR.GetString(SR.CaptureGroupOutOfRange));

                        newcapnum = newcapnum * 10 + digit;

                        MoveRight();
                        if (IsCaptureSlot(newcapnum)) {
                            capnum = newcapnum;
                            lastEndPos = Textpos();
                        }
                    }
                    Textto(lastEndPos);
                    if (capnum >= 0)
                        return new RegexNode(RegexNode.Ref, _options, capnum);
                } 
                else
                {
                    // The whole digit run is the group number; the braced form
                    // additionally requires the closing '}'.
                    int capnum = ScanDecimal();
                    if (!angled || CharsRight() > 0 && MoveRightGetChar() == '}') {
                        if (IsCaptureSlot(capnum))
                            return new RegexNode(RegexNode.Ref, _options, capnum);
                    }
                }
            }
            else if (angled && RegexCharClass.IsWordChar(ch)) {
                // ${name}
                String capname = ScanCapname();

                if (CharsRight() > 0 && MoveRightGetChar() == '}') {
                    if (IsCaptureName(capname))
                        return new RegexNode(RegexNode.Ref, _options, CaptureSlotFromName(capname));
                }
            }
            else if (!angled) {
                // 1 serves as the "no special substitution matched" marker here;
                // every recognized case below assigns a different value.
                int capnum = 1;

                switch (ch) {
                    case '$':
                        // "$$" is an escaped literal '$'
                        MoveRight();
                        return new RegexNode(RegexNode.One, _options, '$');

                    case '&':
                        capnum = 0;
                        break;

                    case '`':
                        capnum = RegexReplacement.LeftPortion;
                        break;

                    case '\'':
                        capnum = RegexReplacement.RightPortion;
                        break;

                    case '+':
                        capnum = RegexReplacement.LastGroup;
                        break;

                    case '_':
                        capnum = RegexReplacement.WholeString;
                        break;
                }

                if (capnum != 1) {
                    MoveRight();
                    return new RegexNode(RegexNode.Ref, _options, capnum);
                }
            }

            // unrecognized $: literalize

            Textto(backpos);
            return new RegexNode(RegexNode.One, _options, '$');
        }
 
        /*
         * Scans a capture name: consumes word chars
         */
        internal String ScanCapname() {
            int begin = Textpos();

            // advance over the run of word characters; stop on the first non-word char
            while (CharsRight() > 0 && RegexCharClass.IsWordChar(RightChar()))
                MoveRight();

            // the name is everything consumed since 'begin' (may be empty)
            return _pattern.Substring(begin, Textpos() - begin);
        }
 
 
        /*
         * Scans up to three octal digits (the result is truncated to its low 8 bits).
         */
        internal char ScanOctal() {
            // At most three digits are read, fewer if the pattern ends sooner.
            int remaining = CharsRight();
            if (remaining > 3)
                remaining = 3;

            int value = 0;

            while (remaining > 0) {
                int digit = RightChar() - '0';

                // the unsigned compare rejects anything outside '0'..'7'
                if ((uint)digit > 7)
                    break;

                MoveRight();
                value = value * 8 + digit;

                // in ECMAScript mode, stop as soon as the value reaches 0x20
                if (UseOptionE() && value >= 0x20)
                    break;

                remaining--;
            }

            // Octal codes only go up to 255; as in Perl, any higher bits are dropped.
            return (char)(value & 0xFF);
        }
 
        /*
         * Scans any number of decimal digits (throws if the value would exceed 2^31-1)
         */
        internal int ScanDecimal() {
            int value = 0;

            while (CharsRight() > 0) {
                // the char cast wraps characters below '0' to large values, so the
                // single unsigned compare below rejects every non-digit
                int digit = (char)(RightChar() - '0');
                if ((uint)digit > 9)
                    break;

                MoveRight();

                // refuse the digit if appending it would overflow Int32
                bool wouldOverflow = value > MaxValueDiv10 ||
                                     (value == MaxValueDiv10 && digit > MaxValueMod10);
                if (wouldOverflow)
                    throw MakeException(SR.GetString(SR.CaptureGroupOutOfRange));

                value = value * 10 + digit;
            }

            return value;
        }
 
        /*
         * Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)
         */
        internal char ScanHex(int c) {
            int value = 0;

            // Digits are only consumed when at least c characters remain; c counts
            // down the digits still required.
            if (CharsRight() >= c) {
                while (c > 0) {
                    int digit = HexDigit(MoveRightGetChar());
                    if (digit < 0)
                        break;

                    value = value * 0x10 + digit;
                    c--;
                }
            }

            // a non-zero count means too few characters or a non-hex character
            if (c > 0)
                throw MakeException(SR.GetString(SR.TooFewHex));

            return (char)value;
        }
 
        /*
         * Returns n <= 0xF for a hex digit.
         */
        internal static int HexDigit(char ch) {
            // Returns the value 0..15 of a hex digit in either case, or -1 when
            // ch is not a hex digit.
            if (ch >= '0' && ch <= '9')
                return ch - '0';

            if (ch >= 'a' && ch <= 'f')
                return ch - 'a' + 0xa;

            if (ch >= 'A' && ch <= 'F')
                return ch - 'A' + 0xa;

            return -1;
        }
 
        /*
         * Grabs and converts an ascii control character
         */
        internal char ScanControl() {
            if (CharsRight() <= 0)
                throw MakeException(SR.GetString(SR.MissingControl));

            char letter = MoveRightGetChar();

            // \ca interpreted as \cA
            if (letter >= 'a' && letter <= 'z')
                letter = (char)(letter - ('a' - 'A'));

            // '@' maps to 0, 'A' to 1, and so on. Characters below '@' wrap around
            // to large char values and are rejected by the range test.
            char control = (char)(letter - '@');
            if (control >= ' ')
                throw MakeException(SR.GetString(SR.UnrecognizedControl));

            return control;
        }
 
        /*
         * Returns true for options allowed only at the top level
         */
        internal bool IsOnlyTopOption(RegexOptions option) {
            // Compares for exact equality with a single flag; a combination of
            // flags yields false.
            switch (option) {
                case RegexOptions.RightToLeft:
#if !SILVERLIGHT
                case RegexOptions.Compiled:
#endif
                case RegexOptions.CultureInvariant:
                case RegexOptions.ECMAScript:
                    return true;

                default:
                    return false;
            }
        }
 
        /*
         * Scans cimsx-cimsx option string, stops at the first unrecognized char.
         */
        internal void ScanOptions() {
            // '-' switches to clearing options and '+' back to setting them;
            // every option letter is applied to _options in the current mode.
            bool clearing = false;

            while (CharsRight() > 0) {
                char code = RightChar();

                if (code == '-') {
                    clearing = true;
                }
                else if (code == '+') {
                    clearing = false;
                }
                else {
                    RegexOptions flag = OptionFromCode(code);

                    // stop, without consuming the char, at anything that is not
                    // an option letter or that is only allowed at the top level
                    if (flag == 0 || IsOnlyTopOption(flag))
                        return;

                    _options = clearing ? (_options & ~flag) : (_options | flag);
                }

                MoveRight();
            }
        }
 
        /*
         * Scans \ code for escape codes that map to single unicode chars.
         */
        internal char ScanCharEscape() {
            // Consumes the character after the '\' and maps it to the single
            // character it denotes.
            char code = MoveRightGetChar();

            switch (code) {
                // octal: back up so ScanOctal sees the first digit as well
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    MoveLeft();
                    return ScanOctal();

                case 'x':
                    return ScanHex(2);
                case 'u':
                    return ScanHex(4);
                case 'c':
                    return ScanControl();

                case 'a':
                    return '\u0007';
                case 'b':
                    return '\b';
                case 'e':
                    return '\u001B';
                case 'f':
                    return '\f';
                case 'n':
                    return '\n';
                case 'r':
                    return '\r';
                case 't':
                    return '\t';
                case 'v':
                    return '\u000B';
            }

            // Any other character stands for itself, except that outside
            // ECMAScript mode an unknown escape of a word character is an error.
            if (!UseOptionE() && RegexCharClass.IsWordChar(code))
                throw MakeException(SR.GetString(SR.UnrecognizedEscape, code.ToString()));

            return code;
        }
 
        /*
         * Scans X for \p{X} or \P{X}
         */
        internal String ParseProperty() {
            // shortest valid form is "{X}", i.e. three characters
            if (CharsRight() < 3)
                throw MakeException(SR.GetString(SR.IncompleteSlashP));

            if (MoveRightGetChar() != '{')
                throw MakeException(SR.GetString(SR.MalformedSlashP));

            int nameStart = Textpos();

            // the name is a run of word characters and '-'
            while (CharsRight() > 0) {
                char c = RightChar();
                if (!RegexCharClass.IsWordChar(c) && c != '-')
                    break;
                MoveRight();
            }

            String name = _pattern.Substring(nameStart, Textpos() - nameStart);

            // the name must be followed directly by the closing brace
            if (CharsRight() == 0 || MoveRightGetChar() != '}')
                throw MakeException(SR.GetString(SR.IncompleteSlashP));

            return name;
        }
 
        /*
         * Returns ReNode type for zero-length assertions with a \ code.
         */
        internal int TypeFromCode(char ch) {
            // \b and \B have ECMAScript-specific variants; the other anchors do not.
            if (ch == 'b') {
                if (UseOptionE())
                    return RegexNode.ECMABoundary;
                return RegexNode.Boundary;
            }

            if (ch == 'B') {
                if (UseOptionE())
                    return RegexNode.NonECMABoundary;
                return RegexNode.Nonboundary;
            }

            if (ch == 'A')
                return RegexNode.Beginning;

            if (ch == 'G')
                return RegexNode.Start;

            if (ch == 'Z')
                return RegexNode.EndZ;

            if (ch == 'z')
                return RegexNode.End;

            // not an assertion code
            return RegexNode.Nothing;
        }
 
        /*
         * Returns option bit from single-char (?cimsx) code.
         */
        internal static RegexOptions OptionFromCode(char ch) {
            // Option letters are case-insensitive, so both cases of each ASCII
            // letter are listed. Any other character yields 0.
            switch (ch) {
#if !SILVERLIGHT
                case 'c':
                case 'C':
                    return RegexOptions.Compiled;
#endif
                case 'i':
                case 'I':
                    return RegexOptions.IgnoreCase;
                case 'r':
                case 'R':
                    return RegexOptions.RightToLeft;
                case 'm':
                case 'M':
                    return RegexOptions.Multiline;
                case 'n':
                case 'N':
                    return RegexOptions.ExplicitCapture;
                case 's':
                case 'S':
                    return RegexOptions.Singleline;
                case 'x':
                case 'X':
                    return RegexOptions.IgnorePatternWhitespace;
#if DBG
                case 'd':
                case 'D':
                    return RegexOptions.Debug;
#endif
                case 'e':
                case 'E':
                    return RegexOptions.ECMAScript;
                default:
                    return 0;
            }
        }
 
        /*
         * a prescanner for deducing the slots used for
         * captures by doing a partial tokenization of the pattern.
         */
        internal void CountCaptures() {
            // Walks the pattern from the current position to its end, recording
            // every numbered capture slot and every capture name it finds, then
            // calls AssignNameSlots to give the named groups their slot numbers.
            // Only enough of the syntax is recognized to locate group openers:
            // escapes, comments, character classes and option groups.
            char ch;

            // slot 0 always exists; it is recorded at pattern position 0
            NoteCaptureSlot(0, 0);

            _autocap = 1;

            while (CharsRight() > 0) {
                int pos = Textpos();    // position of the char being examined; stored with any capture noted for it
                ch = MoveRightGetChar();
                switch (ch) {
                    case '\\':
                        // skip the escaped character so it is not mistaken for syntax
                        if (CharsRight() > 0)
                            MoveRight();
                        break;

                    case '#':
                        // with the x option on, '#' starts a comment: back up and let ScanBlank skip it
                        if (UseOptionX()) {
                            MoveLeft();
                            ScanBlank();
                        }
                        break;

                    case '[':
                        // NOTE(review): presumably a scan-only pass over the character class - confirm against ScanCharClass
                        ScanCharClass(false, true);
                        break;

                    case ')':
                        // leaving a group: restore the options saved when it was opened
                        if (!EmptyOptionsStack())
                            PopOptions();
                        break;

                    case '(':
                        // "(?#" is an inline comment: back up and let ScanBlank skip it
                        if (CharsRight() >= 2 && RightChar(1) == '#' && RightChar() == '?') {
                            MoveLeft();
                            ScanBlank();
                        } 
                        else {
                            
                            PushOptions();
                            if (CharsRight() > 0 && RightChar() == '?') {
                                // we have (?...
                                MoveRight();

                                if (CharsRight() > 1 && (RightChar() == '<' || RightChar() == '\'')) {
                                    // named group: (?<... or (?'...

                                    MoveRight();
                                    ch = RightChar();

                                    // a leading '0' or a non-word char (such as the '=' or '!'
                                    // of a lookbehind) means nothing is captured here
                                    if (ch != '0' && RegexCharClass.IsWordChar(ch)) {
                                        //if (_ignoreNextParen) 
                                        //    throw MakeException(SR.GetString(SR.AlternationCantCapture));
                                        if (ch >= '1' && ch <= '9') 
                                            NoteCaptureSlot(ScanDecimal(), pos);
                                        else 
                                            NoteCaptureName(ScanCapname(), pos);
                                    }
                                }
                                else {
                                    // (?...

                                    // get the options if it's an option construct (?cimsx-cimsx...)
                                    ScanOptions();

                                    if (CharsRight() > 0) {
                                        if (RightChar() == ')') {
                                            // (?cimsx-cimsx)
                                            MoveRight();
                                            PopKeepOptions();
                                        }
                                        else if (RightChar() == '(') {
                                            // alternation construct: (?(foo)yes|no)
                                            // ignore the next paren so we don't capture the condition
                                            _ignoreNextParen = true;

                                            // break from here so we don't reset _ignoreNextParen
                                            break;
                                        }
                                    }
                                }
                            }
                            else {
                                // plain '(': takes the next automatic slot number unless
                                // ExplicitCapture is on or this paren is an alternation condition
                                if (!UseOptionN() && !_ignoreNextParen)
                                    NoteCaptureSlot(_autocap++, pos);
                            }
                        }

                        _ignoreNextParen = false;
                        break;
                }
            }

            AssignNameSlots();
        }
 
        /*
         * Notes a used capture slot
         */
        internal void NoteCaptureSlot(int i, int pos) {
            // a slot is recorded only the first time it is seen
            if (_caps.ContainsKey(i))
                return;

            // The stored value is the pattern position passed in by the caller;
            // ScanBasicBackslash compares it with the position of a backreference
            // in ECMAScript mode.
            _caps.Add(i, pos);
            _capcount++;

            // keep _captop one above the highest slot, clamped so it cannot overflow
            if (_captop <= i)
                _captop = (i == Int32.MaxValue) ? i : i + 1;
        }
 
        /*
         * Notes a used capture name
         */
        internal void NoteCaptureName(String name, int pos) {
            // the name table and the ordered name list are created on first use
            if (_capnames == null) {
#if SILVERLIGHT
                _capnames = new Dictionary<String, Int32>();
#else
                _capnames = new Hashtable();
#endif
                _capnamelist = new List<String>();
            }

            // only the first occurrence of a name is recorded
            if (_capnames.ContainsKey(name))
                return;

            // the value stored here is the pattern position; AssignNameSlots
            // later overwrites it with the slot number
            _capnames.Add(name, pos);
            _capnamelist.Add(name);
        }
 
        /*
         * For when all the used captures are known: note them all at once
         */
#if SILVERLIGHT
        internal void NoteCaptures(Dictionary<Int32, Int32> caps, int capsize, Dictionary<String, Int32> capnames) {
#else
        internal void NoteCaptures(Hashtable caps, int capsize, Hashtable capnames) {
#endif
            // The supplied tables are stored by reference; nothing is copied.
            // caps may be null, in which case IsCaptureSlot falls back to a
            // range check against capsize.
            _capnames = capnames;
            _capsize = capsize;
            _caps = caps;
        }
 
        /*
         * Assigns unused slot numbers to the capture names
         */
        internal void AssignNameSlots() {
            // Step 1: give every named group, in the order the names were first
            // seen, the lowest slot number at or above _autocap that is not already
            // taken. The name's entry in _capnames held the pattern position up to
            // now; it is replaced by the slot number, and the position moves into _caps.
            if (_capnames != null) {
                for (int i = 0; i < _capnamelist.Count; i++) {
                    while (IsCaptureSlot(_autocap))
                        _autocap++;
                    string name = _capnamelist[i];
                    int pos = (int)_capnames[name];
                    _capnames[name] = _autocap;
                    NoteCaptureSlot(_autocap, pos);

                    _autocap++;
                }
            }

            // if the caps array has at least one gap, construct the list of used slots

            // Step 2: _capnumlist holds the used slot numbers in ascending order
            if (_capcount < _captop) {
                _capnumlist = new Int32[_capcount];
                int i = 0;

                for (IDictionaryEnumerator de = _caps.GetEnumerator(); de.MoveNext(); )
                    _capnumlist[i++] = (int)de.Key;

                System.Array.Sort(_capnumlist, Comparer<Int32>.Default);
            }

            // merge capsnumlist into capnamelist

            // Step 3: rebuild _capnamelist with one entry per used slot, in ascending
            // slot order. A named slot keeps its name; an unnamed slot gets its
            // number formatted as a string, which is also added to _capnames.
            if (_capnames != null || _capnumlist != null) {
                List<String> oldcapnamelist;
                int next;       // slot number of the next not-yet-merged name, or -1 when none remain
                int k = 0;      // index of that name in oldcapnamelist

                if (_capnames == null) {
                    oldcapnamelist = null;
#if SILVERLIGHT
                    _capnames = new Dictionary<String, Int32>();
#else
                    _capnames = new Hashtable();
#endif
                    _capnamelist = new List<String>();
                    next = -1;
                }
                else {
                    oldcapnamelist = _capnamelist;
                    _capnamelist = new List<String>();
                    next = (int)_capnames[oldcapnamelist[0]];
                }

                for (int i = 0; i < _capcount; i++) {
                    // without a _capnumlist the slots are contiguous, so the index is the slot number
                    int j = (_capnumlist == null) ? i : (int)_capnumlist[i];

                    if (next == j) {
                        // step 1 handed out slot numbers in increasing order along the
                        // name list, so the names can be consumed sequentially here
                        _capnamelist.Add(oldcapnamelist[k++]);
                        next = (k == oldcapnamelist.Count) ? -1 : (int)_capnames[oldcapnamelist[k]];
                    }
                    else {
                        String str = Convert.ToString(j, _culture);
                        _capnamelist.Add(str);
                        _capnames[str] = j;
                    }
                }
            }
        }
 
        /*
         * Returns the slot number stored in the capture-name table for the given name.
         */
        internal int CaptureSlotFromName(String capname) {
            int slot = (int)_capnames[capname];
            return slot;
        }
 
        /*
         * True if the capture slot was noted: a key of the caps table when
         * there is one, otherwise any number in the range [0, _capsize).
         */
        internal bool IsCaptureSlot(int i) {
            if (_caps == null) {
                // no table available: fall back to the plain range check
                return (0 <= i && i < _capsize);
            }

            return _caps.ContainsKey(i);
        }
 
        /*
         * True if the given name is present in the capture-name table
         * (always false when no table exists).
         */
        internal bool IsCaptureName(String capname) {
            return _capnames != null && _capnames.ContainsKey(capname);
        }
 
        /*
         * Tests the N option (RegexOptions.ExplicitCapture), which disables '(' autocapture.
         */
        internal bool UseOptionN() {
            RegexOptions masked = _options & RegexOptions.ExplicitCapture;
            return masked != 0;
        }
 
        /*
         * Tests the I option (RegexOptions.IgnoreCase), which enables case-insensitivity.
         */
        internal bool UseOptionI() {
            RegexOptions masked = _options & RegexOptions.IgnoreCase;
            return masked != 0;
        }
 
        /*
         * Tests the M option (RegexOptions.Multiline), which alters the meaning of $ and ^.
         */
        internal bool UseOptionM() {
            RegexOptions masked = _options & RegexOptions.Multiline;
            return masked != 0;
        }
 
        /*
         * Tests the S option (RegexOptions.Singleline), which alters the meaning of '.'.
         */
        internal bool UseOptionS() {
            RegexOptions masked = _options & RegexOptions.Singleline;
            return masked != 0;
        }
 
        /*
         * Tests the X option (RegexOptions.IgnorePatternWhitespace), which enables
         * whitespace/comment mode.
         */
        internal bool UseOptionX() {
            RegexOptions masked = _options & RegexOptions.IgnorePatternWhitespace;
            return masked != 0;
        }
 
        /*
         * Tests the E option (RegexOptions.ECMAScript), which enables ECMAScript behavior.
         */
        internal bool UseOptionE() {
            RegexOptions masked = _options & RegexOptions.ECMAScript;
            return masked != 0;
        }
 
        // Character categories stored in _category. The classification helpers
        // that follow compare a table entry against one of these values with >=
        // (IsSpace uses ==), so the numeric order of the categories matters.
        internal const byte Q = 5;    // quantifier
        internal const byte S = 4;    // ordinary stopper
        internal const byte Z = 3;    // ScanBlank stopper
        internal const byte X = 2;    // whitespace
        internal const byte E = 1;    // should be escaped (no entry in the table below uses it)
 
        /*
         * For categorizing ascii characters: one entry per character code
         * 0..127, four rows of 32 entries. The comment above each row labels
         * the columns of that row; 0 means the character has no category.
        */
        internal static readonly byte[] _category = new byte[] {
            // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 
               0,0,0,0,0,0,0,0,0,X,X,0,X,X,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            //   ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
               X,0,0,Z,S,0,0,0,S,S,Q,Q,0,0,S,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Q,
            // @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
               0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,S,S,0,S,0,
            // ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ 
               0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Q,S,0,0,0};
 
        /*
         * True for characters that end a run of ordinary chars: those up to '|'
         * whose category is S or higher.
         */
        internal static bool IsSpecial(char ch) {
            if (ch > '|')
                return false;

            return _category[ch] >= S;
        }
 
        /*
         * Same test as IsSpecial but with a lower threshold: true for characters
         * up to '|' whose category is X (whitespace) or higher.
         */
        internal static bool IsStopperX(char ch) {
            if (ch > '|')
                return false;

            return _category[ch] >= X;
        }
 
        /*
         * True for characters that begin a quantifier: those up to '{' whose
         * category is Q.
         */
        internal static bool IsQuantifier(char ch) {
            if (ch > '{')
                return false;

            return _category[ch] >= Q;
        }
 
        /*
         * True if a quantifier starts at the current parsing position. A char
         * other than '{' is judged by its category alone; a '{' only counts when
         * it opens "{digits}", "{digits,}" or "{digits,digits}". The parsing
         * position is not moved.
         */
        internal bool IsTrueQuantifier() {
            int remaining = CharsRight();
            if (remaining == 0)
                return false;

            int start = Textpos();
            char c = CharAt(start);
            if (c != '{')
                return c <= '{' && _category[c] >= Q;

            // walk over the digits that follow the '{'
            int index = start;
            for (;;) {
                if (--remaining <= 0)
                    break;
                c = CharAt(++index);
                if (c < '0' || c > '9')
                    break;
            }

            // ran off the end of the pattern, or there was no digit at all
            if (remaining == 0 || index - start == 1)
                return false;

            // anything but a ',' settles the answer here: only '}' closes the quantifier
            if (c != ',')
                return c == '}';

            // walk over the (possibly empty) digits that follow the ','
            for (;;) {
                if (--remaining <= 0)
                    break;
                c = CharAt(++index);
                if (c < '0' || c > '9')
                    break;
            }

            return remaining > 0 && c == '}';
        }
 
        /*
         * True for whitespace: characters up to ' ' whose category is exactly X.
         */
        internal static bool IsSpace(char ch) {
            if (ch > ' ')
                return false;

            return _category[ch] == X;
        }
 
        /*
         * True for chars that should be escaped: characters up to '|' that have
         * any category at all (E or higher).
         */
        internal static bool IsMetachar(char ch) {
            if (ch > '|')
                return false;

            return _category[ch] >= E;
        }
 
 
        /*
         * Appends cch literal characters of the pattern, starting at pos, to the
         * current concatenation: a One node for a single char, a Multi node for
         * a longer run. Nothing is added when cch is 0. With the I option on
         * (and isReplacement false) the characters are lower-cased first.
         */
        internal void AddConcatenate(int pos, int cch, bool isReplacement) {
            if (cch == 0)
                return;

            bool lowercase = UseOptionI() && !isReplacement;
            RegexNode literal;

            if (cch <= 1) {
                char single = _pattern[pos];

                if (lowercase)
                    single = Char.ToLower(single, _culture);

                literal = new RegexNode(RegexNode.One, _options, single);
            }
            else {
                String text = _pattern.Substring(pos, cch);

                if (lowercase) {
                    // Lower-case one char at a time rather than the whole string at once:
                    // a whole-string ToLower could alter surrogate pairs, and since Regex
                    // does not support surrogates, consistency is what matters here.
                    char[] chars = text.ToCharArray();
                    for (int index = 0; index < chars.Length; index++)
                        chars[index] = Char.ToLower(chars[index], _culture);
                    text = new String(chars);
                }

                literal = new RegexNode(RegexNode.Multi, _options, text);
            }

            _concatenation.AddChild(literal);
        }
 
        /*
         * Saves the parser state (in response to an open paren). The current
         * group, alternation and concatenation are chained together through
         * their _next links on top of the existing stack, and the concatenation
         * becomes the new top of the stack.
         */
        internal void PushGroup() {
            RegexNode top = _concatenation;

            _group._next = _stack;
            _alternation._next = _group;
            top._next = _alternation;

            _stack = top;
        }
 
        /*
         * Restores the parser state saved by PushGroup (in response to a ')'):
         * concatenation, alternation and group are read back off the stack in
         * that order, following the _next links.
         */
        internal void PopGroup() {
            _concatenation = _stack;
            _alternation = _concatenation._next;
            _group = _alternation._next;
            _stack = _group._next;

            // Only a Testgroup that has no child yet needs anything more:
            // the first () inside it goes directly to the group.
            bool needsCondition = _group.Type() == RegexNode.Testgroup && _group.ChildCount() == 0;
            if (!needsCondition)
                return;

            if (_unit == null)
                throw MakeException(SR.GetString(SR.IllegalCondition));

            _group.AddChild(_unit);
            _unit = null;
        }
 
        /*
         * Reports whether the group stack has nothing on it.
         */
        internal bool EmptyStack() {
            bool empty = (_stack == null);
            return empty;
        }
 
        /*
         * Begins a new round of parser state (in response to an open paren or
         * string start): openGroup becomes the current group, and a fresh
         * alternation and a fresh concatenation are created with the current options.
         */
        internal void StartGroup(RegexNode openGroup) {
            RegexNode freshAlternation = new RegexNode(RegexNode.Alternate, _options);
            RegexNode freshConcatenation = new RegexNode(RegexNode.Concatenate, _options);

            _group = openGroup;
            _alternation = freshAlternation;
            _concatenation = freshConcatenation;
        }
 
        /*
         * Closes the current concatenation (in response to a |) and starts a
         * new, empty one.
         */
        internal void AddAlternate() {
            // Inside a Testgroup or Testref group the | parts are attached to the
            // group itself; everywhere else they are attached to the alternation.
            bool conditional = _group.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref;
            RegexNode parent = conditional ? _group : _alternation;

            parent.AddChild(_concatenation.ReverseLeft());

            _concatenation = new RegexNode(RegexNode.Concatenate, _options);
        }
 
        /*
         * Closes the current quantifiable unit when no quantifier applies to it:
         * the unit is appended to the concatenation as it is, and then cleared.
         */
        internal void AddConcatenate() {
            RegexNode finished = _unit;

            _concatenation.AddChild(finished);
            _unit = null;
        }
 
        /*
         * Closes the current quantifiable unit when a quantifier was found: the
         * unit is wrapped by MakeQuantifier(lazy, min, max), the result is
         * appended to the concatenation, and the unit is cleared.
         */
        internal void AddConcatenate(bool lazy, int min, int max) {
            RegexNode quantified = _unit.MakeQuantifier(lazy, min, max);

            _concatenation.AddChild(quantified);
            _unit = null;
        }
 
        /*
         * Gives back the current unit (null when there is none).
         */
        internal RegexNode Unit() {
            RegexNode current = _unit;
            return current;
        }
 
        /*
         * Makes a single-char (One) node the current unit. The char is
         * lower-cased first when the I option is on.
         */
        internal void AddUnitOne(char ch) {
            char literal = UseOptionI() ? Char.ToLower(ch, _culture) : ch;

            _unit = new RegexNode(RegexNode.One, _options, literal);
        }
 
        /*
         * Makes a single inverse-char (Notone) node the current unit. The char
         * is lower-cased first when the I option is on.
         */
        internal void AddUnitNotone(char ch) {
            char literal = UseOptionI() ? Char.ToLower(ch, _culture) : ch;

            _unit = new RegexNode(RegexNode.Notone, _options, literal);
        }
 
        /*
         * Makes a Set node, built from the given set string, the current unit.
         */
        internal void AddUnitSet(string cc) {
            RegexNode setNode = new RegexNode(RegexNode.Set, _options, cc);
            _unit = setNode;
        }
 
        /*
         * Makes an already-built subtree the current unit.
         */
        internal void AddUnitNode(RegexNode node) {
            this._unit = node;
        }
 
        /*
         * Makes a childless node of the given type (an assertion, for example)
         * the current unit.
         */
        internal void AddUnitType(int type) {
            RegexNode typed = new RegexNode(type, _options);
            _unit = typed;
        }
 
        /*
         * Closes the current group (in response to a ')' or end); the finished
         * group becomes the current unit. Throws the TooManyAlternates error
         * when a Testref group ends up with more than 2 children, or any other
         * conditional group with more than 3.
         */
        internal void AddGroup() {
            bool conditional = _group.Type() == RegexNode.Testgroup || _group.Type() == RegexNode.Testref;

            if (!conditional) {
                // ordinary group: the concatenation closes the alternation, which closes the group
                _alternation.AddChild(_concatenation.ReverseLeft());
                _group.AddChild(_alternation);
            }
            else {
                // conditional group: the concatenation is attached to the group directly
                _group.AddChild(_concatenation.ReverseLeft());

                int limit = (_group.Type() == RegexNode.Testref) ? 2 : 3;
                if (_group.ChildCount() > limit)
                    throw MakeException(SR.GetString(SR.TooManyAlternates));
            }

            _unit = _group;
        }
 
        /*
         * Pushes the current options onto the options stack.
         */
        internal void PushOptions() {
            RegexOptions saved = _options;
            _optionsStack.Add(saved);
        }
 
        /*
         * Pops the options stack: the top entry becomes the current options.
         */
        internal void PopOptions() {
            int top = _optionsStack.Count - 1;

            _options = _optionsStack[top];
            _optionsStack.RemoveAt(top);
        }
 
        /*
         * Reports whether the options stack has no entries.
         */
        internal bool EmptyOptionsStack() {
            int depth = _optionsStack.Count;
            return depth == 0;
        }
 
        /*
         * Discards the top entry of the options stack; the current options
         * are left as they are.
         */
        internal void PopKeepOptions() {
            int top = _optionsStack.Count - 1;
            _optionsStack.RemoveAt(top);
        }
 
        /*
         * Builds (but does not throw) an ArgumentException whose text comes from
         * the SR.MakeException resource, formatted with the pattern and the
         * given message.
         */
        internal ArgumentException MakeException(String message) {
            String text = SR.GetString(SR.MakeException, _pattern, message);
            return new ArgumentException(text);
        }
 
        /*
         * Gives the current parsing position within the pattern.
         */
        internal int Textpos() {
            int position = _currentPos;
            return position;
        }
 
        /*
         * Jumps to the given parsing position.
         */
        internal void Textto(int pos) {
            this._currentPos = pos;
        }
 
        /*
         * Returns the char at the current parsing position and advances the
         * position by one.
         */
        internal char MoveRightGetChar() {
            int at = _currentPos;

            // advance before reading, as a post-increment inside the indexer would
            _currentPos = at + 1;
            return _pattern[at];
        }
 
        /*
         * Advances the current parsing position by one char.
         */
        internal void MoveRight() {
            _currentPos += 1;
        }
 
        internal void MoveRight(int i) {
            // advance the current parsing position by i chars
            _currentPos = _currentPos + i;
        }
 
        /*
         * Steps the current parsing position back by one char.
         */
        internal void MoveLeft() {
            _currentPos -= 1;
        }
 
        /*
         * Returns the char at index i of the pattern; the current parsing
         * position plays no part.
         */
        internal char CharAt(int i) {
            char ch = _pattern[i];
            return ch;
        }
 
        /*
         * Returns the char at the current parsing position, without advancing.
         */
        internal char RightChar() {
            int here = _currentPos;
            return _pattern[here];
        }
 
        /*
         * Returns the char i chars to the right of the current parsing
         * position, without advancing.
         */
        internal char RightChar(int i) {
            int target = _currentPos + i;
            return _pattern[target];
        }
 
        /*
         * Counts the chars from the current parsing position to the end of the pattern.
         */
        internal int CharsRight() {
            int total = _pattern.Length;
            return total - _currentPos;
        }
    }
}
// File: regex\system\text\regularexpressions\RegexParser.cs
// Project: ndp\fx\src\System.csproj (System)