// parser.cs

// ==++==
// 
//   Copyright (c) Microsoft Corporation.  All rights reserved.
// 
// ==--==
/*============================================================
**
** CLASS:    Parser
** 
** <OWNER>Microsoft</OWNER>
**
**
** PURPOSE:  Parse "Elementary XML", that is, a simple subset of
**           XML: elements, attribute/value pairs and text.
**           Comments are skipped and there is no DTD processing.
** 
** 
===========================================================*/
namespace System.Security.Util {
    using System.Text;
    using System.Runtime.InteropServices;
    using System;
    using BinaryReader = System.IO.BinaryReader ;
    using ArrayList = System.Collections.ArrayList;
    using Stream = System.IO.Stream;
    using StreamReader = System.IO.StreamReader;
    using Encoding = System.Text.Encoding;
 
    sealed internal class Parser
    {
        // Document built by ParseContents; null until ParseContents allocates it.
        private SecurityDocument _doc;
        // Tokenizer that supplies the input tokens; the constructor recycles it
        // once parsing has finished (successfully or not).
        private Tokenizer _t;
    
        // Returns the root element of the parsed document, as reported by
        // SecurityDocument.GetRootElement().
        internal SecurityElement GetTopElement()
        {
            SecurityElement root = _doc.GetRootElement();
            return root;
        }
 
        // Tags that GetRequiredSizes passes to TokenizerStream.TagLastToken and that
        // ParseContents later dispatches on.  c_flag marks a token as tagged:
        // ParseContents skips any token without it and switches on the masked
        // upper byte (token & 0xFF00) of the rest.
        private const short c_flag = 0x4000;
        // Start of an element; ParseContents reads one string (the element name).
        private const short c_elementtag = (short)(SecurityDocument.c_element << 8 | c_flag);
        // Attribute; ParseContents reads two strings (name, then value).
        private const short c_attributetag = (short)(SecurityDocument.c_attribute << 8 | c_flag);
        // First run of text inside an element; ParseContents reads one string.
        private const short c_texttag = (short)(SecurityDocument.c_text << 8 | c_flag);
        // Further text following earlier text; ParseContents appends " " and the string.
        private const short c_additionaltexttag = (short)(SecurityDocument.c_text << 8 | c_flag | 0x2000);
        // Applied when an element is closed; ParseContents emits SecurityDocument.c_children.
        private const short c_childrentag = (short)(SecurityDocument.c_children << 8 | c_flag);
        // String token whose text is ignored; ParseContents throws the string away.
        private const short c_wastedstringtag = (short)(0x1000 | c_flag);
 
        // First pass over the token stream.  Checks the tag structure, tags the
        // tokens that ParseContents will act on (via stream.TagLastToken) and adds
        // to 'index' the space each tagged item needs; ParseContents uses the
        // final value to size the SecurityDocument.
        //
        //   stream - token stream to scan; string tokens are consumed as they are
        //            met so that the stream's string position stays in step.
        //   index  - running size total; grows by the encoded size of each string
        //            kept, plus one for each element/attribute/text/children token.
        //
        // Throws XmlSyntaxException when the input is malformed or ends early.
        private void GetRequiredSizes( TokenizerStream stream, ref int index )
        {
            //
            // Iteratively collect stuff up until the next end-tag.
            // We've already seen the open-tag.
            //

            bool needToBreak = false;   // set by a case that wants to leave the inner loop
            bool needToPop = false;     // last token handled did not ask to leave the inner loop
            bool createdNode = false;   // at least one element or format node was seen
            bool intag = false;         // currently between '<' and '>'
            int stackDepth = 1;         // open-element nesting; the outer loop runs while > 1
            SecurityElementType type = SecurityElementType.Regular;
            String strValue = null;     // pending attribute name waiting for its value
            bool sawEquals = false;     // '=' seen since the pending attribute name
            bool sawText = false;       // text already seen since the last tag
            int status = 0;             // bracket depth inside "<!...>"; 1 once a "<?" node is open

            short i;

            do
            {
                for (i = stream.GetNextToken() ; i != -1 ; i = stream.GetNextToken())
                {
                    switch (i & 0x00FF)
                    {
                    case Tokenizer.cstr:
                        {
                            if (intag)
                            {
                                if (type == SecurityElementType.Comment)
                                {
                                    // Ignore data in comments but still get the data
                                    // to keep the stream in the right place.
                                    stream.ThrowAwayNextString();
                                    stream.TagLastToken( c_wastedstringtag );
                                }
                                else
                                {
                                    // We're in a regular tag, so we've found an attribute/value pair.

                                    if (strValue == null)
                                    {
                                        // Found attribute name, save it for later.
                                        // The name token itself is left untagged; the
                                        // attribute tag goes on the value token below.

                                        strValue = stream.GetNextString();
                                    }
                                    else
                                    {
                                        // Found attribute text, add the pair to the current element.

                                        if (!sawEquals)
                                            throw new XmlSyntaxException( _t.LineNo );

                                        stream.TagLastToken( c_attributetag );
                                        index += SecurityDocument.EncodedStringSize( strValue ) +
                                                 SecurityDocument.EncodedStringSize( stream.GetNextString() ) +
                                                 1;
                                        strValue = null;
                                        sawEquals = false;
                                    }
                                }
                            }
                            else
                            {
                                // We're not in a tag, so we've found text between tags.

                                if (sawText)
                                {
                                    // Continuation of earlier text: ParseContents joins it
                                    // with a single space, so budget for that separator too.
                                    stream.TagLastToken( c_additionaltexttag );
                                    index += SecurityDocument.EncodedStringSize( stream.GetNextString() ) +
                                             SecurityDocument.EncodedStringSize( " " );
                                }
                                else
                                {
                                    stream.TagLastToken( c_texttag );
                                    index += SecurityDocument.EncodedStringSize( stream.GetNextString() ) +
                                             1;
                                    sawText = true;
                                }
                            }
                        }
                        break;

                    case Tokenizer.bra:
                        intag = true;
                        sawText = false;
                        i = stream.GetNextToken();

                        if (i == Tokenizer.slash)
                        {
                            // "</": closing tag.  The name inside it is not checked
                            // against the opening tag; its strings are just discarded.
                            stream.TagLastToken( c_childrentag );
                            while (true)
                            {
                                // spin; don't care what's in here
                                i = stream.GetNextToken();
                                if (i == Tokenizer.cstr)
                                {
                                    stream.ThrowAwayNextString();
                                    stream.TagLastToken( c_wastedstringtag );
                                }
                                else if (i == -1)
                                    throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_UnexpectedEndOfFile" ));
                                else
                                    break;
                            }

                            if (i != Tokenizer.ket)
                            {
                                throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_ExpectedCloseBracket" ));
                            }

                            intag = false;

                            // Found the end of this element
                            index++;

                            sawText = false;
                            stackDepth--;

                            needToBreak = true;
                        }
                        else if (i == Tokenizer.cstr)
                        {
                            // Found a child

                            createdNode = true;

                            stream.TagLastToken( c_elementtag );
                            index += SecurityDocument.EncodedStringSize( stream.GetNextString() ) +
                                     1;

                            if (type != SecurityElementType.Regular)
                                throw new XmlSyntaxException( _t.LineNo );

                            needToBreak = true;
                            stackDepth++;
                        }
                        else if (i == Tokenizer.bang)
                        {
                            // Found "<!" (a comment node).  Skip everything up to the
                            // matching '>', counting nested '<' / '>' pairs in 'status'
                            // and discarding any strings on the way.

                            status = 1;

                            do
                            {
                                i = stream.GetNextToken();

                                switch (i)
                                {
                                case Tokenizer.bra:
                                    status++;
                                    break;

                                case Tokenizer.ket:
                                    status--;
                                    break;

                                case Tokenizer.cstr:
                                    stream.ThrowAwayNextString();
                                    stream.TagLastToken( c_wastedstringtag );
                                    break;

                                case -1:
                                    // The stream ended before the "<!" construct was
                                    // closed.  Report it like the closing-tag loop above
                                    // does instead of reading past the end of the stream.
                                    throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_UnexpectedEndOfFile" ));

                                default:
                                    break;
                                }
                            } while (status > 0);

                            intag = false;
                            sawText = false;
                            needToBreak = true;
                        }
                        else if (i == Tokenizer.quest)
                        {
                            // Found a child that is a format node.  Next up better be a cstr.

                            i = stream.GetNextToken();

                            if (i != Tokenizer.cstr)
                                throw new XmlSyntaxException( _t.LineNo );

                            createdNode = true;

                            type = SecurityElementType.Format;

                            stream.TagLastToken( c_elementtag );
                            index += SecurityDocument.EncodedStringSize( stream.GetNextString() ) +
                                     1;

                            // status == 1 is what lets the Tokenizer.quest case below
                            // accept the closing "?>".
                            status = 1;
                            stackDepth++;

                            needToBreak = true;
                        }
                        else
                        {
                            throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_ExpectedSlashOrString" ));
                        }
                        break ;

                    case Tokenizer.equals:
                        sawEquals = true;
                        break;

                    case Tokenizer.ket:
                        if (intag)
                        {
                            intag = false;
                            // 'continue' skips the needToBreak / needToPop bookkeeping
                            // at the bottom of the loop body.
                            continue;
                        }
                        else
                        {
                            throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_UnexpectedCloseBracket" ));
                        }
                        // not reachable

                    case Tokenizer.slash:
                        // A '/' here must be the start of "/>" (self-closing element).
                        i = stream.GetNextToken();

                        if (i == Tokenizer.ket)
                        {
                            // Found the end of this element
                            stream.TagLastToken( c_childrentag );
                            index++;
                            stackDepth--;
                            sawText = false;

                            needToBreak = true;
                        }
                        else
                        {
                            throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_ExpectedCloseBracket" ));
                        }
                        break;

                    case Tokenizer.quest:
                        // A '?' is only legal as the start of "?>" closing a format node.
                        if (intag && type == SecurityElementType.Format && status == 1)
                        {
                            i = stream.GetNextToken();

                            if (i == Tokenizer.ket)
                            {
                                stream.TagLastToken( c_childrentag );
                                index++;
                                stackDepth--;
                                sawText = false;

                                needToBreak = true;
                            }
                            else
                            {
                                throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_ExpectedCloseBracket" ));
                            }
                        }
                        else
                        {
                            throw new XmlSyntaxException (_t.LineNo);
                        }
                        break;

                    case Tokenizer.dash:
                    default:
                        throw new XmlSyntaxException (_t.LineNo) ;
                    }

                    if (needToBreak)
                    {
                        needToBreak = false;
                        needToPop = false;
                        break;
                    }
                    else
                    {
                        needToPop = true;
                    }
                }

                if (needToPop)
                {
                    // The inner loop ran out of tokens right after a token that did
                    // not request a break; account for closing the current level.
                    index++;
                    stackDepth--;
                    sawText = false;
                }
                else if (i == -1 && (stackDepth != 1 || !createdNode))
                {
                    // This means that we still have items on the stack, but the end of our
                    // stream has been reached.

                    throw new XmlSyntaxException( _t.LineNo, Environment.GetResourceString( "XMLSyntax_UnexpectedEndOfFile" ));
                }
            }
            while (stackDepth > 1);
        }
        
        // Checks whether the stream opens with "<?" (a format node).  If it does,
        // asks the tokenizer for more tokens, scans the node's attribute pairs
        // and, on finding encoding="...", switches the tokenizer to that
        // encoding via Tokenizer.ChangeFormat.
        //
        // Returns 0 when the stream starts with "<?", otherwise 2.  ParseContents
        // passes the result to TokenizerStream.GoToPosition.
        //
        // Throws XmlSyntaxException when the format node holds a token other than
        // a string or '='.
        private int DetermineFormat( TokenizerStream stream )
        {
            const int formatNodeResult = 0;
            const int noFormatNodeResult = 2;

            // Anything that does not begin with '<' followed by '?' has no format node.
            if (stream.GetNextToken() != Tokenizer.bra)
                return noFormatNodeResult;

            if (stream.GetNextToken() != Tokenizer.quest)
                return noFormatNodeResult;

            _t.GetTokens( stream, -1, true );
            stream.GoToPosition( 2 );

            bool equalsPending = false;     // an '=' has been seen and not yet used up
            bool encodingNamed = false;     // the attribute name "encoding" has been seen

            while (true)
            {
                short token = stream.GetNextToken();

                // Stop at the end of the stream or at the closing '>'.
                if (token == -1 || token == Tokenizer.ket)
                    break;

                if (token == Tokenizer.equals)
                {
                    equalsPending = true;
                }
                else if (token == Tokenizer.cstr)
                {
                    if (equalsPending && encodingNamed)
                    {
                        // This string is the value of the encoding attribute.
                        String encodingName = stream.GetNextString();
                        _t.ChangeFormat( System.Text.Encoding.GetEncoding( encodingName ) );
                        return formatNodeResult;
                    }

                    if (equalsPending)
                    {
                        // Value of some other attribute: discard it and start over.
                        equalsPending = false;
                        encodingNamed = false;
                        stream.ThrowAwayNextString();
                    }
                    else if (String.Equals( stream.GetNextString(), "encoding", StringComparison.Ordinal ))
                    {
                        // Attribute name; remember it if it is the one we want.
                        encodingNamed = true;
                    }
                }
                else
                {
                    throw new XmlSyntaxException (_t.LineNo, Environment.GetResourceString( "XMLSyntax_UnexpectedEndOfFile" ));
                }
            }

            return formatNodeResult;
        }
                    
 
        // Drives the whole parse: tokenizes the input, lets DetermineFormat look
        // for a leading format node, runs GetRequiredSizes to tag tokens and
        // compute the document size, then walks the tagged tokens to fill _doc.
        //
        // Throws XmlSyntaxException when a tagged token carries an unknown tag
        // (and whatever the helper methods throw for malformed input).
        private void ParseContents()
        {
            TokenizerStream tokens = new TokenizerStream();

            // Fetch the first tokens so DetermineFormat can inspect how the input starts.
            _t.GetTokens( tokens, 2, false );
            tokens.Reset();

            int resumePosition = DetermineFormat( tokens );

            tokens.GoToPosition( resumePosition );
            _t.GetTokens( tokens, -1, false );
            tokens.Reset();

            // Pass 1: tag the tokens and total up the space the document needs.
            int requiredSize = 0;
            GetRequiredSizes( tokens, ref requiredSize );

            // Pass 2: replay the stream, writing every tagged token into the document.
            _doc = new SecurityDocument( requiredSize );
            int writePosition = 0;

            tokens.Reset();

            short token;

            while ((token = tokens.GetNextFullToken()) != -1)
            {
                // Tokens that pass 1 left untagged contribute nothing.
                if ((token & c_flag) == 0)
                    continue;

                short tag = (short)(token & 0xFF00);

                switch (tag)
                {
                case c_elementtag:
                    _doc.AddToken( SecurityDocument.c_element, ref writePosition );
                    _doc.AddString( tokens.GetNextString(), ref writePosition );
                    break;

                case c_attributetag:
                    // Two strings: attribute name, then attribute value.
                    _doc.AddToken( SecurityDocument.c_attribute, ref writePosition );
                    _doc.AddString( tokens.GetNextString(), ref writePosition );
                    _doc.AddString( tokens.GetNextString(), ref writePosition );
                    break;

                case c_texttag:
                    _doc.AddToken( SecurityDocument.c_text, ref writePosition );
                    _doc.AddString( tokens.GetNextString(), ref writePosition );
                    break;

                case c_additionaltexttag:
                    // Joined to the preceding text with a single space.
                    _doc.AppendString( " ", ref writePosition );
                    _doc.AppendString( tokens.GetNextString(), ref writePosition );
                    break;

                case c_childrentag:
                    _doc.AddToken( SecurityDocument.c_children, ref writePosition );
                    break;

                case c_wastedstringtag:
                    tokens.ThrowAwayNextString();
                    break;

                default:
                    throw new XmlSyntaxException();
                }
            }
        }
    
        // Shared constructor: parses everything the given tokenizer produces.
        // The tokenizer is recycled when parsing ends, whether ParseContents
        // returns normally or throws.
        private Parser(Tokenizer t)
        {
            _doc = null;
            _t = t;

            try
            {
                ParseContents();
            }
            finally
            {
                _t.Recycle();
            }
        }
        
        // Parses XML held in a string.
        internal Parser( String input )
            : this( new Tokenizer( input ) )
        {
        }

        // Parses XML held in a string; searchStrings and replaceStrings are
        // handed to the Tokenizer unchanged.
        internal Parser( String input, String[] searchStrings, String[] replaceStrings )
            : this( new Tokenizer( input, searchStrings, replaceStrings ) )
        {
        }

        // Parses XML held in a byte array, starting at index 0.
        internal Parser( byte[] array, Tokenizer.ByteTokenEncoding encoding )
            : this( array, encoding, 0 )
        {
        }

        // Parses XML held in a byte array, starting at startIndex.
        internal Parser( byte[] array, Tokenizer.ByteTokenEncoding encoding, int startIndex )
            : this( new Tokenizer( array, encoding, startIndex ) )
        {
        }

        // Parses XML read from a StreamReader.
        internal Parser( StreamReader input )
            : this( new Tokenizer( input ) )
        {
        }

        // Parses XML held in a char array.
        internal Parser( char[] array )
            : this( new Tokenizer( array ) )
        {
        }
        
    }                                              
    
}
// File: system\security\util\parser.cs
// Project: ndp\clr\src\bcl\mscorlib.csproj (mscorlib)