File: System\Xml\XmlEncoding.cs
Project: ndp\fx\src\Xml\System.Xml.csproj (System.Xml)
//------------------------------------------------------------------------------
// <copyright file="XmlEncoding.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------
 
using System.Text;
using System.Diagnostics;
 
namespace System.Xml {
 
    // Stateful UTF-16 decoder that handles both byte orders. When a call ends in the
    // middle of a 2-byte code unit, the dangling byte is remembered in lastByte and is
    // completed by the first byte supplied to the next call.
    internal class UTF16Decoder : System.Text.Decoder {
        private bool bigEndian;
        // first byte of a split code unit carried over from the previous call; -1 if none
        private int lastByte;
        private const int CharSize = 2;

        public UTF16Decoder( bool bigEndian ) {
            this.lastByte = -1;
            this.bigEndian = bigEndian;
        }

        // Returns the number of chars that 'count' more bytes will produce, taking a
        // pending byte from the previous call into account. Never throws for odd counts.
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return GetCharCount( bytes, index, count, false );
        }

        // Same as the overload above, but when flush is true an odd total number of
        // bytes (pending byte included) is reported as an ArgumentException.
        public override int GetCharCount( byte[] bytes, int index, int count, bool flush ) {
            int byteCount = count + ( ( lastByte >= 0 ) ? 1 : 0 );
            if ( flush && ( byteCount % CharSize != 0 ) ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { -1 } ), (string)null );
            }
            return byteCount / CharSize;
        }

        // Decodes byteCount bytes starting at bytes[byteIndex] into chars starting at
        // chars[charIndex] and returns the number of chars written. A trailing odd byte
        // is stored in lastByte for the next call.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int charCount = GetCharCount( bytes, byteIndex, byteCount );

            // complete the code unit that was split by the previous call
            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    return charCount;
                }
                int nextByte = bytes[byteIndex++];
                byteCount--;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                lastByte = -1;
            }

            // an odd number of remaining bytes -> keep the last one for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + --byteCount];
            }

            // stream byte order differs from the machine byte order -> swap bytes manually;
            // otherwise the bytes can be copied as-is with the fast BlockCopy
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int byteEnd = byteIndex + byteCount;
                if ( bigEndian ) {
                    while ( byteIndex < byteEnd ) {
                        int hi = bytes[byteIndex++];
                        int lo = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( byteIndex < byteEnd ) {
                        int lo = bytes[byteIndex++];
                        int hi = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, byteCount );
            }
            return charCount;
        }

        // Decodes as many bytes as fit into charCount chars. bytesUsed and charsUsed
        // report the consumed input and produced output; completed is false when the
        // input had to be truncated because the output window was too small.
        // The flush argument is not consulted by this implementation.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            charsUsed = 0;
            bytesUsed = 0;

            // complete the code unit that was split by the previous call
            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    completed = true;
                    return;
                }
                // no room for even one char: consume nothing and keep the pending byte,
                // instead of writing outside the caller's output window
                if ( charCount == 0 ) {
                    completed = false;
                    return;
                }
                int nextByte = bytes[byteIndex++];
                byteCount--;
                bytesUsed++;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                charCount--;
                charsUsed++;
                lastByte = -1;
            }

            // limit the input to what fits into the output window
            if ( charCount * CharSize < byteCount ) {
                byteCount = charCount * CharSize;
                completed = false;
            }
            else {
                completed = true;
            }

            // decode whole code units only; a trailing odd byte is handled below
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int i = byteIndex;
                int byteEnd = i + ( byteCount & ~0x1 );
                if ( bigEndian ) {
                    while ( i < byteEnd ) {
                        int hi = bytes[i++];
                        int lo = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( i < byteEnd ) {
                        int lo = bytes[i++];
                        int hi = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, (int)(byteCount & ~0x1) );
            }
            charsUsed += byteCount / CharSize;
            bytesUsed += byteCount;

            // the odd trailing byte counts as consumed and is kept for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + byteCount - 1];
            }
        }
    }
 
    // Stateless decoder that widens every input byte to a char with the same numeric
    // value. No validation is performed, so it can never fail on the byte content.
    internal class SafeAsciiDecoder : Decoder {

        public SafeAsciiDecoder() {
        }

        // One byte always yields exactly one char.
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return count;
        }

        // Copies byteCount bytes from bytes[byteIndex] to chars[charIndex], one char per
        // byte, and returns byteCount.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int sourceEnd = byteIndex + byteCount;
            for ( int source = byteIndex, target = charIndex; source < sourceEnd; source++, target++ ) {
                chars[target] = (char)bytes[source];
            }
            return byteCount;
        }

        // Copies at most charCount bytes; completed is false when the input was cut
        // short because the output window is smaller than the input.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            completed = !( charCount < byteCount );
            if ( !completed ) {
                byteCount = charCount;
            }

            int sourceEnd = byteIndex + byteCount;
            for ( int source = byteIndex, target = charIndex; source < sourceEnd; source++, target++ ) {
                chars[target] = (char)bytes[source];
            }

            bytesUsed = byteCount;
            charsUsed = byteCount;
        }
    }
 
#if !SILVERLIGHT
    // Decode-only UCS-4 encoding. Derived classes install the decoder for one specific
    // byte order in ucs4Decoder; the encoding direction (GetBytes, GetEncoder,
    // GetMaxByteCount) is stubbed out and returns null or 0.
    internal class Ucs4Encoding : Encoding  {
        // decoder for the concrete byte order; assigned by the derived class constructor
        internal Ucs4Decoder ucs4Decoder;

        public override string WebName {
            get {
                return this.EncodingName;
            }
        }

        // Returns the single decoder instance owned by this encoding (not a fresh one).
        public override Decoder GetDecoder() {
            return ucs4Decoder;
        }

        public override int GetByteCount( char[] chars, int index, int count ) {
            return checked( count * 4 );
        }

        public override int GetByteCount( char[] chars ) {
            // checked, like the overload above, so an overflow traps instead of wrapping
            return checked( chars.Length * 4 );
        }

        // Encoding is not implemented.
        public override byte[] GetBytes( string s ) {
            return null; //ucs4Decoder.GetByteCount(chars, index, count);
        }
        // Encoding is not implemented.
        public override int GetBytes( char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex ) {
            return 0;
        }
        // Encoding is not implemented.
        public override int GetMaxByteCount( int charCount ) {
            return 0;
        }

        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ucs4Decoder.GetCharCount( bytes, index, count );
        }

        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            return ucs4Decoder.GetChars( bytes, byteIndex, byteCount, chars, charIndex );
        }

        // Number of (possibly partial) byte quadruplets in byteCount bytes.
        public override int GetMaxCharCount( int byteCount ) {
            return ( byteCount + 3 ) / 4;
        }

        public override int CodePage {
            get {
                return 0;
            }
        }

        // Number of complete byte quadruplets in the array.
        public override int GetCharCount( byte[] bytes ) {
            return bytes.Length / 4;
        }

        // Encoding is not implemented.
        public override Encoder GetEncoder() {
            return null;
        }

        // Each of the following properties creates a new encoding instance on every access.

        internal static Encoding UCS4_Littleendian {
            get {
                return new Ucs4Encoding4321();
            }
        }

        internal static Encoding UCS4_Bigendian {
            get {
                return new Ucs4Encoding1234();
            }
        }

        internal static Encoding UCS4_2143 {
            get {
                return new Ucs4Encoding2143();
            }
        }
        internal static Encoding UCS4_3412 {
            get {
                return  new Ucs4Encoding3412();
            }
        }
    }
 
    // UCS-4 encoding for byte order 1234 (most significant byte first).
    internal class Ucs4Encoding1234 : Ucs4Encoding {

        public Ucs4Encoding1234() {
            this.ucs4Decoder = new Ucs4Decoder1234();
        }

        public override string EncodingName {
            get { return "ucs-4 (Bigendian)"; }
        }

        // Preamble bytes identifying this byte order.
        public override byte[] GetPreamble() {
            byte[] preamble = { 0x00, 0x00, 0xfe, 0xff };
            return preamble;
        }
    }
 
    // UCS-4 encoding for byte order 4321 (least significant byte first).
    internal class Ucs4Encoding4321 : Ucs4Encoding {

        public Ucs4Encoding4321() {
            this.ucs4Decoder = new Ucs4Decoder4321();
        }

        public override string EncodingName {
            get { return "ucs-4"; }
        }

        // Preamble bytes identifying this byte order.
        public override byte[] GetPreamble() {
            byte[] preamble = { 0xff, 0xfe, 0x00, 0x00 };
            return preamble;
        }
    }
 
    // UCS-4 encoding for the unusual byte order 2143.
    internal class Ucs4Encoding2143 : Ucs4Encoding {

        public Ucs4Encoding2143() {
            this.ucs4Decoder = new Ucs4Decoder2143();
        }

        public override string EncodingName {
            get { return "ucs-4 (order 2143)"; }
        }

        // Preamble bytes identifying this byte order.
        public override byte[] GetPreamble() {
            byte[] preamble = { 0x00, 0x00, 0xff, 0xfe };
            return preamble;
        }
    }
 
    // UCS-4 encoding for the unusual byte order 3412.
    internal class Ucs4Encoding3412 : Ucs4Encoding {

        public Ucs4Encoding3412() {
            this.ucs4Decoder = new Ucs4Decoder3412();
        }

        public override string EncodingName {
            get { return "ucs-4 (order 3412)"; }
        }

        // Preamble bytes identifying this byte order.
        public override byte[] GetPreamble() {
            byte[] preamble = { 0xfe, 0xff, 0x00, 0x00 };
            return preamble;
        }
    }
 
    // Common part of the UCS-4 decoders. Derived classes implement GetFullChars for one
    // byte order; this class handles byte quadruplets that are split across calls by
    // caching the leftover bytes in lastBytes.
    internal abstract class Ucs4Decoder : Decoder {

        // bytes of an incomplete quadruplet carried over from the previous call
        internal byte [] lastBytes = new byte[4];
        // number of valid bytes in lastBytes (0..3 between calls)
        internal int lastBytesCount = 0;

        // Number of complete quadruplets available, counting the cached bytes.
        // NOTE(review): a code point above 0xFFFF is decoded into two chars by
        // GetFullChars, so this value can be smaller than what GetChars writes.
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ( count + lastBytesCount ) / 4;
        }

        // Decodes all complete quadruplets in bytes[byteIndex .. byteIndex + byteCount)
        // into chars starting at charIndex and returns the number of chars written.
        internal abstract int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex );

        // Decodes byteCount bytes and returns the number of chars written. Bytes that do
        // not complete a quadruplet are cached for the next call.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            // finish a character from the bytes that were cached last time
            int i = 0;
            if ( lastBytesCount > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lastBytesCount < 4 && byteCount > 0; lastBytesCount++ ) {
                    lastBytes[lastBytesCount] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                }
                // still not enough bytes -> return
                if ( lastBytesCount < 4 ) {
                    return 0;
                }
                // decode 1 code point from the byte cache; it yields 2 chars (a surrogate
                // pair) when the code point is above 0xFFFF
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 || i == 2 );
                charIndex += i;
                lastBytesCount = 0;
            }

            // decode block of byte quadruplets
            i = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache remaining bytes that do not make up a character
            int bytesLeft = ( byteCount & 0x3 );
            for( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;
            return i;
        }

        // Decodes as many bytes as correspond to charCount quadruplets. bytesUsed and
        // charsUsed report the consumed input and produced output; completed is false
        // when input was left unconsumed because of the charCount limit.
        // The flush argument is not consulted by this implementation.
        // NOTE(review): the limit is computed as 4 bytes per char, so input containing
        // code points above 0xFFFF can produce more than charCount chars.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            bytesUsed = 0;
            charsUsed = 0;
            // finish a character from the bytes that were cached last time
            int i = 0;
            int lbc = lastBytesCount;
            if ( lbc > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lbc < 4 && byteCount > 0; lbc++ ) {
                    lastBytes[lbc] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                    bytesUsed++;
                }
                // still not enough bytes -> return
                if ( lbc < 4 ) {
                    lastBytesCount = lbc;
                    completed = true;
                    return;
                }
                // decode 1 code point from the byte cache; it yields 2 chars (a surrogate
                // pair) when the code point is above 0xFFFF
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 || i == 2 );
                charIndex += i;
                charCount -= i;
                charsUsed = i;

                lastBytesCount = 0;

                // if that's all that was requested -> return; charCount is negative here
                // when a surrogate pair was produced for a request of a single char, and
                // continuing would turn byteCount and bytesUsed negative
                if ( charCount <= 0 ) {
                    completed = ( byteCount == 0 );
                    return;
                }
            }

            // modify the byte count for GetFullChars depending on how many characters were requested
            if ( charCount * 4 < byteCount ) {
                byteCount = charCount * 4;
                completed = false;
            }
            else {
                completed = true;
            }
            bytesUsed += byteCount;

            // decode block of byte quadruplets
            charsUsed = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache remaining bytes that do not make up a character
            int bytesLeft = ( byteCount & 0x3 );
            for( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;
        }

        // Writes the UTF-16 surrogate pair for a code point in the range
        // 0x10000..0x10FFFF to chars[charIndex] and chars[charIndex + 1].
        internal void Ucs4ToUTF16(uint code, char[] chars, int charIndex) {
            // 20-bit offset into the supplementary planes: the upper 10 bits go into the
            // high surrogate, the lower 10 bits into the low surrogate
            uint offset = code - 0x10000;
            chars[charIndex] = (char)(XmlCharType.SurHighStart + (offset >> 10));
            chars[charIndex + 1] = (char)(XmlCharType.SurLowStart + (offset & 0x3FF));
        }
    }
 
    // UCS-4 decoder for byte order 4321: the first byte of each quadruplet is the least
    // significant one.
    internal class Ucs4Decoder4321 : Ucs4Decoder  {

        // Decodes every complete quadruplet in the given byte range and returns the
        // number of chars written. Throws ArgumentException for values above 0x10FFFF
        // and XmlException for values in the surrogate range.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int end = byteIndex + byteCount;
            int src = byteIndex;
            int dst = charIndex;

            while ( src + 3 < end ) {
                int b0 = bytes[src];
                int b1 = bytes[src + 1];
                int b2 = bytes[src + 2];
                int b3 = bytes[src + 3];
                uint code = (uint)( ( b3 << 24 ) | ( b2 << 16 ) | ( b1 << 8 ) | b0 );

                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
                }

                if ( code > 0xFFFF ) {
                    // supplementary code point -> surrogate pair, two chars
                    Ucs4ToUTF16( code, chars, dst );
                    dst += 2;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    chars[dst] = (char)code;
                    dst++;
                }
                src += 4;
            }
            return dst - charIndex;
        }
    }
 
    // UCS-4 decoder for byte order 1234: the first byte of each quadruplet is the most
    // significant one.
    internal class Ucs4Decoder1234 : Ucs4Decoder  {

        // Decodes every complete quadruplet in the given byte range and returns the
        // number of chars written. Throws ArgumentException for values above 0x10FFFF
        // and XmlException for values in the surrogate range.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int end = byteIndex + byteCount;
            int src = byteIndex;
            int dst = charIndex;

            while ( src + 3 < end ) {
                int b0 = bytes[src];
                int b1 = bytes[src + 1];
                int b2 = bytes[src + 2];
                int b3 = bytes[src + 3];
                uint code = (uint)( ( b0 << 24 ) | ( b1 << 16 ) | ( b2 << 8 ) | b3 );

                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
                }

                if ( code > 0xFFFF ) {
                    // supplementary code point -> surrogate pair, two chars
                    Ucs4ToUTF16( code, chars, dst );
                    dst += 2;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    chars[dst] = (char)code;
                    dst++;
                }
                src += 4;
            }
            return dst - charIndex;
        }
    }
 
 
    // UCS-4 decoder for byte order 2143: within each quadruplet the bytes arrive as
    // second-most significant, most significant, least significant, second-least.
    internal class Ucs4Decoder2143 : Ucs4Decoder  {

        // Decodes every complete quadruplet in the given byte range and returns the
        // number of chars written. Throws ArgumentException for values above 0x10FFFF
        // and XmlException for values in the surrogate range.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int end = byteIndex + byteCount;
            int src = byteIndex;
            int dst = charIndex;

            while ( src + 3 < end ) {
                int b0 = bytes[src];
                int b1 = bytes[src + 1];
                int b2 = bytes[src + 2];
                int b3 = bytes[src + 3];
                uint code = (uint)( ( b1 << 24 ) | ( b0 << 16 ) | ( b3 << 8 ) | b2 );

                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
                }

                if ( code > 0xFFFF ) {
                    // supplementary code point -> surrogate pair, two chars
                    Ucs4ToUTF16( code, chars, dst );
                    dst += 2;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    chars[dst] = (char)code;
                    dst++;
                }
                src += 4;
            }
            return dst - charIndex;
        }
    }
 
 
    // UCS-4 decoder for byte order 3412: within each quadruplet the bytes arrive as
    // second-least significant, least significant, most significant, second-most.
    internal class Ucs4Decoder3412 : Ucs4Decoder  {

        // Decodes every complete quadruplet in the given byte range and returns the
        // number of chars written. Throws ArgumentException for values above 0x10FFFF
        // and XmlException for values in the surrogate range.
        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int end = byteIndex + byteCount;
            int src = byteIndex;
            int dst = charIndex;

            while ( src + 3 < end ) {
                int b0 = bytes[src];
                int b1 = bytes[src + 1];
                int b2 = bytes[src + 2];
                int b3 = bytes[src + 3];
                uint code = (uint)( ( b2 << 24 ) | ( b3 << 16 ) | ( b0 << 8 ) | b1 );

                if ( code > 0x10FFFF ) {
                    throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { src } ), (string)null );
                }

                if ( code > 0xFFFF ) {
                    // supplementary code point -> surrogate pair, two chars
                    Ucs4ToUTF16( code, chars, dst );
                    dst += 2;
                }
                else {
                    if ( XmlCharType.IsSurrogate( (int)code ) ) {
                        throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
                    }
                    chars[dst] = (char)code;
                    dst++;
                }
                src += 4;
            }
            return dst - charIndex;
        }
    }
#endif
}