File: Base\MS\Internal\ContentType.cs
Project: wpf\src\WindowsBase.csproj (WindowsBase)
//-----------------------------------------------------------------------------
//
// <copyright file="ContentType.cs" company="Microsoft">
//    Copyright (C) Microsoft Corporation.  All rights reserved.
// </copyright>
//
// Description:
//  ContentType class parses and validates the content-type string.
//  It provides functionality to compare the type/subtype values.
//
// Details:
// Grammar which this class follows - 
//
// Content-type grammar MUST conform to media-type grammar as per 
// RFC 2616 (ABNF notation):
// 
// media-type     = type "/" subtype *( ";" parameter )
// type           = token
// subtype        = token
// parameter      = attribute "=" value
// attribute      = token
// value          = token | quoted-string
// quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
// qdtext         = <any TEXT except <">>
// quoted-pair    = "\" CHAR
// token          = 1*<any CHAR except CTLs or separators>
// separators     = "(" | ")" | "<" | ">" | "@"
//                  | "," | ";" | ":" | "\" | <">
//                  | "/" | "[" | "]" | "?" | "="
//                  | "{" | "}" | SP | HT
// TEXT           = <any OCTET except CTLs, but including LWS>
// OCTET          = <any 8-bit sequence of data>
// CHAR           = <any US-ASCII character (octets 0 - 127)>
// CTL            = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
// CR             = <US-ASCII CR, carriage return (13)>
// LF             = <US-ASCII LF, linefeed (10)>
// SP             = <US-ASCII SP, space (32)>
// HT             = <US-ASCII HT, horizontal-tab (9)>
// <">            = <US-ASCII double-quote mark (34)>
// LWS            = [CRLF] 1*( SP | HT )
// CRLF           = CR LF
// Linear white space (LWS) MUST NOT be used between the type and subtype, nor 
// between an attribute and its value. Leading and trailing LWS are prohibited.
// 
//
// History:
//  04/26/2004: SarjanaS: Initial Creation 
//-----------------------------------------------------------------------------
 
using System;
using System.Collections.Generic;   // For Dictionary<string, string>
using System.Text;                  // For StringBuilder
using System.Windows;               // For Exception strings - SRID
using MS.Internal.WindowsBase;      // For FriendAccessAllowed
using System.Diagnostics;           // For Debug.Assert
 
namespace MS.Internal
{
    /// <summary>
    /// Content Type class
    /// </summary>
    [FriendAccessAllowed]
    internal sealed class ContentType
    {       
        //------------------------------------------------------
        //
        //  Internal Constructors
        //
        //------------------------------------------------------
 
        #region Internal Constructors
 
        /// <summary>
        /// Builds a ContentType from its string form. The string is validated against
        /// the RFC 2616 media-type grammar while it is being parsed.
        /// Note: an empty string is accepted as input. It should be used as an
        /// indication of an absent/unknown ContentType.
        /// </summary>
        /// <param name="contentType">content-type string to parse</param>
        /// <exception cref="ArgumentNullException">If the contentType parameter is null</exception>
        /// <exception cref="ArgumentException">If the contentType string has leading or 
        /// trailing Linear White Space (LWS) characters</exception>
        /// <exception cref="ArgumentException">If the contentType string contains a carriage 
        /// return that is not adjacent to a line feed</exception>
        internal ContentType(string contentType)
        {
            if (contentType == null)
            {
                throw new ArgumentNullException("contentType");
            }

            if (contentType.Length == 0)
            {
                // Nothing to parse: the empty content type is its own string form.
                _contentType = String.Empty;
            }
            else
            {
                char firstChar = contentType[0];
                char lastChar = contentType[contentType.Length - 1];

                if (IsLinearWhiteSpaceChar(firstChar) || IsLinearWhiteSpaceChar(lastChar))
                {
                    throw new ArgumentException(SR.Get(SRID.ContentTypeCannotHaveLeadingTrailingLWS));
                }

                // A '\r' is only acceptable next to a '\n' (either "\r\n" or "\n\r").
                ValidateCarriageReturns(contentType);

                // Everything before the first ';' is "type/subtype"; the rest, starting
                // at that ';', is the parameter list, e.g.
                //   type/subtype ; param1=value1 ; param2=value2 ; param3="value3"
                int semiColonIndex = contentType.IndexOf(_semicolonSeparator);
                bool hasParameters = (semiColonIndex != -1);

                string typeAndSubType = hasParameters
                    ? contentType.Substring(0, semiColonIndex)
                    : contentType;

                ParseTypeAndSubType(typeAndSubType);

                if (hasParameters)
                {
                    ParseParameterAndValue(contentType.Substring(semiColonIndex));
                }
            }

            // Kept untouched so that OriginalString can hand back exactly what was passed in.
            _originalString = contentType;

            // ToString() returns an empty string until this flag is set, which keeps
            // debugger displays sane while the object is still being constructed.
            _isInitialized = true;
        }
 
        #endregion Internal Constructors
 
        //------------------------------------------------------
        //
        //  Internal Properties
        //
        //------------------------------------------------------
 
        #region Internal Properties
 
        /// <summary>
        /// The type part of the content type.
        /// For the content type "text/xml" this property returns "text".
        /// </summary>
        internal string TypeComponent
        {
            get { return _type; }
        }
 
        /// <summary>
        /// The subtype part of the content type.
        /// For the content type "text/xml" this property returns "xml".
        /// </summary>
        internal string SubTypeComponent
        {
            get { return _subType; }
        }
 
        /// <summary>
        /// Enumerator over the parameter/value pairs of this content type.
        /// Only the enumerator is handed out, which keeps the underlying dictionary
        /// read-only for callers. For a content type such as
        /// type/subtype ; param1=value1 ; param2=value2 ; param3="value3"
        /// the enumerator yields one entry per parameter.
        /// The backing dictionary is created on first access if it does not exist yet.
        /// </summary>
        internal Dictionary<string, string>.Enumerator ParameterValuePairs
        {
            get
            {
                EnsureParameterDictionary();

                Dictionary<string, string>.Enumerator pairs = _parameterDictionary.GetEnumerator();
                return pairs;
            }
        }
 
 
        /// <summary>
        /// Shared instance representing the empty content type "".
        /// The empty string is not a valid content type as per the grammar; use this
        /// value where the content type is missing or not available.
        /// </summary>
        internal static ContentType Empty
        {
            get { return _emptyContentType; }
        }
 
        /// <summary>
        /// The exact string that was passed to the constructor.
        /// </summary>
        internal string OriginalString
        {
            get { return _originalString; }
        }
        #endregion Internal Properties
 
        //------------------------------------------------------
        //
        //  Internal Methods
        //
        //------------------------------------------------------
 
        #region Internal Methods
 
        /// <summary>
        /// Strong comparison of two content types: the type and subType values are
        /// compared in an ASCII case-insensitive manner, and neither operand is allowed
        /// to carry parameters.
        /// </summary>
        /// <param name="contentType">Content type to be compared with</param>
        /// <returns>Result of the two-argument overload called with parameters disallowed</returns>
        internal bool AreTypeAndSubTypeEqual(ContentType contentType)
        {
            const bool allowParameters = false;

            return AreTypeAndSubTypeEqual(contentType, allowParameters);
        }
 
        /// <summary>
        /// Compares the type and subType values of two content types in an ASCII
        /// case-insensitive manner. Parameter and value pairs never take part in the
        /// comparison itself; to compare parameters too, enumerate ParameterValuePairs
        /// on both objects and compare the entries.
        /// The allowParameterValuePairs argument only controls whether the comparison
        /// tolerates parameters being present on either operand.
        /// </summary>
        /// <param name="contentType">Content type to be compared with</param>
        /// <param name="allowParameterValuePairs">If true, parameter/value pairs may be present
        /// on either content type. If false, the method returns false as soon as either 
        /// content type has at least one parameter.</param>
        /// <returns>true if contentType is not null, the parameter restriction (if any) is met, 
        /// and both type and subType match ignoring case; false otherwise</returns>
        internal bool AreTypeAndSubTypeEqual(ContentType contentType, bool allowParameterValuePairs)
        {
            if (contentType == null)
            {
                return false;
            }

            if (!allowParameterValuePairs)
            {
                // Look at the dictionaries directly. Going through ParameterValuePairs would
                // allocate a dictionary on the other operand as a side effect, and reading
                // Enumerator.Current after an unchecked MoveNext() is not defined behavior.
                bool thisHasParameters =
                    _parameterDictionary != null && _parameterDictionary.Count > 0;

                bool otherHasParameters =
                    contentType._parameterDictionary != null && contentType._parameterDictionary.Count > 0;

                if (thisHasParameters || otherHasParameters)
                {
                    return false;
                }
            }

            // Perform a case-insensitive comparison on the type/subtype strings.  This is a
            // safe comparison because the _type and _subType strings have been restricted to
            // ASCII letters, digits, and a small set of symbols.  This is not a safe comparison
            // for the broader set of strings that have not been restricted in the same way.
            return String.Equals(_type, contentType.TypeComponent, StringComparison.OrdinalIgnoreCase)
                && String.Equals(_subType, contentType.SubTypeComponent, StringComparison.OrdinalIgnoreCase);
        }
 
        /// <summary>
        /// ToString - outputs a normalized form of the content type string:
        /// "type/subtype" followed by " ; name=value" for every parameter.
        /// The string is built on first use and cached afterwards.
        /// </summary>
        /// <returns>The normalized string, or an empty string if the object has not 
        /// finished construction yet</returns>
        public override string ToString()
        {
            // Fast path: already computed (or the empty content type).
            if (_contentType != null)
            {
                return _contentType;
            }

            // Needed so that the debugger shows something sensible while the
            // constructor is still running.
            if (!_isInitialized)
            {
                return String.Empty;
            }

            Debug.Assert(!String.Equals(_type, String.Empty, StringComparison.Ordinal)
               || !String.Equals(_subType, String.Empty, StringComparison.Ordinal));

            char space = _LinearWhiteSpaceChars[0];

            StringBuilder builder = new StringBuilder(_type);
            builder.Append(_forwardSlashSeparator[0]).Append(_subType);

            if (_parameterDictionary != null && _parameterDictionary.Count > 0)
            {
                foreach (KeyValuePair<string, string> parameter in _parameterDictionary)
                {
                    builder.Append(space)
                           .Append(_semicolonSeparator)
                           .Append(space)
                           .Append(parameter.Key)
                           .Append(_equalSeparator)
                           .Append(parameter.Value);
                }
            }

            _contentType = builder.ToString();
            return _contentType;
        }
 
        #endregion Internal Methods
 
        //------------------------------------------------------
        //
        //  Nested Classes
        //
        //------------------------------------------------------
 
        #region Nested Classes
 
        /// <summary>
        /// Equality comparer that makes it easy to use ContentType objects as keys in
        /// collections. Equality is decided by the strong type/subtype comparison, which
        /// reports false whenever either operand carries parameters.
        /// </summary>
        internal class StrongComparer : IEqualityComparer<ContentType>
        {
            /// <summary>
            /// Strong comparison of two content types via AreTypeAndSubTypeEqual(y).
            /// Two null references are considered equal; a null x never equals a non-null y.
            /// </summary>
            public bool Equals(ContentType x, ContentType y)
            {
                return (x == null)
                    ? (y == null)
                    : x.AreTypeAndSubTypeEqual(y);
            }

            /// <summary>
            /// Hash of the upper-cased (invariant) ToString() result. The case is
            /// normalized because ToString() keeps the casing passed into the constructor,
            /// while ContentTypes that are equal (which ignores casing) must share a hash code.
            /// </summary>
            public int GetHashCode(ContentType obj)
            {
                string normalized = obj.ToString().ToUpperInvariant();

                return normalized.GetHashCode();
            }
        }
 
        /// <summary>
        /// Equality comparer that only looks at the type and subtype components and
        /// tolerates parameters on either operand.
        /// </summary>
        internal class WeakComparer : IEqualityComparer<ContentType>
        {
            /// <summary>
            /// Weak comparison of two content types via AreTypeAndSubTypeEqual(y, true).
            /// Parameter and value pairs are not used for the comparison.
            /// Two null references are considered equal; a null x never equals a non-null y.
            /// </summary>
            public bool Equals(ContentType x, ContentType y)
            {
                return (x == null)
                    ? (y == null)
                    : x.AreTypeAndSubTypeEqual(y, true);
            }

            /// <summary>
            /// XOR of the hash codes of the upper-cased (invariant) type and subtype.
            /// The case is normalized because the stored components keep their original
            /// casing, while ContentTypes that are equal (which ignores casing) must share
            /// a hash code. Parameters do not contribute to the hash.
            /// </summary>
            public int GetHashCode(ContentType obj)
            {
                int typeHash = obj._type.ToUpperInvariant().GetHashCode();
                int subTypeHash = obj._subType.ToUpperInvariant().GetHashCode();

                return typeHash ^ subTypeHash;
            }
        }
        #endregion Nested Classes
 
        //------------------------------------------------------
        //
        //  Private Methods
        //
        //------------------------------------------------------
 
        #region Private Methods
 
 
        /// <summary>
        /// Checks that every '\r' in the content type string is accompanied by a '\n',
        /// either immediately before or immediately after it.
        /// </summary>
        /// <param name="contentType">content type string whose first and last characters 
        /// have already been checked not to be Linear White Space</param>
        /// <exception cref="ArgumentException">If a '\r' has no adjacent '\n'</exception>
        private static void ValidateCarriageReturns(string contentType)
        {
            Debug.Assert(!IsLinearWhiteSpaceChar(contentType[0]) && !IsLinearWhiteSpaceChar(contentType[contentType.Length - 1]));

            char lineFeed = _LinearWhiteSpaceChars[1];
            char carriageReturn = _LinearWhiteSpaceChars[2];

            // The caller guarantees that neither the first nor the last character is
            // Linear White Space, so any '\r' found lies strictly inside the string and
            // both neighbours (crIndex - 1 and crIndex + 1) are valid positions.
            for (int crIndex = contentType.IndexOf(carriageReturn);
                 crIndex != -1;
                 crIndex = contentType.IndexOf(carriageReturn, crIndex + 1))
            {
                if (contentType[crIndex - 1] != lineFeed && contentType[crIndex + 1] != lineFeed)
                {
                    throw new ArgumentException(SR.Get(SRID.InvalidLinearWhiteSpaceCharacter));
                }
            }
        }
 
        /// <summary>
        /// Extracts the type and subType tokens from the "type/subtype" part of the
        /// content type and verifies that both are valid tokens as per the grammar.
        /// </summary>
        /// <param name="typeAndSubType">substring that has the type and subType of the content type</param>
        /// <exception cref="ArgumentException">If the typeAndSubType parameter does not contain 
        /// exactly one "/" character, or if either side is not a valid token</exception>
        private void ParseTypeAndSubType(string typeAndSubType)
        {
            // Trailing Linear White Space is allowed here (it precedes the ';' that
            // introduces the parameters), so drop it before splitting on '/'.
            string[] parts = typeAndSubType
                .TrimEnd(_LinearWhiteSpaceChars)
                .Split(_forwardSlashSeparator);

            if (parts.Length != 2)
            {
                throw new ArgumentException(SR.Get(SRID.InvalidTypeSubType));
            }

            _type = ValidateToken(parts[0]);
            _subType = ValidateToken(parts[1]);
        }
 
        /// <summary>
        /// Parses the parameter list of a content type, i.e. a sequence of
        /// ";parameter=value" entries, and stores each pair in the parameter dictionary.
        /// </summary>
        /// <param name="parameterAndValue">The part of the content type string that starts at 
        /// the first ';' and holds the parameter=value pairs</param>
        /// <exception cref="ArgumentException">If an entry does not start with ';', if nothing 
        /// follows a ';', if an entry lacks a usable "=", or if a name or value is invalid</exception>
        private void ParseParameterAndValue(string parameterAndValue)
        {
            string remaining = parameterAndValue;

            while (remaining.Length != 0)
            {
                // Every entry MUST start with a semi-colon. On the first pass this is
                // effectively an assert, since the caller splits at the first ';'.
                if (remaining[0] != _semicolonSeparator)
                {
                    throw new ArgumentException(SR.Get(SRID.ExpectingSemicolon));
                }

                // A lone ';' with nothing after it is an error. Trailing LWS cannot occur
                // here because the constructor has already rejected it.
                if (remaining.Length == 1)
                {
                    throw new ArgumentException(SR.Get(SRID.ExpectingParameterValuePairs));
                }

                // Drop the leading ';' and any white space before the parameter name.
                string entry = remaining.Substring(1).TrimStart(_LinearWhiteSpaceChars);

                // '=' must exist, must not be the first character (empty name) and must
                // not be the last character (empty value).
                int equalSignIndex = entry.IndexOf(_equalSeparator);
                bool equalSignUsable = equalSignIndex > 0 && equalSignIndex != (entry.Length - 1);

                if (!equalSignUsable)
                {
                    throw new ArgumentException(SR.Get(SRID.InvalidParameterValuePair));
                }

                int valueStartIndex = equalSignIndex + 1;
                int valueLength = GetLengthOfParameterValue(entry, valueStartIndex);

                EnsureParameterDictionary();

                string parameterName = ValidateToken(entry.Substring(0, equalSignIndex));
                string parameterValue = ValidateQuotedStringOrToken(entry.Substring(valueStartIndex, valueLength));

                _parameterDictionary.Add(parameterName, parameterValue);

                // Continue with whatever follows the value, skipping white space in front
                // of the next ';'.
                remaining = entry.Substring(valueStartIndex + valueLength).TrimStart(_LinearWhiteSpaceChars);
            }
        }
 
        /// <summary>
        /// Returns the length of the parameter value that starts at startIndex in the
        /// input string.
        /// A value that does not start with '"' is treated as a token: it ends at the
        /// first ';', or at the first Linear White Space character if one occurs before
        /// that ';'. If there is no ';' the value extends to the end of the string.
        /// A value that starts with '"' is a quoted-string: it ends at the first '"'
        /// that is not part of a quoted-pair ("\" followed by any character).
        /// </summary>
        /// <param name="s">String that contains the parameter value; must not be null</param>
        /// <param name="startIndex">Index of the first character of the value in s</param>
        /// <returns>Number of characters that make up the value, including the surrounding 
        /// quotes for a quoted-string</returns>
        /// <exception cref="ArgumentException">If a quoted-string has no closing '"'</exception>
        private static int GetLengthOfParameterValue(string s, int startIndex)
        {
            Debug.Assert(s != null);

            if (s[startIndex] != '"')
            {
                // Token value: terminated by ';' or by LWS preceding that ';'.
                int semicolonIndex = s.IndexOf(_semicolonSeparator, startIndex);

                // Without a ';' the entire remaining string is the parameter value.
                if (semicolonIndex == -1)
                {
                    return s.Length - startIndex;
                }

                int lwsIndex = s.IndexOfAny(_LinearWhiteSpaceChars, startIndex);
                int endIndex = (lwsIndex != -1 && lwsIndex < semicolonIndex) ? lwsIndex : semicolonIndex;

                return endIndex - startIndex;
            }

            // Quoted-string value: walk the text left to right so that every "\" consumes
            // the character it escapes. Merely checking whether a '"' is preceded by "\"
            // would misread an escaped backslash right before the closing quote (as in
            // "a\\") as an escaped quote and fail to find the end of the value.
            int index = startIndex + 1;

            while (index < s.Length)
            {
                char current = s[index];

                if (current == '\\')
                {
                    // quoted-pair: skip the backslash together with the escaped character
                    index += 2;
                }
                else if (current == '"')
                {
                    // closing quote found; the length includes both quotes
                    return (index + 1) - startIndex;
                }
                else
                {
                    index++;
                }
            }

            throw new ArgumentException(SR.Get(SRID.InvalidParameterValue));
        }
 
        /// <summary>
        /// Validates the given token.
        /// A token must be non-empty and every character in it must be either
        /// 1. an ASCII letter or digit, or
        /// 2. one of the remaining characters in the allowed character set.
        /// </summary>
        /// <param name="token">string token</param>
        /// <returns>the token itself, once validated</returns>
        /// <exception cref="ArgumentException">If the token is empty or contains a character 
        /// that is not allowed</exception>
        private static string ValidateToken(string token)
        {
            if (token.Length == 0)
            {
                throw new ArgumentException(SR.Get(SRID.InvalidToken));
            }

            foreach (char current in token)
            {
                bool permitted = IsAsciiLetterOrDigit(current) || IsAllowedCharacter(current);

                if (!permitted)
                {
                    throw new ArgumentException(SR.Get(SRID.InvalidToken));
                }
            }

            return token;
        }
 
        /// <summary>
        /// Validates that the value of a parameter is either a valid token or a
        /// valid quoted string. A value of at least two characters that both starts and
        /// ends with '"' is checked as a quoted string; anything else is checked as a token.
        /// </summary>
        /// <param name="parameterValue">parameter value string</param>
        /// <returns>the parameter value itself, once validated</returns>
        /// <exception cref="ArgumentException">If the parameter value is empty or invalid</exception>
        private static string ValidateQuotedStringOrToken(string parameterValue)
        {
            if (parameterValue.Length == 0)
            {
                throw new ArgumentException(SR.Get(SRID.InvalidParameterValue));
            }

            bool isQuoted = parameterValue.Length >= 2
                && parameterValue.StartsWith(_quote, StringComparison.Ordinal)
                && parameterValue.EndsWith(_quote, StringComparison.Ordinal);

            if (isQuoted)
            {
                // Validate only the text between the surrounding quotes.
                string innerText = parameterValue.Substring(1, parameterValue.Length - 2);
                ValidateQuotedText(innerText);
            }
            else
            {
                ValidateToken(parameterValue);
            }

            return parameterValue;
        }
 
        /// <summary>
        /// Validates the text found between the quotes of a quoted string.
        /// Linear White Space characters are accepted. Any other character at or below
        /// ' ' or at or above 0xFF is rejected, as is a '"' that is not directly preceded
        /// by a "\". Empty text is valid.
        /// </summary>
        /// <param name="quotedText">quoted string content without the surrounding quotes</param>
        /// <exception cref="ArgumentException">If the text contains a rejected character</exception>
        private static void ValidateQuotedText(string quotedText)
        {
            for (int position = 0; position < quotedText.Length; position++)
            {
                char current = quotedText[position];

                if (IsLinearWhiteSpaceChar(current))
                {
                    continue;
                }

                bool outsideAcceptedRange = current <= ' ' || current >= 0xFF;

                bool unescapedQuote = current == '"'
                    && (position == 0 || quotedText[position - 1] != '\\');

                if (outsideAcceptedRange || unescapedQuote)
                {
                    throw new ArgumentException(SR.Get(SRID.InvalidParameterValue));
                }
            }
        }
 
        /// <summary>
        /// Returns true if the input character is one of the allowed characters,
        /// false otherwise.
        /// </summary>
        /// <param name="character">input character</param>
        /// <returns>true if the character is in the allowed character set</returns>
        private static bool IsAllowedCharacter(char character)
        {
            // A plain scan over the array is used on purpose: the .Contains style
            // helpers would result in boxing costs.
            for (int i = 0; i < _allowedCharacters.Length; i++)
            {
                if (_allowedCharacters[i] == character)
                {
                    return true;
                }
            }

            return false;
        }
 
        /// <summary>
        /// Returns true if the input character is an ASCII letter ('a'-'z', 'A'-'Z')
        /// or an ASCII digit ('0'-'9'), false otherwise.
        /// </summary>
        /// <param name="character">input character</param>
        /// <returns>true for ASCII letters and digits</returns>
        private static bool IsAsciiLetterOrDigit(char character)
        {
            bool isDigit = (character >= '0') && (character <= '9');

            return IsAsciiLetter(character) || isDigit;
        }
 
        /// <summary>
        /// Returns true if the input character is an ASCII letter ('a'-'z' or 'A'-'Z'),
        /// false otherwise.
        /// </summary>
        /// <param name="character">input character</param>
        /// <returns>true for ASCII letters</returns>
        private static bool IsAsciiLetter(char character)
        {
            bool isLowerCase = (character >= 'a') && (character <= 'z');
            bool isUpperCase = (character >= 'A') && (character <= 'Z');

            return isLowerCase || isUpperCase;
        }
 
        /// <summary>
        /// Returns true if the input character is one of the Linear White Space
        /// characters - ' ', '\t', '\n', '\r' - and false otherwise.
        /// </summary>
        /// <param name="ch">input character</param>
        /// <returns>true for Linear White Space characters</returns>
        private static bool IsLinearWhiteSpaceChar(char ch)
        {
            // Every Linear White Space character is at or below ' ', so anything
            // above it can be rejected without scanning the table.
            if (ch <= ' ')
            {
                for (int i = 0; i < _LinearWhiteSpaceChars.Length; i++)
                {
                    if (_LinearWhiteSpaceChars[i] == ch)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
 
        /// <summary>
        /// Creates the parameter dictionary on first use; does nothing if it already exists.
        /// </summary>
        private void EnsureParameterDictionary()
        {
            if (_parameterDictionary != null)
            {
                return;
            }

            _parameterDictionary = new Dictionary<string, string>(); //initial size 0
        }
       
        #endregion Private Methods
 
        //------------------------------------------------------
        //
        //  Private Members
        //
        //------------------------------------------------------
        #region Private Members
 
        // Cached result of ToString(). The constructor sets it to String.Empty for the
        // empty content type; otherwise it stays null until ToString() builds it.
        private string _contentType = null;
        // Type and subType tokens, assigned by ParseTypeAndSubType.
        private string _type    = String.Empty;
        private string _subType = String.Empty;
        // The string exactly as it was passed to the constructor (see OriginalString).
        private string _originalString;
        // Parameter name -> value. Created lazily by EnsureParameterDictionary.
        private Dictionary<string, string> _parameterDictionary = null;
        // Set to true as the last step of the constructor; ToString() returns an
        // empty string while it is still false.
        private bool   _isInitialized = false;
 
        private const string     _quote              = "\"";
        private const char       _semicolonSeparator = ';';
        private const char       _equalSeparator     = '=';
 
        //Characters, other than ASCII letters and digits, that are accepted in a token.
        //This array is sorted by the ascii value of these characters.
        private static readonly char[] _allowedCharacters = 
         { '!' /*33*/, '#' /*35*/ , '$'  /*36*/,
           '%' /*37*/, '&' /*38*/ , '\'' /*39*/,
           '*' /*42*/, '+' /*43*/ , '-'  /*45*/,
           '.' /*46*/, '^' /*94*/ , '_'  /*95*/,
           '`' /*96*/, '|' /*124*/, '~'  /*126*/, 
         };
 
        //Kept as an array because it is passed to String.Split; ToString() uses element [0].
        private static readonly char[]     _forwardSlashSeparator = { '/' };

        //Linear White Space characters
        //NOTE: the order of the elements matters. ToString() uses [0] as the space
        //character, and ValidateCarriageReturns uses [1] as '\n' and [2] as '\r'.
        private static readonly char[]     _LinearWhiteSpaceChars = 
         { ' ',  // space           - \x20
           '\n', // new line        - \x0A
           '\r', // carriage return - \x0D
           '\t'  // horizontal tab  - \x09
         };
 
        //Backing instance for the Empty property.
        //Kept last among the static fields: static field initializers execute in textual
        //order, so the tables above are already initialized when this constructor call runs.
        private static readonly ContentType _emptyContentType = new ContentType("");
 
        #endregion Private Members
    }
}