|
//------------------------------------------------------------------------------
// <copyright file="WebUtility.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//------------------------------------------------------------------------------
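// While ENTITY_ENCODE_HIGH_ASCII_CHARS is defined, HtmlEncode writes high chars (160 to 255) as numeric
// character references. Remove the #define below to stop entity-encoding them (bugs VSWhidbey 85857/111927).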
#define ENTITY_ENCODE_HIGH_ASCII_CHARS
namespace System.Net {
using System;
using System.Collections.Generic;
#if !FEATURE_NETCORE
using System.Configuration;
#endif
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Net.Configuration;
using System.Runtime.Versioning;
using System.Text;
#if FEATURE_NETCORE
using System.Security;
#endif
public static class WebUtility
{
// some consts copied from Char / CharUnicodeInfo since we don't have friend access to those types
// First UTF-16 high (leading) surrogate code unit.
private const char HIGH_SURROGATE_START = '\uD800';
// First UTF-16 low (trailing) surrogate code unit.
private const char LOW_SURROGATE_START = '\uDC00';
// Last UTF-16 low (trailing) surrogate code unit; also the end of the whole surrogate range.
private const char LOW_SURROGATE_END = '\uDFFF';
// Last code point that fits in a single UTF-16 code unit.
private const int UNICODE_PLANE00_END = 0x00FFFF;
// First code point that needs a surrogate pair in UTF-16.
private const int UNICODE_PLANE01_START = 0x10000;
// Largest code point accepted by the decoder in Strict and Loose modes.
private const int UNICODE_PLANE16_END = 0x10FFFF;
// Returned by GetNextUnicodeScalarValueFromUtf16Surrogate for an unmatched or truncated surrogate.
private const int UnicodeReplacementChar = '\uFFFD';
// Characters that end the scan for an entity in HtmlDecode: ';' closes the entity, a second '&' aborts it.
private static readonly char[] _htmlEntityEndingChars = new char[] { ';', '&' };
// Cached conformance settings. 'Auto' means "not resolved yet"; the HtmlDecodeConformance and
// HtmlEncodeConformance properties replace it with a concrete value on first successful read.
private static volatile UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;
private static volatile UnicodeEncodingConformance _htmlEncodeConformance = UnicodeEncodingConformance.Auto;
#region HtmlEncode / HtmlDecode methods
// Returns the HTML-encoded form of 'value'.
// Null and empty inputs, and inputs that contain nothing to encode, are returned as the same instance.
public static string HtmlEncode(string value) {
    if (String.IsNullOrEmpty(value)) {
        return value;
    }

    // Fast path: avoid allocating a StringWriter when no character needs encoding.
    bool needsEncoding = IndexOfHtmlEncodingChars(value, 0) != -1;
    if (!needsEncoding) {
        return value;
    }

    StringWriter encoded = new StringWriter(CultureInfo.InvariantCulture);
    HtmlEncode(value, encoded);
    return encoded.ToString();
}
// Writes the HTML-encoded form of 'value' to 'output'.
//   value  - text to encode; a null value writes nothing.
//   output - destination writer; must not be null when 'value' is non-null.
// Throws ArgumentNullException when 'output' is null.
//
// Encoding rules:
//   '<'  -> &lt;     '>' -> &gt;     '"' -> &quot;     '\'' -> &#39;     '&' -> &amp;
//   chars 160..255 -> decimal numeric reference (only when ENTITY_ENCODE_HIGH_ASCII_CHARS is defined)
//   surrogate pairs -> decimal numeric reference of the combined scalar value (Strict conformance only);
//   an unmatched surrogate is written as U+FFFD in Strict conformance.
// Everything else is written unchanged.
#if FEATURE_NETCORE
[SecuritySafeCritical]
#endif
public static unsafe void HtmlEncode(string value, TextWriter output) {
    if (value == null) {
        return;
    }
    if (output == null) {
        throw new ArgumentNullException("output");
    }

    // Nothing to encode: write the input through in one call.
    int index = IndexOfHtmlEncodingChars(value, 0);
    if (index == -1) {
        output.Write(value);
        return;
    }

    Debug.Assert(0 <= index && index <= value.Length, "0 <= index && index <= value.Length");

    UnicodeEncodingConformance encodeConformance = HtmlEncodeConformance;
    int cch = value.Length - index; // chars left to examine, starting at the first one that needs encoding
    fixed (char* str = value) {
        char* pch = str;

        // Everything before 'index' is known to be safe; copy it verbatim.
        while (index-- > 0) {
            output.Write(*pch++);
        }

        for (; cch > 0; cch--, pch++) {
            char ch = *pch;
            if (ch <= '>') {
                // All five markup-significant characters sort at or below '>'.
                switch (ch) {
                    case '<':
                        output.Write("&lt;");
                        break;
                    case '>':
                        output.Write("&gt;");
                        break;
                    case '"':
                        output.Write("&quot;");
                        break;
                    case '\'':
                        output.Write("&#39;");
                        break;
                    case '&':
                        output.Write("&amp;");
                        break;
                    default:
                        output.Write(ch);
                        break;
                }
            }
            else {
                int valueToEncode = -1; // set to >= 0 if needs to be encoded
#if ENTITY_ENCODE_HIGH_ASCII_CHARS
                if (ch >= 160 && ch < 256) {
                    // The seemingly arbitrary 160 comes from RFC
                    valueToEncode = ch;
                } else
#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS
                if (encodeConformance == UnicodeEncodingConformance.Strict && Char.IsSurrogate(ch)) {
                    // May advance pch / decrement cch by one extra char when a full pair is consumed.
                    int scalarValue = GetNextUnicodeScalarValueFromUtf16Surrogate(ref pch, ref cch);
                    if (scalarValue >= UNICODE_PLANE01_START) {
                        valueToEncode = scalarValue;
                    }
                    else {
                        // Don't encode BMP characters (like U+FFFD) since they wouldn't have
                        // been encoded if explicitly present in the string anyway.
                        ch = (char)scalarValue;
                    }
                }

                if (valueToEncode >= 0) {
                    // value needs to be encoded as a decimal numeric character reference
                    output.Write("&#");
                    output.Write(valueToEncode.ToString(NumberFormatInfo.InvariantInfo));
                    output.Write(';');
                }
                else {
                    // write out the character directly
                    output.Write(ch);
                }
            }
        }
    }
}
// Returns 'value' with HTML entities decoded.
// Null and empty inputs, and inputs that need no decoding, are returned as the same instance.
public static string HtmlDecode(string value) {
    if (String.IsNullOrEmpty(value)) {
        return value;
    }

    // Only allocate a StringWriter when there is actually something to decode.
    if (StringRequiresHtmlDecoding(value)) {
        StringWriter decoded = new StringWriter(CultureInfo.InvariantCulture);
        HtmlDecode(value, decoded);
        return decoded.ToString();
    }

    return value;
}
// Writes 'value' to 'output' with HTML entities decoded.
//   value  - text to decode; a null value writes nothing.
//   output - destination writer; must not be null when 'value' is non-null.
// Throws ArgumentNullException when 'output' is null.
//
// Handles numeric references (decimal "&#NNN;" and hex "&#xHHH;") and named entities known to
// HtmlEntities.Lookup. Which numeric values are accepted depends on HtmlDecodeConformance.
// An unknown named entity is written back unchanged; a numeric reference that fails to parse or
// validate has its '&' written literally and scanning resumes at the following character.
//
// NOTE(review): the suppression below still names UInt16.TryParse although the code calls UInt32.TryParse.
[SuppressMessage("Microsoft.Usage", "CA1806:DoNotIgnoreMethodResults", MessageId = "System.UInt16.TryParse(System.String,System.Globalization.NumberStyles,System.IFormatProvider,System.UInt16@)", Justification="UInt16.TryParse guarantees that result is zero if the parse fails.")]
public static void HtmlDecode(string value, TextWriter output) {
if (value == null) {
return;
}
if (output == null) {
throw new ArgumentNullException("output");
}
if (!StringRequiresHtmlDecoding(value)) {
output.Write(value); // good as is
return;
}
UnicodeDecodingConformance decodeConformance = HtmlDecodeConformance;
int l = value.Length;
for (int i = 0; i < l; i++) {
char ch = value[i];
if (ch == '&') {
// We found a '&'. Now look for the next ';' or '&'. The idea is that
// if we find another '&' before finding a ';', then this is not an entity,
// and the next '&' might start a real entity (VSWhidbey 275184)
int index = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
if (index > 0 && value[index] == ';') {
// Text strictly between '&' and ';'.
string entity = value.Substring(i + 1, index - i - 1);
if (entity.Length > 1 && entity[0] == '#') {
// The # syntax can be in decimal or hex, e.g.
// &#229; --> decimal
// &#xE5; --> same char in hex
// See http://www.w3.org/TR/REC-html40/charset.html#entities
bool parsedSuccessfully;
uint parsedValue;
if (entity[1] == 'x' || entity[1] == 'X') {
parsedSuccessfully = UInt32.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out parsedValue);
}
else {
// NOTE(review): NumberStyles.Integer also permits surrounding white space and a leading sign - confirm this leniency is intended.
parsedSuccessfully = UInt32.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out parsedValue);
}
if (parsedSuccessfully) {
// Range-check the parsed code point according to the active conformance mode.
switch (decodeConformance) {
case UnicodeDecodingConformance.Strict:
// decoded character must be U+0000 .. U+10FFFF, excluding surrogates
parsedSuccessfully = ((parsedValue < HIGH_SURROGATE_START) || (LOW_SURROGATE_END < parsedValue && parsedValue <= UNICODE_PLANE16_END));
break;
case UnicodeDecodingConformance.Compat:
// decoded character must be U+0001 .. U+FFFF
// null chars disallowed for compat with 4.0
parsedSuccessfully = (0 < parsedValue && parsedValue <= UNICODE_PLANE00_END);
break;
case UnicodeDecodingConformance.Loose:
// decoded character must be U+0000 .. U+10FFFF
parsedSuccessfully = (parsedValue <= UNICODE_PLANE16_END);
break;
default:
Debug.Assert(false, "Should never get here!");
parsedSuccessfully = false;
break;
}
}
if (parsedSuccessfully) {
if (parsedValue <= UNICODE_PLANE00_END) {
// single character
output.Write((char)parsedValue);
}
else {
// multi-character
char leadingSurrogate, trailingSurrogate;
ConvertSmpToUtf16(parsedValue, out leadingSurrogate, out trailingSurrogate);
output.Write(leadingSurrogate);
output.Write(trailingSurrogate);
}
i = index; // already looked at everything until semicolon
continue;
}
// Parse or range check failed: fall through so the '&' is written literally below
// and the rest of the would-be entity is scanned as ordinary text.
}
else {
i = index; // already looked at everything until semicolon
// Lookup returns (char)0 when the name is not a known entity.
char entityChar = HtmlEntities.Lookup(entity);
if (entityChar != (char)0) {
ch = entityChar;
}
else {
// Unknown entity name: emit it unchanged.
output.Write('&');
output.Write(entity);
output.Write(';');
continue;
}
}
}
}
// Either an ordinary character, a literal '&', or the replacement for a named entity.
output.Write(ch);
}
}
// Returns the index of the first character at or after 'startPos' that HtmlEncode would have to
// rewrite, or -1 when the remainder of 's' can be emitted verbatim.
#if FEATURE_NETCORE
[SecuritySafeCritical]
#endif
private static unsafe int IndexOfHtmlEncodingChars(string s, int startPos) {
    Debug.Assert(0 <= startPos && startPos <= s.Length, "0 <= startPos && startPos <= s.Length");

    // Surrogates only need attention when encoding in Strict conformance.
    bool encodeSurrogates = (HtmlEncodeConformance == UnicodeEncodingConformance.Strict);

    fixed (char* first = s) {
        char* limit = first + s.Length;
        for (char* cursor = first + startPos; cursor < limit; cursor++) {
            char current = *cursor;
            bool mustEncode;

            if (current <= '>') {
                // The five markup-significant characters all sort at or below '>'.
                mustEncode = (current == '<' || current == '>' || current == '"' || current == '\'' || current == '&');
            }
#if ENTITY_ENCODE_HIGH_ASCII_CHARS
            else if (current >= 160 && current < 256) {
                mustEncode = true;
            }
#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS
            else {
                mustEncode = encodeSurrogates && Char.IsSurrogate(current);
            }

            if (mustEncode) {
                return (int)(cursor - first);
            }
        }
    }

    return -1;
}
// Resolves the Unicode conformance mode used by HtmlDecode.
// The value is read from configuration once and cached; 'Auto' and out-of-range settings are
// replaced by a default that depends on the targeted framework version.
private static UnicodeDecodingConformance HtmlDecodeConformance {
    get {
        UnicodeDecodingConformance cached = _htmlDecodeConformance;
        if (cached != UnicodeDecodingConformance.Auto) {
            return cached;
        }

        UnicodeDecodingConformance fallback = BinaryCompatibility.TargetsAtLeast_Desktop_V4_5
            ? UnicodeDecodingConformance.Strict
            : UnicodeDecodingConformance.Compat;
        UnicodeDecodingConformance resolved = fallback;

#if !FEATURE_NETCORE
        try {
            // Read from config
            resolved = SettingsSectionInternal.Section.WebUtilityUnicodeDecodingConformance;

            // Anything that is not a concrete mode ('Auto' or an undefined value) becomes the default.
            bool isConcreteMode = resolved > UnicodeDecodingConformance.Auto
                && resolved <= UnicodeDecodingConformance.Loose;
            if (!isConcreteMode) {
                resolved = fallback;
            }
        }
        catch (ConfigurationException) {
            // Config is broken: cache the default so HtmlDecode keeps working (e.g. while rendering
            // the error page) and the config is not probed again.
            resolved = fallback;
        }
        catch {
            // DevDiv: 642025
            // ASP.NET's own ConfigurationManager can throw for reasons other than config errors
            // (i.e. BadRequest). Use the default this time, but do not cache it, so the next call retries.
            return fallback;
        }
#endif

        _htmlDecodeConformance = resolved;
        return resolved;
    }
}
// Resolves the Unicode conformance mode used by HtmlEncode.
// The value is read from configuration once and cached; 'Auto' and out-of-range settings are
// replaced by a default that depends on the targeted framework version.
private static UnicodeEncodingConformance HtmlEncodeConformance {
    get {
        UnicodeEncodingConformance cached = _htmlEncodeConformance;
        if (cached != UnicodeEncodingConformance.Auto) {
            return cached;
        }

        UnicodeEncodingConformance fallback = BinaryCompatibility.TargetsAtLeast_Desktop_V4_5
            ? UnicodeEncodingConformance.Strict
            : UnicodeEncodingConformance.Compat;
        UnicodeEncodingConformance resolved = fallback;

#if !FEATURE_NETCORE
        try {
            // Read from config
            resolved = SettingsSectionInternal.Section.WebUtilityUnicodeEncodingConformance;

            // Anything that is not a concrete mode ('Auto' or an undefined value) becomes the default.
            bool isConcreteMode = resolved > UnicodeEncodingConformance.Auto
                && resolved <= UnicodeEncodingConformance.Compat;
            if (!isConcreteMode) {
                resolved = fallback;
            }
        }
        catch (ConfigurationException) {
            // Config is broken: cache the default so HtmlEncode keeps working (e.g. while rendering
            // the error page) and the config is not probed again.
            resolved = fallback;
        }
        catch {
            // DevDiv: 642025
            // ASP.NET's own ConfigurationManager can throw for reasons other than config errors
            // (i.e. BadRequest). Use the default this time, but do not cache it, so the next call retries.
            return fallback;
        }
#endif

        _htmlEncodeConformance = resolved;
        return resolved;
    }
}
#endregion
#region UrlEncode implementation
// *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
// This specific code was copied from above ASP.NET codebase.
// URL-encodes a slice of 'bytes'. When 'alwaysCreateNewReturnValue' is true the caller never
// receives the input array instance itself: an untouched input is cloned before being returned.
private static byte[] UrlEncode(byte[] bytes, int offset, int count, bool alwaysCreateNewReturnValue)
{
    byte[] result = UrlEncode(bytes, offset, count);

    bool returnedInputInstance = (result != null) && (result == bytes);
    if (alwaysCreateNewReturnValue && returnedInputInstance)
    {
        return (byte[])result.Clone();
    }

    return result;
}
// URL-encodes 'count' bytes of 'bytes' starting at 'offset'.
// Spaces become '+', bytes that IsUrlSafeChar rejects become %XX (upper-case hex), and everything
// else is copied. Returns null when ValidateUrlEncodingParameters reports there is no input, and
// the input array itself when the whole array is already safe.
private static byte[] UrlEncode(byte[] bytes, int offset, int count)
{
    if (!ValidateUrlEncodingParameters(bytes, offset, count))
    {
        return null;
    }

    // Pass 1: find out how much the output grows.
    int spaceCount = 0;
    int escapeCount = 0;
    for (int i = 0; i < count; i++)
    {
        char current = (char)bytes[offset + i];
        if (current == ' ')
        {
            spaceCount++;
        }
        else if (!IsUrlSafeChar(current))
        {
            escapeCount++;
        }
    }

    bool nothingToRewrite = (spaceCount == 0 && escapeCount == 0);
    if (nothingToRewrite)
    {
        // DevDiv 912606: respect "offset" and "count"
        bool coversWholeArray = (0 == offset && bytes.Length == count);
        if (coversWholeArray)
        {
            return bytes;
        }

        var slice = new byte[count];
        Buffer.BlockCopy(bytes, offset, slice, 0, count);
        return slice;
    }

    // Pass 2: each escaped byte takes three output bytes instead of one; spaces stay one byte.
    byte[] encoded = new byte[count + escapeCount * 2];
    int writeIndex = 0;
    for (int i = 0; i < count; i++)
    {
        byte raw = bytes[offset + i];
        char current = (char)raw;

        if (current == ' ')
        {
            encoded[writeIndex++] = (byte)'+';
        }
        else if (IsUrlSafeChar(current))
        {
            encoded[writeIndex++] = raw;
        }
        else
        {
            encoded[writeIndex++] = (byte)'%';
            encoded[writeIndex++] = (byte)IntToHex((raw >> 4) & 0xf);
            encoded[writeIndex++] = (byte)IntToHex(raw & 0x0f);
        }
    }

    return encoded;
}
#endregion
#region UrlEncode public methods
// URL-encodes 'value' by way of its UTF-8 bytes. A null input yields null.
[SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification="Already shipped public API; code moved here as part of API consolidation")]
public static string UrlEncode(string value)
{
    if (value == null)
    {
        return null;
    }

    byte[] utf8 = Encoding.UTF8.GetBytes(value);
    byte[] escaped = UrlEncode(utf8, 0, utf8.Length, false /* alwaysCreateNewReturnValue */);
    return Encoding.UTF8.GetString(escaped);
}
// URL-encodes 'count' bytes of 'value' starting at 'offset'.
// The result is always a different array instance from 'value', even when nothing had to be escaped.
public static byte[] UrlEncodeToBytes(byte[] value, int offset, int count)
{
    const bool alwaysCreateNewReturnValue = true;
    byte[] encoded = UrlEncode(value, offset, count, alwaysCreateNewReturnValue);
    return encoded;
}
#endregion
#region UrlDecode implementation
// *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
// This specific code was copied from above ASP.NET codebase.
// Changes done - Removed the logic to handle %Uxxxx as it is not standards compliant.
// Decodes a URL-encoded string: '+' becomes a space and each valid %XX becomes one byte.
// Bytes (decoded escapes and 7-bit chars) are accumulated and converted with 'encoding';
// chars above 0x7F are appended as chars. A '%' not followed by two hex digits is kept literally.
private static string UrlDecodeInternal(string value, Encoding encoding)
{
    if (value == null)
    {
        return null;
    }

    int length = value.Length;
    UrlDecoder decoder = new UrlDecoder(length, encoding);

    int pos = 0;
    while (pos < length)
    {
        char current = value[pos];

        if (current == '+')
        {
            current = ' ';
        }
        else if (current == '%' && pos < length - 2)
        {
            int high = HexToInt(value[pos + 1]);
            int low = HexToInt(value[pos + 2]);
            bool isValidEscape = (high >= 0 && low >= 0);
            if (isValidEscape)
            {
                // Escapes are appended as bytes, never as chars; skip all three chars of "%XX".
                decoder.AddByte((byte)((high << 4) | low));
                pos += 3;
                continue;
            }
        }

        if ((current & 0xFF80) == 0)
        {
            // 7 bit have to go as bytes because of Unicode
            decoder.AddByte((byte)current);
        }
        else
        {
            decoder.AddChar(current);
        }
        pos++;
    }

    return decoder.GetString();
}
// Decodes 'count' URL-encoded bytes of 'bytes' starting at 'offset': '+' becomes a space and each
// valid %XX becomes one byte. A '%' not followed by two hex digits is kept literally.
// Returns null when ValidateUrlEncodingParameters reports there is no input.
private static byte[] UrlDecodeInternal(byte[] bytes, int offset, int count)
{
    if (!ValidateUrlEncodingParameters(bytes, offset, count))
    {
        return null;
    }

    // Decoding never grows the data, so 'count' bytes is always enough.
    byte[] decoded = new byte[count];
    int written = 0;

    int i = 0;
    while (i < count)
    {
        int pos = offset + i;
        byte current = bytes[pos];

        if (current == '+')
        {
            current = (byte)' ';
        }
        else if (current == '%' && i < count - 2)
        {
            int high = HexToInt((char)bytes[pos + 1]);
            int low = HexToInt((char)bytes[pos + 2]);
            if (high >= 0 && low >= 0)
            {
                // valid 2 hex chars: consume them along with the '%'
                current = (byte)((high << 4) | low);
                i += 2;
            }
        }

        decoded[written] = current;
        written++;
        i++;
    }

    // Shrink to the number of bytes actually produced.
    if (written < decoded.Length)
    {
        Array.Resize(ref decoded, written);
    }

    return decoded;
}
#endregion
#region UrlDecode public methods
// Decodes a URL-encoded string, interpreting escaped bytes as UTF-8. A null input yields null.
[SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification="Already shipped public API; code moved here as part of API consolidation")]
public static string UrlDecode(string encodedValue)
{
    if (encodedValue != null)
    {
        return UrlDecodeInternal(encodedValue, Encoding.UTF8);
    }

    return null;
}
// Decodes 'count' URL-encoded bytes of 'encodedValue' starting at 'offset'.
public static byte[] UrlDecodeToBytes(byte[] encodedValue, int offset, int count)
{
    byte[] decoded = UrlDecodeInternal(encodedValue, offset, count);
    return decoded;
}
#endregion
#region Helper methods
// similar to Char.ConvertFromUtf32, but doesn't check arguments or generate strings
// input is assumed to be an SMP character
// Splits a supplementary-plane code point into its UTF-16 surrogate pair.
// Like Char.ConvertFromUtf32, but with no argument checks and no string allocation;
// the caller guarantees U+10000 <= smpChar <= U+10FFFF.
private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
    Debug.Assert(UNICODE_PLANE01_START <= smpChar && smpChar <= UNICODE_PLANE16_END);

    // Offset from U+10000; its quotient and remainder by 0x400 select the two surrogates.
    int offsetIntoSupplementary = (int)(smpChar - UNICODE_PLANE01_START);
    int leadingIndex = offsetIntoSupplementary / 0x400;
    int trailingIndex = offsetIntoSupplementary % 0x400;

    leadingSurrogate = (char)(leadingIndex + HIGH_SURROGATE_START);
    trailingSurrogate = (char)(trailingIndex + LOW_SURROGATE_START);
}
// Reads the scalar value that starts at the surrogate '*pch'.
// For a well-formed pair the combined code point is returned and 'pch' / 'charsRemaining' are
// moved past the leading surrogate (the caller's loop steps over the trailing one).
// For an unmatched or truncated surrogate U+FFFD is returned and nothing is advanced.
#if FEATURE_NETCORE
[SecuritySafeCritical]
#endif
private static unsafe int GetNextUnicodeScalarValueFromUtf16Surrogate(ref char* pch, ref int charsRemaining) {
    // invariants
    Debug.Assert(charsRemaining >= 1);
    Debug.Assert(Char.IsSurrogate(*pch));

    // A pair needs two chars; with only one left the original scalar value cannot be recovered.
    if (charsRemaining > 1) {
        char lead = *pch;
        char trail = *(pch + 1);

        if (Char.IsSurrogatePair(lead, trail)) {
            // we're going to consume an extra char
            pch++;
            charsRemaining--;

            // Same arithmetic as Char.ConvertToUtf32, minus the checks that were just performed.
            int leadOffset = (lead - HIGH_SURROGATE_START) * 0x400;
            int trailOffset = trail - LOW_SURROGATE_START;
            return leadOffset + trailOffset + UNICODE_PLANE01_START;
        }
    }

    // unmatched or truncated surrogate
    return UnicodeReplacementChar;
}
// Converts one hexadecimal digit (either case) to its value 0..15; returns -1 for any other char.
private static int HexToInt(char h)
{
    if (h >= '0' && h <= '9')
    {
        return h - '0';
    }
    if (h >= 'a' && h <= 'f')
    {
        return h - 'a' + 10;
    }
    if (h >= 'A' && h <= 'F')
    {
        return h - 'A' + 10;
    }
    return -1;
}
// Converts a nibble value (0..15) to its upper-case hexadecimal digit.
private static char IntToHex(int n)
{
    Debug.Assert(n < 0x10);

    int digit = (n <= 9)
        ? n + (int)'0'
        : n - 10 + (int)'A';
    return (char)digit;
}
// Set of safe chars, from RFC 1738.4 minus '+'
// Returns true for characters that UrlEncode copies through unescaped:
// ASCII letters, ASCII digits, and the punctuation - _ . ! * ( )
private static bool IsUrlSafeChar(char ch)
{
    bool isLowerLetter = (ch >= 'a' && ch <= 'z');
    bool isUpperLetter = (ch >= 'A' && ch <= 'Z');
    bool isDigit = (ch >= '0' && ch <= '9');
    if (isLowerLetter || isUpperLetter || isDigit)
    {
        return true;
    }

    const string safePunctuation = "-_.!*()";
    return safePunctuation.IndexOf(ch) >= 0;
}
// Validates the (bytes, offset, count) triple shared by the byte-based encode/decode helpers.
// Returns false when there is nothing to process (null array with a zero count), true when the
// slice is valid.
// Throws ArgumentNullException when 'bytes' is null but 'count' is non-zero, and
// ArgumentOutOfRangeException when 'offset' or 'count' does not describe a slice inside 'bytes'.
private static bool ValidateUrlEncodingParameters(byte[] bytes, int offset, int count)
{
    if (bytes == null && count == 0)
    {
        return false;
    }
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    if (offset < 0 || offset > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("offset");
    }
    // Compare against the remaining length instead of computing offset + count: the sum can
    // overflow Int32 for a large count and wrap to a negative value that slips past the check.
    if (count < 0 || count > bytes.Length - offset)
    {
        throw new ArgumentOutOfRangeException("count");
    }
    return true;
}
// Tells whether HtmlDecode has any work to do on 's'.
// In Compat conformance only '&' triggers decoding; in every other mode a surrogate character
// triggers it as well.
private static bool StringRequiresHtmlDecoding(string s) {
    bool surrogatesMatter = (HtmlDecodeConformance != UnicodeDecodingConformance.Compat);

    foreach (char candidate in s) {
        if (candidate == '&') {
            return true;
        }
        if (surrogatesMatter && Char.IsSurrogate(candidate)) {
            return true;
        }
    }

    return false;
}
#endregion
#region UrlDecoder nested class
// *** Source: alm/tfs_core/Framework/Common/UriUtility/HttpUtility.cs
// This specific code was copied from above ASP.NET codebase.
// Internal class to facilitate URL decoding -- keeps char buffer and byte buffer, allows appending of either chars or bytes
// Accumulates the output of URL decoding. Callers append either chars or raw bytes; pending bytes
// are converted to chars with the supplied encoding whenever a char is appended or the final
// string is requested.
private class UrlDecoder
{
    // Capacity used for both buffers.
    private int _bufferSize;

    // Decoded characters collected so far.
    private int _numChars;
    private char[] _charBuffer;

    // Bytes waiting to be converted into characters.
    private int _numBytes;
    private byte[] _byteBuffer;

    // Encoding used to turn the pending bytes into chars.
    private Encoding _encoding;

    internal UrlDecoder(int bufferSize, Encoding encoding)
    {
        _bufferSize = bufferSize;
        _encoding = encoding;
        _charBuffer = new char[bufferSize];
        // byte buffer created on demand
    }

    // Appends a char, converting any pending bytes first so that output order is preserved.
    internal void AddChar(char ch)
    {
        FlushBytes();
        _charBuffer[_numChars] = ch;
        _numChars++;
    }

    // Appends a raw byte to the pending run.
    internal void AddByte(byte b)
    {
        if (_byteBuffer == null)
        {
            _byteBuffer = new byte[_bufferSize];
        }
        _byteBuffer[_numBytes] = b;
        _numBytes++;
    }

    // Returns everything appended so far as a string (empty when nothing was produced).
    internal String GetString()
    {
        FlushBytes();
        return (_numChars > 0)
            ? new String(_charBuffer, 0, _numChars)
            : String.Empty;
    }

    // Converts the pending bytes, if any, and appends the resulting chars.
    private void FlushBytes()
    {
        if (_numBytes <= 0)
        {
            return;
        }
        int produced = _encoding.GetChars(_byteBuffer, 0, _numBytes, _charBuffer, _numChars);
        _numChars += produced;
        _numBytes = 0;
    }
}
#endregion
#region HtmlEntities nested class
// helper class for lookup of HTML encoding entities
private static class HtmlEntities {
// The list is from http://www.w3.org/TR/REC-html40/sgml/entities.html, except for &apos;, which
// is defined in http://www.w3.org/TR/2008/REC-xml-20081126/#sec-predefined-ent.
private static String[] _entitiesList = new String[] {
"\x0022-quot",
"\x0026-amp",
"\x0027-apos",
"\x003c-lt",
"\x003e-gt",
"\x00a0-nbsp",
"\x00a1-iexcl",
"\x00a2-cent",
"\x00a3-pound",
"\x00a4-curren",
"\x00a5-yen",
"\x00a6-brvbar",
"\x00a7-sect",
"\x00a8-uml",
"\x00a9-copy",
"\x00aa-ordf",
"\x00ab-laquo",
"\x00ac-not",
"\x00ad-shy",
"\x00ae-reg",
"\x00af-macr",
"\x00b0-deg",
"\x00b1-plusmn",
"\x00b2-sup2",
"\x00b3-sup3",
"\x00b4-acute",
"\x00b5-micro",
"\x00b6-para",
"\x00b7-middot",
"\x00b8-cedil",
"\x00b9-sup1",
"\x00ba-ordm",
"\x00bb-raquo",
"\x00bc-frac14",
"\x00bd-frac12",
"\x00be-frac34",
"\x00bf-iquest",
"\x00c0-Agrave",
"\x00c1-Aacute",
"\x00c2-Acirc",
"\x00c3-Atilde",
"\x00c4-Auml",
"\x00c5-Aring",
"\x00c6-AElig",
"\x00c7-Ccedil",
"\x00c8-Egrave",
"\x00c9-Eacute",
"\x00ca-Ecirc",
"\x00cb-Euml",
"\x00cc-Igrave",
"\x00cd-Iacute",
"\x00ce-Icirc",
"\x00cf-Iuml",
"\x00d0-ETH",
"\x00d1-Ntilde",
"\x00d2-Ograve",
"\x00d3-Oacute",
"\x00d4-Ocirc",
"\x00d5-Otilde",
"\x00d6-Ouml",
"\x00d7-times",
"\x00d8-Oslash",
"\x00d9-Ugrave",
"\x00da-Uacute",
"\x00db-Ucirc",
"\x00dc-Uuml",
"\x00dd-Yacute",
"\x00de-THORN",
"\x00df-szlig",
"\x00e0-agrave",
"\x00e1-aacute",
"\x00e2-acirc",
"\x00e3-atilde",
"\x00e4-auml",
"\x00e5-aring",
"\x00e6-aelig",
"\x00e7-ccedil",
"\x00e8-egrave",
"\x00e9-eacute",
"\x00ea-ecirc",
"\x00eb-euml",
"\x00ec-igrave",
"\x00ed-iacute",
"\x00ee-icirc",
"\x00ef-iuml",
"\x00f0-eth",
"\x00f1-ntilde",
"\x00f2-ograve",
"\x00f3-oacute",
"\x00f4-ocirc",
"\x00f5-otilde",
"\x00f6-ouml",
"\x00f7-divide",
"\x00f8-oslash",
"\x00f9-ugrave",
"\x00fa-uacute",
"\x00fb-ucirc",
"\x00fc-uuml",
"\x00fd-yacute",
"\x00fe-thorn",
"\x00ff-yuml",
"\x0152-OElig",
"\x0153-oelig",
"\x0160-Scaron",
"\x0161-scaron",
"\x0178-Yuml",
"\x0192-fnof",
"\x02c6-circ",
"\x02dc-tilde",
"\x0391-Alpha",
"\x0392-Beta",
"\x0393-Gamma",
"\x0394-Delta",
"\x0395-Epsilon",
"\x0396-Zeta",
"\x0397-Eta",
"\x0398-Theta",
"\x0399-Iota",
"\x039a-Kappa",
"\x039b-Lambda",
"\x039c-Mu",
"\x039d-Nu",
"\x039e-Xi",
"\x039f-Omicron",
"\x03a0-Pi",
"\x03a1-Rho",
"\x03a3-Sigma",
"\x03a4-Tau",
"\x03a5-Upsilon",
"\x03a6-Phi",
"\x03a7-Chi",
"\x03a8-Psi",
"\x03a9-Omega",
"\x03b1-alpha",
"\x03b2-beta",
"\x03b3-gamma",
"\x03b4-delta",
"\x03b5-epsilon",
"\x03b6-zeta",
"\x03b7-eta",
"\x03b8-theta",
"\x03b9-iota",
"\x03ba-kappa",
"\x03bb-lambda",
"\x03bc-mu",
"\x03bd-nu",
"\x03be-xi",
"\x03bf-omicron",
"\x03c0-pi",
"\x03c1-rho",
"\x03c2-sigmaf",
"\x03c3-sigma",
"\x03c4-tau",
"\x03c5-upsilon",
"\x03c6-phi",
"\x03c7-chi",
"\x03c8-psi",
"\x03c9-omega",
"\x03d1-thetasym",
"\x03d2-upsih",
"\x03d6-piv",
"\x2002-ensp",
"\x2003-emsp",
"\x2009-thinsp",
"\x200c-zwnj",
"\x200d-zwj",
"\x200e-lrm",
"\x200f-rlm",
"\x2013-ndash",
"\x2014-mdash",
"\x2018-lsquo",
"\x2019-rsquo",
"\x201a-sbquo",
"\x201c-ldquo",
"\x201d-rdquo",
"\x201e-bdquo",
"\x2020-dagger",
"\x2021-Dagger",
"\x2022-bull",
"\x2026-hellip",
"\x2030-permil",
"\x2032-prime",
"\x2033-Prime",
"\x2039-lsaquo",
"\x203a-rsaquo",
"\x203e-oline",
"\x2044-frasl",
"\x20ac-euro",
"\x2111-image",
"\x2118-weierp",
"\x211c-real",
"\x2122-trade",
"\x2135-alefsym",
"\x2190-larr",
"\x2191-uarr",
"\x2192-rarr",
"\x2193-darr",
"\x2194-harr",
"\x21b5-crarr",
"\x21d0-lArr",
"\x21d1-uArr",
"\x21d2-rArr",
"\x21d3-dArr",
"\x21d4-hArr",
"\x2200-forall",
"\x2202-part",
"\x2203-exist",
"\x2205-empty",
"\x2207-nabla",
"\x2208-isin",
"\x2209-notin",
"\x220b-ni",
"\x220f-prod",
"\x2211-sum",
"\x2212-minus",
"\x2217-lowast",
"\x221a-radic",
"\x221d-prop",
"\x221e-infin",
"\x2220-ang",
"\x2227-and",
"\x2228-or",
"\x2229-cap",
"\x222a-cup",
"\x222b-int",
"\x2234-there4",
"\x223c-sim",
"\x2245-cong",
"\x2248-asymp",
"\x2260-ne",
"\x2261-equiv",
"\x2264-le",
"\x2265-ge",
"\x2282-sub",
"\x2283-sup",
"\x2284-nsub",
"\x2286-sube",
"\x2287-supe",
"\x2295-oplus",
"\x2297-otimes",
"\x22a5-perp",
"\x22c5-sdot",
"\x2308-lceil",
"\x2309-rceil",
"\x230a-lfloor",
"\x230b-rfloor",
"\x2329-lang",
"\x232a-rang",
"\x25ca-loz",
"\x2660-spades",
"\x2663-clubs",
"\x2665-hearts",
"\x2666-diams",
};
// Entity name -> character map, built once from _entitiesList (which must be declared above this field).
private static Dictionary<string, char> _lookupTable = GenerateLookupTable();

// Builds the lookup table. Each list entry has the form "<char>-<name>":
// index 0 is the character, index 1 the '-' separator, and the name starts at index 2.
private static Dictionary<string, char> GenerateLookupTable() {
    Dictionary<string, char> table = new Dictionary<string, char>(StringComparer.Ordinal);
    for (int i = 0; i < _entitiesList.Length; i++) {
        string entry = _entitiesList[i];
        char character = entry[0];
        string name = entry.Substring(2);
        table.Add(name, character);
    }
    return table;
}
// Returns the character for a named entity (name given without '&' and ';'; matching is ordinal
// and case-sensitive), or (char)0 when the name is unknown.
public static char Lookup(string entity) {
    char match;
    bool found = _lookupTable.TryGetValue(entity, out match);
    return found ? match : default(char);
}
}
#endregion
}
}
|