|
////////////////////////////////////////////////////////////////////////////
//
// DateTimeFormatInfoScanner
//
// Scan a specified DateTimeFormatInfo to search for data used in DateTime.Parse()
//
// The data includes:
//
// DateWords: such as "de" used in es-ES (Spanish) LongDatePattern.
// Postfix: such as "ta" used in fi-FI after the month name.
//
// This class is shared among mscorlib.dll and sysglobl.dll.
// Use conditional CULTURE_AND_REGIONINFO_BUILDER_ONLY to differentiate between
// methods for mscorlib.dll and sysglobl.dll.
//
////////////////////////////////////////////////////////////////////////////
namespace System.Globalization
{
using System;
using System.Globalization;
using System.Collections;
using System.Collections.Generic;
using System.Text;
//
// from LocaleEx.txt header
//
//; IFORMATFLAGS
//; Parsing/formatting flags.
internal enum FORMATFLAGS {
None = 0x00000000,
UseGenitiveMonth = 0x00000001,
UseLeapYearMonth = 0x00000002,
UseSpacesInMonthNames = 0x00000004,
UseHebrewParsing = 0x00000008,
UseSpacesInDayNames = 0x00000010, // Has spaces or non-breaking space in the day names.
UseDigitPrefixInTokens = 0x00000020, // Has token starting with numbers.
}
//
// To change in CalendarId you have to do the same change in Calendar.cs
// To do: make the definintion shared between these two files.
//
internal enum CalendarId : ushort
{
GREGORIAN = 1 , // Gregorian (localized) calendar
GREGORIAN_US = 2 , // Gregorian (U.S.) calendar
JAPAN = 3 , // Japanese Emperor Era calendar
/* SSS_WARNINGS_OFF */ TAIWAN = 4 , // Taiwan Era calendar /* SSS_WARNINGS_ON */
KOREA = 5 , // Korean Tangun Era calendar
HIJRI = 6 , // Hijri (Arabic Lunar) calendar
THAI = 7 , // Thai calendar
HEBREW = 8 , // Hebrew (Lunar) calendar
GREGORIAN_ME_FRENCH = 9 , // Gregorian Middle East French calendar
GREGORIAN_ARABIC = 10, // Gregorian Arabic calendar
GREGORIAN_XLIT_ENGLISH = 11, // Gregorian Transliterated English calendar
GREGORIAN_XLIT_FRENCH = 12,
// Note that all calendars after this point are MANAGED ONLY for now.
JULIAN = 13,
JAPANESELUNISOLAR = 14,
CHINESELUNISOLAR = 15,
SAKA = 16, // reserved to match Office but not implemented in our code
LUNAR_ETO_CHN = 17, // reserved to match Office but not implemented in our code
LUNAR_ETO_KOR = 18, // reserved to match Office but not implemented in our code
LUNAR_ETO_ROKUYOU = 19, // reserved to match Office but not implemented in our code
KOREANLUNISOLAR = 20,
TAIWANLUNISOLAR = 21,
PERSIAN = 22,
UMALQURA = 23,
LAST_CALENDAR = 23 // Last calendar ID
}
internal class DateTimeFormatInfoScanner
{
// Special prefix-like flag char in DateWord array.
// Use char in PUA area since we won't be using them in real data.
// The char used to tell a read date word or a month postfix. A month postfix
// is "ta" in the long date pattern like "d. MMMM'ta 'yyyy" for fi-FI.
// In this case, it will be stored as "\xfffeta" in the date word array.
internal const char MonthPostfixChar = '\xe000';
// Add ignorable symbol in a DateWord array.
// hu-HU has:
// shrot date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
// long date pattern: yyyy. MMMM d.
// Here, "." is the date separator (derived from short date pattern). However,
// "." also appear at the end of long date pattern. In this case, we just
// "." as ignorable symbol so that the DateTime.Parse() state machine will not
// treat the additional date separator at the end of y,m,d pattern as an error
// condition.
internal const char IgnorableSymbolChar = '\xe001';
// Known CJK suffix
internal const String CJKYearSuff = "\u5e74";
internal const String CJKMonthSuff = "\u6708";
internal const String CJKDaySuff = "\u65e5";
internal const String KoreanYearSuff = "\ub144";
internal const String KoreanMonthSuff = "\uc6d4";
internal const String KoreanDaySuff = "\uc77c";
internal const String KoreanHourSuff = "\uc2dc";
internal const String KoreanMinuteSuff = "\ubd84";
internal const String KoreanSecondSuff = "\ucd08";
internal const String CJKHourSuff = "\u6642";
internal const String ChineseHourSuff = "\u65f6";
internal const String CJKMinuteSuff = "\u5206";
internal const String CJKSecondSuff = "\u79d2";
// The collection fo date words & postfix.
internal List<String> m_dateWords = new List<String>();
// Hashtable for the known words.
private static volatile Dictionary<String, String> s_knownWords;
static Dictionary<String, String> KnownWords
{
get
{
if (s_knownWords == null)
{
Dictionary<String, String> temp = new Dictionary<String, String>();
// Add known words into the hash table.
// Skip these special symbols.
temp.Add("/", String.Empty);
temp.Add("-", String.Empty);
temp.Add(".", String.Empty);
// Skip known CJK suffixes.
temp.Add(CJKYearSuff, String.Empty);
temp.Add(CJKMonthSuff, String.Empty);
temp.Add(CJKDaySuff, String.Empty);
temp.Add(KoreanYearSuff, String.Empty);
temp.Add(KoreanMonthSuff, String.Empty);
temp.Add(KoreanDaySuff, String.Empty);
temp.Add(KoreanHourSuff, String.Empty);
temp.Add(KoreanMinuteSuff, String.Empty);
temp.Add(KoreanSecondSuff, String.Empty);
temp.Add(CJKHourSuff, String.Empty);
temp.Add(ChineseHourSuff, String.Empty);
temp.Add(CJKMinuteSuff, String.Empty);
temp.Add(CJKSecondSuff, String.Empty);
s_knownWords = temp;
}
return (s_knownWords);
}
}
////////////////////////////////////////////////////////////////////////////
//
// Parameters:
// pattern: The pattern to be scanned.
// currentIndex: the current index to start the scan.
//
// Returns:
// Return the index with the first character that is a letter, which will
// be the start of a date word.
// Note that the index can be pattern.Length if we reach the end of the string.
//
////////////////////////////////////////////////////////////////////////////
internal static int SkipWhiteSpacesAndNonLetter(String pattern, int currentIndex)
{
while (currentIndex < pattern.Length)
{
char ch = pattern[currentIndex];
if (ch == '\\')
{
// Escaped character. Look ahead one character.
currentIndex++;
if (currentIndex < pattern.Length)
{
ch = pattern[currentIndex];
if (ch == '\'')
{
// Skip the leading single quote. We will
// stop at the first letter.
continue;
}
// Fall thru to check if this is a letter.
} else
{
// End of string
break;
}
}
if (Char.IsLetter(ch) || ch == '\'' || ch == '.')
{
break;
}
// Skip the current char since it is not a letter.
currentIndex++;
}
return (currentIndex);
}
////////////////////////////////////////////////////////////////////////////
//
// A helper to add the found date word or month postfix into ArrayList for date words.
//
// Parameters:
// formatPostfix: What kind of postfix this is.
// Possible values:
// null: This is a regular date word
// "MMMM": month postfix
// word: The date word or postfix to be added.
//
////////////////////////////////////////////////////////////////////////////
internal void AddDateWordOrPostfix(String formatPostfix, String str)
{
if (str.Length > 0)
{
// Some cultures use . like an abbreviation
if (str.Equals("."))
{
AddIgnorableSymbols(".");
return;
}
String words;
if (KnownWords.TryGetValue(str, out words) == false)
{
if (m_dateWords == null)
{
m_dateWords = new List<String>();
}
if (formatPostfix == "MMMM")
{
// Add the word into the ArrayList as "\xfffe" + real month postfix.
String temp = MonthPostfixChar + str;
if (!m_dateWords.Contains(temp))
{
m_dateWords.Add(temp);
}
} else
{
if (!m_dateWords.Contains(str))
{
m_dateWords.Add(str);
}
if (str[str.Length - 1] == '.')
{
// Old version ignore the trialing dot in the date words. Support this as well.
String strWithoutDot = str.Substring(0, str.Length - 1);
if (!m_dateWords.Contains(strWithoutDot))
{
m_dateWords.Add(strWithoutDot);
}
}
}
}
}
}
////////////////////////////////////////////////////////////////////////////
//
// Scan the pattern from the specified index and add the date word/postfix
// when appropriate.
//
// Parameters:
// pattern: The pattern to be scanned.
// index: The starting index to be scanned.
// formatPostfix: The kind of postfix to be scanned.
// Possible values:
// null: This is a regular date word
// "MMMM": month postfix
//
//
////////////////////////////////////////////////////////////////////////////
internal int AddDateWords(String pattern, int index, String formatPostfix)
{
// Skip any whitespaces so we will start from a letter.
int newIndex = SkipWhiteSpacesAndNonLetter(pattern, index);
if (newIndex != index && formatPostfix != null)
{
// There are whitespaces. This will not be a postfix.
formatPostfix = null;
}
index = newIndex;
// This is the first char added into dateWord.
// Skip all non-letter character. We will add the first letter into DateWord.
StringBuilder dateWord = new StringBuilder();
// We assume that date words should start with a letter.
// Skip anything until we see a letter.
while (index < pattern.Length)
{
char ch = pattern[index];
if (ch == '\'')
{
// We have seen the end of quote. Add the word if we do not see it before,
// and break the while loop.
AddDateWordOrPostfix(formatPostfix, dateWord.ToString());
index++;
break;
} else if (ch == '\\')
{
//
// Escaped character. Look ahead one character
//
// Skip escaped backslash.
index++;
if (index < pattern.Length)
{
dateWord.Append(pattern[index]);
index++;
}
} else if (Char.IsWhiteSpace(ch))
{
// Found a whitespace. We have to add the current date word/postfix.
AddDateWordOrPostfix(formatPostfix, dateWord.ToString());
if (formatPostfix != null)
{
// Done with postfix. The rest will be regular date word.
formatPostfix = null;
}
// Reset the dateWord.
dateWord.Length = 0;
index++;
} else
{
dateWord.Append(ch);
index++;
}
}
return (index);
}
////////////////////////////////////////////////////////////////////////////
//
// A simple helper to find the repeat count for a specified char.
//
////////////////////////////////////////////////////////////////////////////
internal static int ScanRepeatChar(String pattern, char ch, int index, out int count)
{
count = 1;
while (++index < pattern.Length && pattern[index] == ch) {
count++;
}
// Return the updated position.
return (index);
}
////////////////////////////////////////////////////////////////////////////
//
// Add the text that is a date separator but is treated like ignroable symbol.
// E.g.
// hu-HU has:
// shrot date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
// long date pattern: yyyy. MMMM d.
// Here, "." is the date separator (derived from short date pattern). However,
// "." also appear at the end of long date pattern. In this case, we just
// "." as ignorable symbol so that the DateTime.Parse() state machine will not
// treat the additional date separator at the end of y,m,d pattern as an error
// condition.
//
////////////////////////////////////////////////////////////////////////////
internal void AddIgnorableSymbols(String text)
{
if (m_dateWords == null)
{
// Create the date word array.
m_dateWords = new List<String>();
}
// Add the ingorable symbol into the ArrayList.
String temp = IgnorableSymbolChar + text;
if (!m_dateWords.Contains(temp))
{
m_dateWords.Add(temp);
}
}
//
// Flag used to trace the date patterns (yy/yyyyy/M/MM/MMM/MMM/d/dd) that we have seen.
//
enum FoundDatePattern
{
None = 0x0000,
FoundYearPatternFlag = 0x0001,
FoundMonthPatternFlag = 0x0002,
FoundDayPatternFlag = 0x0004,
FoundYMDPatternFlag = 0x0007, // FoundYearPatternFlag | FoundMonthPatternFlag | FoundDayPatternFlag;
}
// Check if we have found all of the year/month/day pattern.
FoundDatePattern m_ymdFlags = FoundDatePattern.None;
////////////////////////////////////////////////////////////////////////////
//
// Given a date format pattern, scan for date word or postfix.
//
// A date word should be always put in a single quoted string. And it will
// start from a letter, so whitespace and symbols will be ignored before
// the first letter.
//
// Examples of date word:
// 'de' in es-SP: dddd, dd' de 'MMMM' de 'yyyy
// "\x0443." in bg-BG: dd.M.yyyy '\x0433.'
//
// Example of postfix:
// month postfix:
// "ta" in fi-FI: d. MMMM'ta 'yyyy
// Currently, only month postfix is supported.
//
// Usage:
// Always call this with Framework-style pattern, instead of Windows style pattern.
// Windows style pattern uses '' for single quote, while .NET uses \'
//
////////////////////////////////////////////////////////////////////////////
internal void ScanDateWord(String pattern)
{
// Check if we have found all of the year/month/day pattern.
m_ymdFlags = FoundDatePattern.None;
int i = 0;
while (i < pattern.Length)
{
char ch = pattern[i];
int chCount;
switch (ch)
{
case '\'':
// Find a beginning quote. Search until the end quote.
i = AddDateWords(pattern, i+1, null);
break;
case 'M':
i = ScanRepeatChar(pattern, 'M', i, out chCount);
if (chCount >= 4)
{
if (i < pattern.Length && pattern[i] == '\'')
{
i = AddDateWords(pattern, i+1, "MMMM");
}
}
m_ymdFlags |= FoundDatePattern.FoundMonthPatternFlag;
break;
case 'y':
i = ScanRepeatChar(pattern, 'y', i, out chCount);
m_ymdFlags |= FoundDatePattern.FoundYearPatternFlag;
break;
case 'd':
i = ScanRepeatChar(pattern, 'd', i, out chCount);
if (chCount <= 2)
{
// Only count "d" & "dd".
// ddd, dddd are day names. Do not count them.
m_ymdFlags |= FoundDatePattern.FoundDayPatternFlag;
}
break;
case '\\':
// Found a escaped char not in a quoted string. Skip the current backslash
// and its next character.
i += 2;
break;
case '.':
if (m_ymdFlags == FoundDatePattern.FoundYMDPatternFlag)
{
// If we find a dot immediately after the we have seen all of the y, m, d pattern.
// treat it as a ignroable symbol. Check for comments in AddIgnorableSymbols for
// more details.
AddIgnorableSymbols(".");
m_ymdFlags = FoundDatePattern.None;
}
i++;
break;
default:
if (m_ymdFlags == FoundDatePattern.FoundYMDPatternFlag && !Char.IsWhiteSpace(ch))
{
// We are not seeing "." after YMD. Clear the flag.
m_ymdFlags = FoundDatePattern.None;
}
// We are not in quote. Skip the current character.
i++;
break;
}
}
}
////////////////////////////////////////////////////////////////////////////
//
// Given a DTFI, get all of the date words from date patterns and time patterns.
//
////////////////////////////////////////////////////////////////////////////
#if FEATURE_CORECLR
[System.Security.SecurityCritical] // auto-generated
#endif
internal String[] GetDateWordsOfDTFI(DateTimeFormatInfo dtfi) {
// Enumarate all LongDatePatterns, and get the DateWords and scan for month postfix.
String[] datePatterns = dtfi.GetAllDateTimePatterns('D');
int i;
// Scan the long date patterns
for (i = 0; i < datePatterns.Length; i++)
{
ScanDateWord(datePatterns[i]);
}
// Scan the short date patterns
datePatterns = dtfi.GetAllDateTimePatterns('d');
for (i = 0; i < datePatterns.Length; i++)
{
ScanDateWord(datePatterns[i]);
}
// Scan the YearMonth patterns.
datePatterns = dtfi.GetAllDateTimePatterns('y');
for (i = 0; i < datePatterns.Length; i++)
{
ScanDateWord(datePatterns[i]);
}
// Scan the month/day pattern
ScanDateWord(dtfi.MonthDayPattern);
// Scan the long time patterns.
datePatterns = dtfi.GetAllDateTimePatterns('T');
for (i = 0; i < datePatterns.Length; i++)
{
ScanDateWord(datePatterns[i]);
}
// Scan the short time patterns.
datePatterns = dtfi.GetAllDateTimePatterns('t');
for (i = 0; i < datePatterns.Length; i++)
{
ScanDateWord(datePatterns[i]);
}
String[] result = null;
if (m_dateWords != null && m_dateWords.Count > 0)
{
result = new String[m_dateWords.Count];
for (i = 0; i < m_dateWords.Count; i++)
{
result[i] = m_dateWords[i];
}
}
return (result);
}
#if ADDITIONAL_DTFI_SCANNER_METHODS
////////////////////////////////////////////////////////////////////////////
//
// Reset the date word ArrayList
//
////////////////////////////////////////////////////////////////////////////
internal void Reset()
{
m_dateWords.RemoveRange(0, m_dateWords.Count);
}
#endif
////////////////////////////////////////////////////////////////////////////
//
// Scan the month names to see if genitive month names are used, and return
// the format flag.
//
////////////////////////////////////////////////////////////////////////////
internal static FORMATFLAGS GetFormatFlagGenitiveMonth(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames)
{
// If we have different names in regular and genitive month names, use genitive month flag.
return ((!EqualStringArrays(monthNames, genitveMonthNames) || !EqualStringArrays(abbrevMonthNames, genetiveAbbrevMonthNames))
? FORMATFLAGS.UseGenitiveMonth: 0);
}
////////////////////////////////////////////////////////////////////////////
//
// Scan the month names to see if spaces are used or start with a digit, and return the format flag
//
////////////////////////////////////////////////////////////////////////////
internal static FORMATFLAGS GetFormatFlagUseSpaceInMonthNames(String[] monthNames, String[] genitveMonthNames, String[] abbrevMonthNames, String[] genetiveAbbrevMonthNames)
{
FORMATFLAGS formatFlags = 0;
formatFlags |= (ArrayElementsBeginWithDigit(monthNames) ||
ArrayElementsBeginWithDigit(genitveMonthNames) ||
ArrayElementsBeginWithDigit(abbrevMonthNames) ||
ArrayElementsBeginWithDigit(genetiveAbbrevMonthNames)
? FORMATFLAGS.UseDigitPrefixInTokens : 0);
formatFlags |= (ArrayElementsHaveSpace(monthNames) ||
ArrayElementsHaveSpace(genitveMonthNames) ||
ArrayElementsHaveSpace(abbrevMonthNames) ||
ArrayElementsHaveSpace(genetiveAbbrevMonthNames)
? FORMATFLAGS.UseSpacesInMonthNames : 0);
return (formatFlags);
}
////////////////////////////////////////////////////////////////////////////
//
// Scan the day names and set the correct format flag.
//
////////////////////////////////////////////////////////////////////////////
internal static FORMATFLAGS GetFormatFlagUseSpaceInDayNames(String[] dayNames, String[] abbrevDayNames)
{
return ((ArrayElementsHaveSpace(dayNames) ||
ArrayElementsHaveSpace(abbrevDayNames))
? FORMATFLAGS.UseSpacesInDayNames : 0);
}
////////////////////////////////////////////////////////////////////////////
//
// Check the calendar to see if it is HebrewCalendar and set the Hebrew format flag if necessary.
//
////////////////////////////////////////////////////////////////////////////
internal static FORMATFLAGS GetFormatFlagUseHebrewCalendar(int calID)
{
return (calID == (int)CalendarId.HEBREW ?
FORMATFLAGS.UseHebrewParsing | FORMATFLAGS.UseLeapYearMonth : 0);
}
//-----------------------------------------------------------------------------
// EqualStringArrays
// compares two string arrays and return true if all elements of the first
// array equals to all elmentsof the second array.
// otherwise it returns false.
//-----------------------------------------------------------------------------
private static bool EqualStringArrays(string [] array1, string [] array2)
{
// Shortcut if they're the same array
if (array1 == array2)
{
return true;
}
// This is effectively impossible
if (array1.Length != array2.Length)
{
return false;
}
// Check each string
for (int i=0; i<array1.Length; i++)
{
if (!array1[i].Equals(array2[i]))
{
return false;
}
}
return true;
}
//-----------------------------------------------------------------------------
// ArrayElementsHaveSpace
// It checks all input array elements if any of them has space character
// returns true if found space character in one of the array elements.
// otherwise returns false.
//-----------------------------------------------------------------------------
private static bool ArrayElementsHaveSpace(string [] array)
{
for (int i=0; i<array.Length; i++)
{
// it is faster to check for space character manually instead of calling IndexOf
// so we don't have to go to native code side.
for (int j=0; j<array[i].Length; j++)
{
if ( Char.IsWhiteSpace(array[i][j]) )
{
return true;
}
}
}
return false;
}
////////////////////////////////////////////////////////////////////////////
//
// Check if any element of the array start with a digit.
//
////////////////////////////////////////////////////////////////////////////
private static bool ArrayElementsBeginWithDigit(string [] array)
{
for (int i=0; i<array.Length; i++)
{
// it is faster to check for space character manually instead of calling IndexOf
// so we don't have to go to native code side.
if (array[i].Length > 0 &&
array[i][0] >= '0' && array[i][0] <= '9')
{
int index = 1;
while (index < array[i].Length && array[i][index] >= '0' && array[i][index] <= '9')
{
// Skip other digits.
index++;
}
if (index == array[i].Length)
{
return (false);
}
if (index == array[i].Length - 1)
{
// Skip known CJK month suffix.
// CJK uses month name like "1\x6708", since \x6708 is a known month suffix,
// we don't need the UseDigitPrefixInTokens since it is slower.
switch (array[i][index])
{
case '\x6708': // CJKMonthSuff
case '\xc6d4': // KoreanMonthSuff
return (false);
}
}
if (index == array[i].Length - 4)
{
// Skip known CJK month suffix.
// Starting with Windows 8, the CJK months for some cultures looks like: "1' \x6708'"
// instead of just "1\x6708"
if(array[i][index] == '\'' && array[i][index + 1] == ' ' &&
array[i][index + 2] == '\x6708' && array[i][index + 3] == '\'')
{
return (false);
}
}
return (true);
}
}
return false;
}
}
}
|