using System;
using System.Runtime.InteropServices;
using System.Text;
using Vanara.Extensions;
using Vanara.InteropServices;

namespace Vanara.PInvoke
{
	public static partial class Kernel32
	{
		/// <summary>The system default Windows ANSI code page.</summary>
		public const uint CP_ACP = 0;

		/// <summary>The current system Macintosh code page.</summary>
		public const uint CP_MACCP = 2;

		/// <summary>The current system OEM code page.</summary>
		public const uint CP_OEMCP = 1;

		/// <summary>Symbol code page (42).</summary>
		public const uint CP_SYMBOL = 42;

		/// <summary>The Windows ANSI code page for the current thread.</summary>
		public const uint CP_THREAD_ACP = 3;

		/// <summary>UTF-7. Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred.</summary>
		public const uint CP_UTF7 = 65000;

		/// <summary>UTF-8.</summary>
		public const uint CP_UTF8 = 65001;

		/// <summary>
		/// The string indicated by lpString1 is equivalent in lexical value to the string indicated by lpString2. The two strings are
		/// equivalent for sorting purposes, although not necessarily identical.
		/// </summary>
		public const uint CSTR_EQUAL = 2;

		/// <summary>The string indicated by lpString1 is greater in lexical value than the string indicated by lpString2.</summary>
		public const uint CSTR_GREATER_THAN = 3;

		/// <summary>The string indicated by lpString1 is less in lexical value than the string indicated by lpString2.</summary>
		public const uint CSTR_LESS_THAN = 1;

		/// <summary>Flags used by <c>CompareString</c> and <c>CompareStringEx</c>. The values are bit flags and may be combined.</summary>
		[Flags]
		public enum COMPARE_STRING
		{
			/// <summary>Ignore case. For many scripts (notably Latin scripts), NORM_IGNORECASE coincides with LINGUISTIC_IGNORECASE.</summary>
			NORM_IGNORECASE = 1,

			/// <summary>
			/// Ignore nonspacing characters. For many scripts (notably Latin scripts), NORM_IGNORENONSPACE coincides with LINGUISTIC_IGNOREDIACRITIC.
			/// </summary>
			NORM_IGNORENONSPACE = 2,

			/// <summary>Ignore symbols and punctuation.</summary>
			NORM_IGNORESYMBOLS = 4,

			/// <summary>
			/// Do not differentiate between hiragana and katakana characters. Corresponding hiragana and katakana characters compare as equal.
			/// </summary>
			NORM_IGNOREKANATYPE = 65536,

			/// <summary>
			/// Ignore the difference between half-width and full-width characters, for example, C a t == cat. The full-width form is a
			/// formatting distinction used in Chinese and Japanese scripts.
			/// </summary>
			NORM_IGNOREWIDTH = 131072,

			/// <summary>Ignore case, as linguistically appropriate.</summary>
			LINGUISTIC_IGNORECASE = 16,

			/// <summary>Ignore nonspacing characters, as linguistically appropriate.</summary>
			LINGUISTIC_IGNOREDIACRITIC = 32,

			/// <summary>
			/// Use the default linguistic rules for casing, instead of file system rules. Note that most scenarios for CompareStringEx use
			/// this flag. This flag does not have to be used when your application calls CompareStringOrdinal.
			/// </summary>
			NORM_LINGUISTIC_CASING = 134217728,

			/// <summary>Treat punctuation the same as symbols.</summary>
			SORT_STRINGSORT = 0x00001000,

			/// <summary>Windows 7: Treat digits as numbers during sorting, for example, sort &quot;2&quot; before &quot;10&quot;.</summary>
			SORT_DIGITSASNUMBERS = 8
		}

		/// <summary>Flags indicating the conversion type.</summary>
		[Flags]
		public enum MBCONV
		{
			/// <summary>
			/// Default; do not use with MB_COMPOSITE. Always use precomposed characters, that is, characters having a single character value
			/// for a base or nonspacing character combination. For example, in the character &#232;, the e is the base character and the
			/// accent grave mark is the nonspacing character. If a single Unicode code point is defined for a character, the application
			/// should use it instead of a separate base character and a nonspacing character. For example, &#196; is represented by the
			/// single Unicode code point LATIN CAPITAL LETTER A WITH DIAERESIS (U+00C4).
			/// </summary>
			MB_PRECOMPOSED = 0x00000001,

			/// <summary>
			/// Always use decomposed characters, that is, characters in which a base character and one or more nonspacing characters each
			/// have distinct code point values. For example, &#196; is represented by A + &#168;: LATIN CAPITAL LETTER A (U+0041) +
			/// COMBINING DIAERESIS (U+0308). Note that this flag cannot be used with MB_PRECOMPOSED.
			/// </summary>
			MB_COMPOSITE = 0x00000002,

			/// <summary>Use glyph characters instead of control characters.</summary>
			MB_USEGLYPHCHARS = 0x00000004,

			/// <summary>
			/// <para>
			/// Fail if an invalid input character is encountered. Starting with Windows Vista, the function does not drop illegal code
			/// points if the application does not set this flag, but instead replaces illegal sequences with U+FFFD (encoded as appropriate
			/// for the specified codepage).
			/// </para>
			/// <para>
			/// Windows 2000 with SP4 and later, Windows XP: If this flag is not set, the function silently drops illegal code points. A
			/// call to GetLastError returns ERROR_NO_UNICODE_TRANSLATION.
			/// </para>
			/// </summary>
			MB_ERR_INVALID_CHARS = 0x00000008,
		}

		/// <summary>Flags specifying the type of transformation to use during string mapping.</summary>
		[PInvokeData("Winnls.h")]
		[Flags]
		public enum STRING_MAPPING
		{
			/// <summary>
			/// Fold compatibility zone characters into standard Unicode equivalents. This flag is equivalent to normalization form KD in
			/// Windows Vista, if the MAP_COMPOSITE flag is also set. If the composite flag is not set (default), this flag is equivalent to
			/// normalization form KC in Windows Vista.
			/// </summary>
			MAP_FOLDCZONE = 16,

			/// <summary>
			/// Map accented characters to precomposed characters, in which the accent and base character are combined into a single
			/// character value. This flag is equivalent to normalization form C in Windows Vista. This value cannot be combined with MAP_COMPOSITE.
			/// </summary>
			MAP_PRECOMPOSED = 32,

			/// <summary>
			/// Map accented characters to decomposed characters, that is, characters in which a base character and one or more nonspacing
			/// characters each have distinct code point values. For example, &#196; is represented by A + &#168;: LATIN CAPITAL
			/// LETTER A (U+0041) + COMBINING DIAERESIS (U+0308). This flag is equivalent to normalization form D in Windows Vista. Note that
			/// this flag cannot be used with MAP_PRECOMPOSED.
			/// </summary>
			MAP_COMPOSITE = 64,

			/// <summary>Map all digits to Unicode characters 0 through 9.</summary>
			MAP_FOLDDIGITS = 128,

			/// <summary>
			/// Expand all ligature characters so that they are represented by their two-character equivalent. For example, the ligature
			/// &quot;&#230;&quot; (U+00e6) expands to the two characters &quot;a&quot; (U+0061) + &quot;e&quot; (U+0065). This value
			/// cannot be combined with MAP_PRECOMPOSED or MAP_COMPOSITE.
			/// </summary>
			MAP_EXPAND_LIGATURES = 8192,
		}

		/// <summary>Flags indicating the conversion type.</summary>
		[Flags]
		public enum WCCONV
		{
			/// <summary>
			/// <para>
			/// Convert composite characters, consisting of a base character and a nonspacing character, each with different character
			/// values. Translate these characters to precomposed characters, which have a single character value for a base-nonspacing
			/// character combination. For example, in the character &#232;, the e is the base character and the accent grave mark is the
			/// nonspacing character.
			/// </para>
			/// <para>
			/// Your application can combine WC_COMPOSITECHECK with any one of the following flags, with the default being WC_SEPCHARS.
			/// These flags determine the behavior of the function when no precomposed mapping for a base-nonspacing character combination
			/// in a Unicode string is available. If none of these flags is supplied, the function behaves as if the WC_SEPCHARS flag is
			/// set. For more information, see WC_COMPOSITECHECK and related flags in the Remarks section.
			/// </para>
			/// </summary>
			WC_COMPOSITECHECK = 0x00000200,

			/// <summary>Discard nonspacing characters during conversion.</summary>
			WC_DISCARDNS = 0x00000010,

			/// <summary>Default. Generate separate characters during conversion.</summary>
			WC_SEPCHARS = 0x00000020,

			/// <summary>Replace exceptions with the default character during conversion.</summary>
			WC_DEFAULTCHAR = 0x00000040,

			/// <summary>
			/// Windows Vista and later: Fail (by returning 0 and setting the last-error code to ERROR_NO_UNICODE_TRANSLATION) if an invalid
			/// input character is encountered. You can retrieve the last-error code with a call to GetLastError. If this flag is not set,
			/// the function replaces illegal sequences with U+FFFD (encoded as appropriate for the specified codepage) and succeeds by
			/// returning the length of the converted string. Note that this flag only applies when CodePage is specified as CP_UTF8 or
			/// 54936. It cannot be used with other code page values.
			/// </summary>
			WC_ERR_INVALID_CHARS = 0x00000080,

			/// <summary>
			/// <para>
			/// Translate any Unicode characters that do not translate directly to multibyte equivalents to the default character specified
			/// by lpDefaultChar. In other words, if translating from Unicode to multibyte and back to Unicode again does not yield the same
			/// Unicode character, the function uses the default character. This flag can be used by itself or in combination with the other
			/// defined flags.
			/// </para>
			/// <para>
			/// For strings that require validation, such as file, resource, and user names, the application should always use the
			/// WC_NO_BEST_FIT_CHARS flag. This flag prevents the function from mapping characters to characters that appear similar but
			/// have very different semantics. In some cases, the semantic change can be extreme. For example, the symbol for
			/// &quot;∞&quot; (infinity) maps to 8 (eight) in some code pages.
			/// </para>
			/// </summary>
			WC_NO_BEST_FIT_CHARS = 0x00000400,
		}

		/// <summary>Compares two character strings, for a locale specified by identifier.</summary>
		/// <param name="Locale">
		/// Locale identifier of the locale used for the comparison. You can use the MAKELCID macro to create a locale identifier or
		/// use one of the predefined locale identifier values.
		/// </param>
		/// <param name="dwCmpFlags">
		/// Flags that indicate how the function compares the two strings. For detailed definitions, see the dwCmpFlags parameter of CompareStringEx.
		/// </param>
		/// <param name="lpString1">Pointer to the first string to compare.</param>
		/// <param name="cchCount1">
		/// Length of the string indicated by lpString1, excluding the terminating null character. This value represents bytes for the ANSI
		/// version of the function and wide characters for the Unicode version. The application can supply a negative value if the string is
		/// null-terminated. In this case, the function determines the length automatically.
		/// </param>
		/// <param name="lpString2">Pointer to the second string to compare.</param>
		/// <param name="cchCount2">
		/// Length of the string indicated by lpString2, excluding the terminating null character. This value represents bytes for the ANSI
		/// version of the function and wide characters for the Unicode version. The application can supply a negative value if the string is
		/// null-terminated. In this case, the function determines the length automatically.
		/// </param>
		/// <returns>Returns the values described for CompareStringEx.</returns>
		// int CompareString( _In_ LCID Locale, _In_ DWORD dwCmpFlags, _In_ LPCTSTR lpString1, _In_ int cchCount1, _In_ LPCTSTR lpString2,
		// _In_ int cchCount2); https://msdn.microsoft.com/en-us/library/windows/desktop/dd317759(v=vs.85).aspx
		// SetLastError is enabled so the error code behind a 0 return can be retrieved, matching CompareStringEx.
		[DllImport(Lib.Kernel32, SetLastError = true, CharSet = CharSet.Auto)]
		[PInvokeData("Winnls.h", MSDNShortId = "dd317759")]
		public static extern int CompareString(LCID Locale, COMPARE_STRING dwCmpFlags, string lpString1, int cchCount1, string lpString2, int cchCount2);

		/// <summary>Compares two Unicode (wide character) strings, for a locale specified by name.</summary>
		/// <param name="lpLocaleName">Pointer to a locale name, or one of the predefined locale name values.</param>
		/// <param name="dwCmpFlags">
		/// Flags that indicate how the function compares the two strings. By default, these flags are not set. This parameter can specify a
		/// combination of any of the <see cref="COMPARE_STRING"/> values, or it can be set to 0 to obtain the default behavior. Each flag is
		/// described on the corresponding <see cref="COMPARE_STRING"/> member.
		/// </param>
		/// <param name="lpString1">Pointer to the first string to compare.</param>
		/// <param name="cchCount1">
		/// Length of the string indicated by lpString1, excluding the terminating null character. The application can supply a negative
		/// value if the string is null-terminated. In this case, the function determines the length automatically.
		/// </param>
		/// <param name="lpString2">Pointer to the second string to compare.</param>
		/// <param name="cchCount2">
		/// Length of the string indicated by lpString2, excluding the terminating null character. The application can supply a negative
		/// value if the string is null-terminated. In this case, the function determines the length automatically.
		/// </param>
		/// <param name="lpVersionInformation">
		/// <para>
		/// Pointer to an NLSVERSIONINFOEX structure that contains the version information about the relevant NLS capability; usually
		/// retrieved from GetNLSVersionEx.
		/// </para>
		/// <para>Windows Vista, Windows 7: Reserved; must set to NULL.</para>
		/// </param>
		/// <param name="lpReserved">Reserved; must set to NULL.</param>
		/// <param name="lParam">Reserved; must be set to 0.</param>
		/// <returns>
		/// <para>
		/// Returns <see cref="CSTR_LESS_THAN"/>, <see cref="CSTR_EQUAL"/> or <see cref="CSTR_GREATER_THAN"/> if successful. To maintain
		/// the C runtime convention of comparing strings, the value 2 can be subtracted from a nonzero return value. Then, the meaning of
		/// &lt;0, ==0, and &gt;0 is consistent with the C runtime.
		/// </para>
		/// <para>
		/// The function returns 0 if it does not succeed. To get extended error information, the application can call GetLastError.
		/// </para>
		/// </returns>
		// int CompareStringEx( _In_opt_ LPCWSTR lpLocaleName, _In_ DWORD dwCmpFlags, _In_ LPCWSTR lpString1, _In_ int cchCount1, _In_
		// LPCWSTR lpString2, _In_ int cchCount2, _In_opt_ LPNLSVERSIONINFO lpVersionInformation, _In_opt_ LPVOID lpReserved, _In_opt_ LPARAM
		// lParam); https://msdn.microsoft.com/en-us/library/windows/desktop/dd317761(v=vs.85).aspx
		[DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true, CharSet = CharSet.Unicode)]
		[PInvokeData("Stringapiset.h", MSDNShortId = "dd317761")]
		public static extern int CompareStringEx(string lpLocaleName, COMPARE_STRING dwCmpFlags, string lpString1, int cchCount1, string lpString2, int cchCount2,
			[Optional] IntPtr lpVersionInformation, [Optional] IntPtr lpReserved, [Optional] IntPtr lParam);

		/// <summary>Compares two Unicode strings to test binary equivalence.</summary>
		/// <param name="lpString1">Pointer to the first string to compare.</param>
		/// <param name="cchCount1">
		/// Length of the string indicated by lpString1. The application supplies -1 if the string is null-terminated. In this case, the
		/// function determines the length automatically.
		/// </param>
		/// <param name="lpString2">Pointer to the second string to compare.</param>
		/// <param name="cchCount2">
		/// Length of the string indicated by lpString2. The application supplies -1 if the string is null-terminated. In this case, the
		/// function determines the length automatically.
		/// </param>
		/// <param name="bIgnoreCase">
		/// TRUE if the function is to perform a case-insensitive comparison, using the operating system uppercase table information.
		/// The application sets this parameter to FALSE if the function is to compare the strings exactly as they are passed in.
		/// </param>
		/// <returns>
		/// <para>
		/// Returns <see cref="CSTR_LESS_THAN"/>, <see cref="CSTR_EQUAL"/> or <see cref="CSTR_GREATER_THAN"/> if successful. To maintain
		/// the C runtime convention of comparing strings, the value 2 can be subtracted from a nonzero return value. Then, the meaning of
		/// &lt;0, ==0, and &gt;0 is consistent with the C runtime.
		/// </para>
		/// <para>
		/// The function returns 0 if it does not succeed. To get extended error information, the application can call GetLastError.
		/// </para>
		/// </returns>
		// int CompareStringOrdinal( _In_ LPCWSTR lpString1, _In_ int cchCount1, _In_ LPCWSTR lpString2, _In_ int cchCount2, _In_ BOOL
		// bIgnoreCase); https://msdn.microsoft.com/en-us/library/windows/desktop/dd317762(v=vs.85).aspx
		[DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true, CharSet = CharSet.Unicode)]
		[PInvokeData("Stringapiset.h", MSDNShortId = "dd317762")]
		public static extern int CompareStringOrdinal(string lpString1, int cchCount1, string lpString2, int cchCount2, [MarshalAs(UnmanagedType.Bool)] bool bIgnoreCase);

		/// <summary>
		/// Maps one Unicode string to another, performing the specified transformation. For an overview of the use of the string functions,
		/// see Strings.
		/// </summary>
		/// <param name="dwMapFlags">
		/// Flags specifying the type of transformation to use during string mapping. This parameter can be a combination of the
		/// <see cref="STRING_MAPPING"/> values; each flag is described on the corresponding <see cref="STRING_MAPPING"/> member.
		/// </param>
		/// <param name="lpSrcStr">Pointer to a source string that the function maps.</param>
		/// <param name="cchSrc">
		/// Size, in characters, of the source string indicated by lpSrcStr, excluding the terminating null character. The application can
		/// set the parameter to any negative value to specify that the source string is null-terminated. In this case, the function
		/// calculates the string length automatically, and null-terminates the mapped string indicated by lpDestStr.
		/// </param>
		/// <param name="lpDestStr">Pointer to a buffer in which this function retrieves the mapped string.</param>
		/// <param name="cchDest">
		/// <para>
		/// Size, in characters, of the destination string indicated by lpDestStr. If space for a terminating null character is included in
		/// cchSrc, cchDest must also include space for a terminating null character.
		/// </para>
		/// <para>
		/// The application can set cchDest to 0. In this case, the function does not use the lpDestStr parameter and returns the required
		/// buffer size for the mapped string. If the MAP_FOLDDIGITS flag is specified, the return value is the maximum size required, even
		/// if the actual number of characters needed is smaller than the maximum size. If the maximum size is not passed, the function fails
		/// with ERROR_INSUFFICIENT_BUFFER.
		/// </para>
		/// </param>
		/// <returns>
		/// <para>
		/// Returns the number of characters in the translated string, including a terminating null character, if successful. If the function
		/// succeeds and the value of cchDest is 0, the return value is the size of the buffer required to hold the translated string,
		/// including a terminating null character.
		/// </para>
		/// <para>
		/// This function returns 0 if it does not succeed. To get extended error information, the application can call GetLastError.
		/// </para>
		/// </returns>
		// int FoldString( _In_ DWORD dwMapFlags, _In_ LPCTSTR lpSrcStr, _In_ int cchSrc, _Out_opt_ LPTSTR lpDestStr, _In_ int cchDest); https://msdn.microsoft.com/en-us/library/windows/desktop/dd318063(v=vs.85).aspx
		[DllImport(Lib.Kernel32, SetLastError = true, CharSet = CharSet.Auto)]
		[PInvokeData("Winnls.h", MSDNShortId = "dd318063")]
		public static extern int FoldString(STRING_MAPPING dwMapFlags, string lpSrcStr, int cchSrc, StringBuilder lpDestStr, int cchDest);

		/// <summary>
		/// Deprecated. Retrieves character type information for the characters in the specified source string. For each character in the
		/// string, the function sets one or more bits in the corresponding 16-bit element of the output array. Each bit identifies a given
		/// character type, for example, letter, digit, or neither.
		/// </summary>
		/// <param name="Locale">
		/// <para>
		/// Locale identifier that specifies the locale. You can use the MAKELCID macro to create a locale identifier or use one of
		/// the predefined locale identifier values.
		/// </para>
		/// <para>Windows Vista and later: Custom locale identifiers are also supported.</para>
		/// </param>
		/// <param name="dwInfoType">
		/// Flags specifying the character type information to retrieve. For possible flag values, see the dwInfoType parameter of
		/// GetStringTypeW. For detailed information about the character type bits, see Remarks for GetStringTypeW.
		/// </param>
		/// <param name="lpSrcStr">
		/// Pointer to the ANSI string for which to retrieve the character types. The string can be a double-byte character set (DBCS) string
		/// if the supplied locale is appropriate for DBCS. The string is assumed to be null-terminated if cchSrc is set to any negative value.
		/// </param>
		/// <param name="cchSrc">
		/// Size, in characters, of the string indicated by lpSrcStr. If the size includes a terminating null character, the function
		/// retrieves character type information for that character. If the application sets the size to any negative integer, the source
		/// string is assumed to be null-terminated and the function calculates the size automatically with an additional character for the
		/// null termination.
		/// </param>
		/// <param name="lpCharType">
		/// Pointer to an array of 16-bit values. The length of this array must be large enough to receive one 16-bit value for each
		/// character in the source string. If cchSrc is not a negative number, lpCharType should be an array of words with cchSrc elements.
		/// If cchSrc is set to a negative number, lpCharType is an array of words with (length of lpSrcStr) + 1 elements. When the function
		/// returns, this array contains one word corresponding to each character in the source string.
		/// </param>
		/// <returns>
		/// Returns a nonzero value if successful, or 0 otherwise. To get extended error information, the application can call GetLastError.
		/// </returns>
		// BOOL GetStringTypeA( _In_ LCID Locale, _In_ DWORD dwInfoType, _In_ LPCSTR lpSrcStr, _In_ int cchSrc, _Out_ LPWORD lpCharType); https://msdn.microsoft.com/en-us/library/windows/desktop/dd318117(v=vs.85).aspx
		[DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true, CharSet = CharSet.Ansi)]
		[PInvokeData("Winnls.h", MSDNShortId = "dd318117")]
		[return: MarshalAs(UnmanagedType.Bool)]
		public static extern bool GetStringTypeA(LCID Locale, CHAR_TYPE_INFO dwInfoType, string lpSrcStr, int cchSrc, [In, Out, MarshalAs(UnmanagedType.LPArray)] ushort[] lpCharType);

		/// <summary>
		/// <para>
		/// Retrieves character type information for the characters in the specified source string. For each character in the string, the
		/// function sets one or more bits in the corresponding 16-bit element of the output array. Each bit identifies a given character
		/// type, for example, letter, digit, or neither.
		/// </para>
		/// <para>
		/// Caution: Using the GetStringTypeEx function incorrectly can compromise the security of your application. To avoid a
		/// buffer overflow, the application must set the output buffer size correctly. For more security information, see Security
		/// Considerations: Windows User Interface.
		/// </para>
		/// <para>
		/// Note: Unlike its close relatives GetStringTypeA and GetStringTypeW, this function exhibits appropriate ANSI or Unicode
		/// behavior through the use of the #define UNICODE switch. This is the recommended function for character type retrieval.
		/// </para>
		/// </summary>
		/// <param name="Locale">
		/// <para>
		/// Locale identifier that specifies the locale. This value uniquely defines the ANSI code page. You can use the MAKELCID macro to
		/// create a locale identifier or use one of the following predefined values.
		/// </para>
		/// <list type="bullet">
		/// <item><description>LOCALE_SYSTEM_DEFAULT</description></item>
		/// <item><description>LOCALE_USER_DEFAULT</description></item>
		/// </list>
		/// <para>Windows Vista and later: The following custom locale identifiers are also supported.</para>
		/// <list type="bullet">
		/// <item><description>LOCALE_CUSTOM_DEFAULT</description></item>
		/// <item><description>LOCALE_CUSTOM_UI_DEFAULT</description></item>
		/// <item><description>LOCALE_CUSTOM_UNSPECIFIED</description></item>
		/// </list>
		/// </param>
		/// <param name="dwInfoType">
		/// Flags specifying the character type information to retrieve. For possible flag values, see the dwInfoType parameter of
		/// GetStringTypeW. For detailed information about the character type bits, see Remarks for GetStringTypeW.
		/// </param>
		/// <param name="lpSrcStr">
		/// Pointer to the string for which to retrieve the character types. The string is assumed to be null-terminated if cchSrc is set to
		/// any negative value.
		/// </param>
		/// <param name="cchSrc">
		/// Size, in characters, of the string indicated by lpSrcStr. The size refers to bytes for the ANSI version of the function or wide
		/// characters for the Unicode version. If the size includes a terminating null character, the function retrieves character type
		/// information for that character. If the application sets the size to any negative integer, the source string is assumed to be
		/// null-terminated and the function calculates the size automatically with an additional character for the null termination.
		/// </param>
		/// <param name="lpCharType">
		/// Pointer to an array of 16-bit values. The length of this array must be large enough to receive one 16-bit value for each
		/// character in the source string. If cchSrc is not a negative number, lpCharType should be an array of words with cchSrc elements.
		/// If cchSrc is set to a negative number, lpCharType is an array of words with (length of lpSrcStr) + 1 elements. When the function
		/// returns, this array contains one word corresponding to each character in the source string.
		/// </param>
		/// <returns>
		/// <para>
		/// Returns a nonzero value if successful, or 0 otherwise. To get extended error information, the application can call GetLastError,
		/// which can return one of the following error codes:
		/// </para>
		/// <list type="bullet">
		/// <item><description>ERROR_INVALID_FLAGS. The values supplied for flags were not valid.</description></item>
		/// <item><description>ERROR_INVALID_PARAMETER. Any of the parameter values was invalid.</description></item>
		/// </list>
		/// </returns>
		/// <remarks>
		/// <para>For an overview of the use of the string functions, see Strings.</para>
		/// <para>
		/// Using the ANSI code page for the supplied locale, this function translates the source string from ANSI to Unicode. It then
		/// analyzes each Unicode character for character type information.
		/// </para>
		/// <para>
		/// The ANSI version of this function converts the source string to Unicode and calls the corresponding GetStringTypeW function. Thus
		/// the words in the output buffer correspond not to the original ANSI string but to its Unicode equivalent. The conversion from ANSI
		/// to Unicode can result in a change in string length, for example, a pair of ANSI characters can map to a single Unicode character.
		/// Therefore, the correspondence between the words in the output buffer and the characters in the original ANSI string is not
		/// one-to-one in all cases, for example, multibyte strings. Thus, the ANSI version of this function is of limited use for
		/// multi-character strings. The Unicode version of the function is recommended instead.
		/// </para>
		/// <para>
		/// This function circumvents a limitation caused by the difference in parameters between GetStringTypeA and GetStringTypeW. Because
		/// of the parameter difference, an application cannot automatically invoke the proper ANSI or Unicode version of a
		/// GetStringType* function through the use of the #define UNICODE switch. On the other hand, GetStringTypeEx behaves
		/// properly with regard to that switch. Thus it is the recommended function.
		/// </para>
		/// <para>
		/// When the ANSI version of this function is used with a Unicode-only locale identifier, the function can succeed because the
		/// operating system uses the system code page. However, characters that are undefined in the system code page appear in the string
		/// as a question mark (?).
		/// </para>
		/// <para>
		/// The values of the lpSrcStr and lpCharType parameters must not be the same. If they are the same, the function fails with ERROR_INVALID_PARAMETER.
		/// </para>
		/// <para>
		/// The Locale parameter is only used to perform string conversion to Unicode. It has nothing to do with the CTYPE* values supplied
		/// by the application. These values are solely determined by Unicode code points, and do not vary on a locale basis. For example,
		/// Greek letters are specified as C1_ALPHA for any value of Locale.
		/// </para>
		/// </remarks>
		// https://docs.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-getstringtypeexa BOOL GetStringTypeExA( LCID Locale, DWORD
		// dwInfoType, LPCSTR lpSrcStr, int cchSrc, LPWORD lpCharType );
		[DllImport(Lib.Kernel32, SetLastError = true, CharSet = CharSet.Auto)]
		[PInvokeData("winnls.h", MSDNShortId = "e0cd051f-6627-457a-9a83-d71de607f67f")]
		[return: MarshalAs(UnmanagedType.Bool)]
		public static extern bool GetStringTypeEx(LCID Locale, CHAR_TYPE_INFO dwInfoType, string lpSrcStr, int cchSrc, [In, Out, MarshalAs(UnmanagedType.LPArray)] ushort[] lpCharType);

		/// <summary>
		/// <para>
		/// Retrieves character type information for the characters in the specified source string, returned as values of the requested
		/// enumeration. For each character in the string, one or more bits are set in the corresponding element of the result. Each bit
		/// identifies a given character type, for example, letter, digit, or neither.
		/// </para>
		/// <para>
		/// This overload sizes the output buffer itself and passes -1 as the source length, so the source string is treated as
		/// null-terminated.
		/// </para>
		/// </summary>
		/// <typeparam name="TCtype">The return type requested. This must be one of the CtypeX enumerated types.</typeparam>
		/// <param name="lpSrcStr">The string for which to retrieve the character types. Must not be null or empty.</param>
		/// <param name="Locale">
		/// Locale identifier that specifies the locale. This value uniquely defines the ANSI code page. You can use the MAKELCID macro to
		/// create a locale identifier or use one of the predefined values LOCALE_SYSTEM_DEFAULT or LOCALE_USER_DEFAULT. Windows Vista and
		/// later: the custom locale identifiers LOCALE_CUSTOM_DEFAULT, LOCALE_CUSTOM_UI_DEFAULT and LOCALE_CUSTOM_UNSPECIFIED are also supported.
		/// </param>
		/// <returns>
		/// An array holding one <typeparamref name="TCtype"/> value for each character of <paramref name="lpSrcStr"/>, followed by one
		/// additional element for the terminating null character.
		/// </returns>
		/// <exception cref="ArgumentException"><typeparamref name="TCtype"/> is not one of the CtypeX enumerated types.</exception>
		/// <exception cref="ArgumentNullException"><paramref name="lpSrcStr"/> is null or empty.</exception>
		[PInvokeData("winnls.h", MSDNShortId = "e0cd051f-6627-457a-9a83-d71de607f67f")]
		public static TCtype[] GetStringTypeEx<TCtype>(string lpSrcStr, LCID Locale) where TCtype : unmanaged, System.Enum
		{
			// Resolve which CHAR_TYPE_INFO request corresponds to the requested enumeration type.
			if (!CorrespondingTypeAttribute.CanGet<TCtype, CHAR_TYPE_INFO>(out var ct))
				throw new ArgumentException($"{nameof(TCtype)} must be one of the CtypeX enumerated types.");
			if (string.IsNullOrEmpty(lpSrcStr))
				throw new ArgumentNullException(nameof(lpSrcStr));

			// With a negative cchSrc the API also reports on the terminating null, so one extra slot is required.
			var ctVals = new ushort[lpSrcStr.Length + 1];
			if (!GetStringTypeEx(Locale, ct, lpSrcStr, -1, ctVals))
				throw Win32Error.GetLastError().GetException();

			// Reinterpret each 16-bit result as the requested enumeration.
			return Array.ConvertAll(ctVals, v => (TCtype)Enum.ToObject(typeof(TCtype), v));
		}

		/// <summary>
		/// Retrieves character type information for the characters in the specified Unicode source string. For each character in the string,
		/// the function sets one or more bits in the corresponding 16-bit element of the output array. Each bit identifies a given character
		/// type, for example, letter, digit, or neither.
		/// </summary>
		/// <param name="dwInfoType">
		/// <para>
		/// Flags specifying the character type information to retrieve. This parameter can have the following values. The character types
		/// are divided into different levels as described in the Remarks section.
		/// </para>
		/// <list type="table">
		/// <listheader><term>Flag</term><description>Meaning</description></listheader>
		/// <item><term>CT_CTYPE1</term><description>Retrieve character type information.</description></item>
		/// <item><term>CT_CTYPE2</term><description>Retrieve bidirectional layout information.</description></item>
		/// <item><term>CT_CTYPE3</term><description>Retrieve text processing information.</description></item>
		/// </list>
		/// </param>
		/// <param name="lpSrcStr">
		/// Pointer to the Unicode string for which to retrieve the character types. The string is assumed to be null-terminated if cchSrc is
		/// set to any negative value.
		/// </param>
		/// <param name="cchSrc">
		/// Size, in characters, of the string indicated by lpSrcStr. If the size includes a terminating null character, the function
		/// retrieves character type information for that character. If the application sets the size to any negative integer, the source
		/// string is assumed to be null-terminated and the function calculates the size automatically with an additional character for the
		/// null termination.
		/// </param>
		/// <param name="lpCharType">
		/// Pointer to an array of 16-bit values. The length of this array must be large enough to receive one 16-bit value for each
		/// character in the source string. If cchSrc is not a negative number, lpCharType should be an array of words with cchSrc elements.
		/// If cchSrc is set to a negative number, lpCharType is an array of words with (length of lpSrcStr) + 1 elements. When the function
		/// returns, this array contains one word corresponding to each character in the source string.
		/// </param>
		/// <returns>
		/// Returns a nonzero value if successful, or 0 otherwise. To get extended error information, the application can call GetLastError.
		/// </returns>
		// BOOL GetStringTypeW( _In_ DWORD dwInfoType, _In_ LPCWSTR lpSrcStr, _In_ int cchSrc, _Out_ LPWORD lpCharType); https://msdn.microsoft.com/en-us/library/windows/desktop/dd318119(v=vs.85).aspx
		[DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true, CharSet = CharSet.Unicode)]
		[PInvokeData("Stringapiset.h", MSDNShortId = "dd318119")]
		[return: MarshalAs(UnmanagedType.Bool)]
		public static extern bool GetStringTypeW(CHAR_TYPE_INFO dwInfoType, string lpSrcStr, int cchSrc, [In, Out, MarshalAs(UnmanagedType.LPArray)] ushort[] lpCharType);

		///
		/// Maps a character string to a UTF-16 (wide character) string. The character string is not necessarily from a multibyte character set.
		///
		///
		///
		/// Code page to use in performing the conversion. This parameter can be set to the value of any code page that is installed or
		/// available in the operating system. For a list of code pages, see Code Page Identifiers. Your application can also specify one of
		/// the values shown in the following table.
/// /// /// /// /// Value /// Meaning /// /// /// CP_ACP /// The system default Windows ANSI code page. /// /// /// CP_MACCP /// The current system Macintosh code page. /// /// /// CP_OEMCP /// The current system OEM code page. /// /// /// CP_SYMBOL /// Symbol code page (42). /// /// /// CP_THREAD_ACP /// The Windows ANSI code page for the current thread. /// /// /// CP_UTF7 /// UTF-7. Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred. /// /// /// CP_UTF8 /// UTF-8. /// /// /// /// /// /// /// Flags indicating the conversion type. The application can specify a combination of the following values, with MB_PRECOMPOSED /// being the default. MB_PRECOMPOSED and MB_COMPOSITE are mutually exclusive. MB_USEGLYPHCHARS and MB_ERR_INVALID_CHARS can be set /// regardless of the state of the other flags. /// /// /// /// /// Value /// Meaning /// /// /// MB_COMPOSITE /// /// Always use decomposed characters, that is, characters in which a base character and one or more nonspacing characters each have /// distinct code point values. For example, &#196; is represented by A + &#168;: LATIN CAPITAL LETTER A (U+0041) + COMBINING /// DIAERESIS (U+0308). Note that this flag cannot be used with MB_PRECOMPOSED. /// /// /// /// MB_ERR_INVALID_CHARS /// /// Fail if an invalid input character is encountered. Starting with Windows Vista, the function does not drop illegal code points if /// the application does not set this flag, but instead replaces illegal sequences with U+FFFD (encoded as appropriate for the /// specified codepage).Windows 2000 with SP4 and later, Windows XP: If this flag is not set, the function silently drops illegal /// code points. A call to GetLastError returns ERROR_NO_UNICODE_TRANSLATION. /// /// /// /// MB_PRECOMPOSED /// /// Default; do not use with MB_COMPOSITE. Always use precomposed characters, that is, characters having a single character value for /// a base or nonspacing character combination. 
For example, in the character &#232;, the e is the base character and the accent /// grave mark is the nonspacing character. If a single Unicode code point is defined for a character, the application should use it /// instead of a separate base character and a nonspacing character. For example, &#196; is represented by the single Unicode /// code point LATIN CAPITAL LETTER A WITH DIAERESIS (U+00C4). /// /// /// /// MB_USEGLYPHCHARS /// Use glyph characters instead of control characters. /// /// /// /// For the code pages listed below, dwFlags must be set to 0. Otherwise, the function fails with ERROR_INVALID_FLAGS. /// /// Pointer to the character string to convert. /// /// /// Size, in bytes, of the string indicated by the lpMultiByteStr parameter. Alternatively, this parameter can be set to -1 if the /// string is null-terminated. Note that, if cbMultiByte is 0, the function fails. /// /// /// If this parameter is -1, the function processes the entire input string, including the terminating null character. Therefore, the /// resulting Unicode string has a terminating null character, and the length returned by the function includes this character. /// /// /// If this parameter is set to a positive integer, the function processes exactly the specified number of bytes. If the provided /// size does not include a terminating null character, the resulting Unicode string is not null-terminated, and the returned length /// does not include this character. /// /// /// Pointer to a buffer that receives the converted string. /// /// Size, in characters, of the buffer indicated by lpWideCharStr. If this value is 0, the function returns the required buffer size, /// in characters, including any terminating null character, and makes no use of the lpWideCharStr buffer. /// /// /// /// Returns the number of characters written to the buffer indicated by lpWideCharStr if successful. 
If the function succeeds and /// cchWideChar is 0, the return value is the required size, in characters, for the buffer indicated by lpWideCharStr. Also see /// dwFlags for info about how the MB_ERR_INVALID_CHARS flag affects the return value when invalid sequences are input. /// /// /// The function returns 0 if it does not succeed. To get extended error information, the application can call GetLastError, /// which can return one of the following error codes: /// /// // int MultiByteToWideChar( _In_ UINT CodePage, _In_ DWORD dwFlags, _In_ LPCSTR lpMultiByteStr, _In_ int cbMultiByte, _Out_opt_ // LPWSTR lpWideCharStr, _In_ int cchWideChar); https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx [DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true)] [PInvokeData("Stringapiset.h", MSDNShortId = "dd319072")] public static extern int MultiByteToWideChar(uint CodePage, MBCONV dwFlags, [In] [MarshalAs(UnmanagedType.LPStr)] string lpMultiByteStr, int cbMultiByte, [Out] [MarshalAs(UnmanagedType.LPWStr)] StringBuilder lpWideCharStr, int cchWideChar); /// /// Maps a UTF-16 (wide character) string to a new character string. The new character string is not necessarily from a multibyte /// character set. /// /// /// /// Code page to use in performing the conversion. This parameter can be set to the value of any code page that is installed or /// available in the operating system. For a list of code pages, see Code Page Identifiers. Your application can also specify one of /// the values shown in the following table. /// /// /// /// /// Value /// Meaning /// /// /// CP_ACP /// The system default Windows ANSI code page. /// /// /// CP_MACCP /// The current system Macintosh code page. /// /// /// CP_OEMCP /// The current system OEM code page. /// /// /// CP_SYMBOL /// Windows 2000: Symbol code page (42). /// /// /// CP_THREAD_ACP /// Windows 2000: The Windows ANSI code page for the current thread. /// /// /// CP_UTF7 /// /// UTF-7. 
Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred. With this value set, /// lpDefaultChar and lpUsedDefaultChar must be set to NULL. /// /// /// /// CP_UTF8 /// UTF-8. With this value set, lpDefaultChar and lpUsedDefaultChar must be set to NULL. /// /// /// /// /// /// /// Flags indicating the conversion type. The application can specify a combination of the following values. The function performs /// more quickly when none of these flags is set. The application should specify WC_NO_BEST_FIT_CHARS and WC_COMPOSITECHECK with the /// specific value WC_DEFAULTCHAR to retrieve all possible conversion results. If all three values are not provided, some results /// will be missing. /// /// /// /// /// Value /// Meaning /// /// /// WC_COMPOSITECHECK /// /// Convert composite characters, consisting of a base character and a nonspacing character, each with different character values. /// Translate these characters to precomposed characters, which have a single character value for a base-nonspacing character /// combination. For example, in the character &#232;, the e is the base character and the accent grave mark is the nonspacing /// character.Your application can combine WC_COMPOSITECHECK with any one of the following flags, with the default being WC_SEPCHARS. /// These flags determine the behavior of the function when no precomposed mapping for a base-nonspacing character combination in a /// Unicode string is available. If none of these flags is supplied, the function behaves as if the WC_SEPCHARS flag is set. For more /// information, see WC_COMPOSITECHECK and related flags in the Remarks section. /// /// /// /// WC_ERR_INVALID_CHARS /// /// Windows Vista and later: Fail (by returning 0 and setting the last-error code to ERROR_NO_UNICODE_TRANSLATION) if an invalid /// input character is encountered. You can retrieve the last-error code with a call to GetLastError. 
If this flag is not set, the /// function replaces illegal sequences with U+FFFD (encoded as appropriate for the specified codepage) and succeeds by returning the /// length of the converted string. Note that this flag only applies when CodePage is specified as CP_UTF8 or 54936. It cannot be /// used with other code page values. /// /// /// /// WC_NO_BEST_FIT_CHARS /// /// Translate any Unicode characters that do not translate directly to multibyte equivalents to the default character specified by /// lpDefaultChar. In other words, if translating from Unicode to multibyte and back to Unicode again does not yield the same Unicode /// character, the function uses the default character. This flag can be used by itself or in combination with the other defined /// flags.For strings that require validation, such as file, resource, and user names, the application should always use the /// WC_NO_BEST_FIT_CHARS flag. This flag prevents the function from mapping characters to characters that appear similar but have /// very different semantics. In some cases, the semantic change can be extreme. For example, the symbol for &quot;∞&quot; /// (infinity) maps to 8 (eight) in some code pages. /// /// /// /// /// For the code pages listed below, dwFlags must be 0. Otherwise, the function fails with ERROR_INVALID_FLAGS. /// /// Pointer to the Unicode string to convert. /// /// /// Size, in characters, of the string indicated by lpWideCharStr. Alternatively, this parameter can be set to -1 if the string is /// null-terminated. If cchWideChar is set to 0, the function fails. /// /// /// If this parameter is -1, the function processes the entire input string, including the terminating null character. Therefore, the /// resulting character string has a terminating null character, and the length returned by the function includes this character. /// /// /// If this parameter is set to a positive integer, the function processes exactly the specified number of characters. 
If the /// provided size does not include a terminating null character, the resulting character string is not null-terminated, and the /// returned length does not include this character. /// /// /// Pointer to a buffer that receives the converted string. /// /// Size, in bytes, of the buffer indicated by lpMultiByteStr. If this parameter is set to 0, the function returns the required /// buffer size for lpMultiByteStr and makes no use of the output parameter itself. /// /// /// /// Pointer to the character to use if a character cannot be represented in the specified code page. The application sets this /// parameter to NULL if the function is to use a system default value. To obtain the system default character, the /// application can call the GetCPInfo or GetCPInfoEx function. /// /// /// For the CP_UTF7 and CP_UTF8 settings for CodePage, this parameter must be set to NULL. Otherwise, the function fails with ERROR_INVALID_PARAMETER. /// /// /// /// /// Pointer to a flag that indicates if the function has used a default character in the conversion. The flag is set to TRUE /// if one or more characters in the source string cannot be represented in the specified code page. Otherwise, the flag is set to /// FALSE. This parameter can be set to NULL. /// /// /// For the CP_UTF7 and CP_UTF8 settings for CodePage, this parameter must be set to NULL. Otherwise, the function fails with ERROR_INVALID_PARAMETER. /// /// /// /// /// If successful, returns the number of bytes written to the buffer pointed to by lpMultiByteStr. If the function succeeds and /// cbMultiByte is 0, the return value is the required size, in bytes, for the buffer indicated by lpMultiByteStr. Also see dwFlags /// for info about how the WC_ERR_INVALID_CHARS flag affects the return value when invalid sequences are input. /// /// /// The function returns 0 if it does not succeed. 
To get extended error information, the application can call GetLastError, /// which can return one of the following error codes: /// /// // int WideCharToMultiByte( _In_ UINT CodePage, _In_ DWORD dwFlags, _In_ LPCWSTR lpWideCharStr, _In_ int cchWideChar, _Out_opt_ LPSTR // lpMultiByteStr, _In_ int cbMultiByte, _In_opt_ LPCSTR lpDefaultChar, _Out_opt_ LPBOOL lpUsedDefaultChar); https://msdn.microsoft.com/en-us/library/windows/desktop/dd374130(v=vs.85).aspx [DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true)] [PInvokeData("Stringapiset.h", MSDNShortId = "dd374130")] public static extern int WideCharToMultiByte(uint CodePage, WCCONV dwFlags, [In] [MarshalAs(UnmanagedType.LPWStr)] string lpWideCharStr, int cchWideChar, [Out] [MarshalAs(UnmanagedType.LPStr)] StringBuilder lpMultiByteStr, int cbMultiByte, [In] [MarshalAs(UnmanagedType.LPStr)] string lpDefaultChar, [MarshalAs(UnmanagedType.Bool)] out bool lpUsedDefaultChar); /// /// Maps a UTF-16 (wide character) string to a new character string. The new character string is not necessarily from a multibyte /// character set. /// /// /// /// Code page to use in performing the conversion. This parameter can be set to the value of any code page that is installed or /// available in the operating system. For a list of code pages, see Code Page Identifiers. Your application can also specify one of /// the values shown in the following table. /// /// /// /// /// Value /// Meaning /// /// /// CP_ACP /// The system default Windows ANSI code page. /// /// /// CP_MACCP /// The current system Macintosh code page. /// /// /// CP_OEMCP /// The current system OEM code page. /// /// /// CP_SYMBOL /// Windows 2000: Symbol code page (42). /// /// /// CP_THREAD_ACP /// Windows 2000: The Windows ANSI code page for the current thread. /// /// /// CP_UTF7 /// /// UTF-7. Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred. 
With this value set, /// lpDefaultChar and lpUsedDefaultChar must be set to NULL. /// /// /// /// CP_UTF8 /// UTF-8. With this value set, lpDefaultChar and lpUsedDefaultChar must be set to NULL. /// /// /// /// /// /// /// Flags indicating the conversion type. The application can specify a combination of the following values. The function performs /// more quickly when none of these flags is set. The application should specify WC_NO_BEST_FIT_CHARS and WC_COMPOSITECHECK with the /// specific value WC_DEFAULTCHAR to retrieve all possible conversion results. If all three values are not provided, some results /// will be missing. /// /// /// /// /// Value /// Meaning /// /// /// WC_COMPOSITECHECK /// /// Convert composite characters, consisting of a base character and a nonspacing character, each with different character values. /// Translate these characters to precomposed characters, which have a single character value for a base-nonspacing character /// combination. For example, in the character &#232;, the e is the base character and the accent grave mark is the nonspacing /// character.Your application can combine WC_COMPOSITECHECK with any one of the following flags, with the default being WC_SEPCHARS. /// These flags determine the behavior of the function when no precomposed mapping for a base-nonspacing character combination in a /// Unicode string is available. If none of these flags is supplied, the function behaves as if the WC_SEPCHARS flag is set. For more /// information, see WC_COMPOSITECHECK and related flags in the Remarks section. /// /// /// /// WC_ERR_INVALID_CHARS /// /// Windows Vista and later: Fail (by returning 0 and setting the last-error code to ERROR_NO_UNICODE_TRANSLATION) if an invalid /// input character is encountered. You can retrieve the last-error code with a call to GetLastError. 
If this flag is not set, the /// function replaces illegal sequences with U+FFFD (encoded as appropriate for the specified codepage) and succeeds by returning the /// length of the converted string. Note that this flag only applies when CodePage is specified as CP_UTF8 or 54936. It cannot be /// used with other code page values. /// /// /// /// WC_NO_BEST_FIT_CHARS /// /// Translate any Unicode characters that do not translate directly to multibyte equivalents to the default character specified by /// lpDefaultChar. In other words, if translating from Unicode to multibyte and back to Unicode again does not yield the same Unicode /// character, the function uses the default character. This flag can be used by itself or in combination with the other defined /// flags.For strings that require validation, such as file, resource, and user names, the application should always use the /// WC_NO_BEST_FIT_CHARS flag. This flag prevents the function from mapping characters to characters that appear similar but have /// very different semantics. In some cases, the semantic change can be extreme. For example, the symbol for &quot;∞&quot; /// (infinity) maps to 8 (eight) in some code pages. /// /// /// /// /// For the code pages listed below, dwFlags must be 0. Otherwise, the function fails with ERROR_INVALID_FLAGS. /// /// Pointer to the Unicode string to convert. /// /// /// Size, in characters, of the string indicated by lpWideCharStr. Alternatively, this parameter can be set to -1 if the string is /// null-terminated. If cchWideChar is set to 0, the function fails. /// /// /// If this parameter is -1, the function processes the entire input string, including the terminating null character. Therefore, the /// resulting character string has a terminating null character, and the length returned by the function includes this character. /// /// /// If this parameter is set to a positive integer, the function processes exactly the specified number of characters. 
If the /// provided size does not include a terminating null character, the resulting character string is not null-terminated, and the /// returned length does not include this character. /// /// /// Pointer to a buffer that receives the converted string. /// /// Size, in bytes, of the buffer indicated by lpMultiByteStr. If this parameter is set to 0, the function returns the required /// buffer size for lpMultiByteStr and makes no use of the output parameter itself. /// /// /// /// Pointer to the character to use if a character cannot be represented in the specified code page. The application sets this /// parameter to NULL if the function is to use a system default value. To obtain the system default character, the /// application can call the GetCPInfo or GetCPInfoEx function. /// /// /// For the CP_UTF7 and CP_UTF8 settings for CodePage, this parameter must be set to NULL. Otherwise, the function fails with ERROR_INVALID_PARAMETER. /// /// /// /// /// Pointer to a flag that indicates if the function has used a default character in the conversion. The flag is set to TRUE /// if one or more characters in the source string cannot be represented in the specified code page. Otherwise, the flag is set to /// FALSE. This parameter can be set to NULL. /// /// /// For the CP_UTF7 and CP_UTF8 settings for CodePage, this parameter must be set to NULL. Otherwise, the function fails with ERROR_INVALID_PARAMETER. /// /// /// /// /// If successful, returns the number of bytes written to the buffer pointed to by lpMultiByteStr. If the function succeeds and /// cbMultiByte is 0, the return value is the required size, in bytes, for the buffer indicated by lpMultiByteStr. Also see dwFlags /// for info about how the WC_ERR_INVALID_CHARS flag affects the return value when invalid sequences are input. /// /// /// The function returns 0 if it does not succeed. 
To get extended error information, the application can call GetLastError, /// which can return one of the following error codes: /// /// // int WideCharToMultiByte( _In_ UINT CodePage, _In_ DWORD dwFlags, _In_ LPCWSTR lpWideCharStr, _In_ int cchWideChar, _Out_opt_ LPSTR // lpMultiByteStr, _In_ int cbMultiByte, _In_opt_ LPCSTR lpDefaultChar, _Out_opt_ LPBOOL lpUsedDefaultChar); https://msdn.microsoft.com/en-us/library/windows/desktop/dd374130(v=vs.85).aspx [DllImport(Lib.Kernel32, SetLastError = true, ExactSpelling = true)] [PInvokeData("Stringapiset.h", MSDNShortId = "dd374130")] public static extern int WideCharToMultiByte(uint CodePage, WCCONV dwFlags, [In] [MarshalAs(UnmanagedType.LPWStr)] string lpWideCharStr, int cchWideChar, [Out] [MarshalAs(UnmanagedType.LPStr)] StringBuilder lpMultiByteStr, int cbMultiByte, IntPtr lpDefaultChar = default, IntPtr lpUsedDefaultChar = default); } }