Tizen Native API
5.0
|
The Ustring module provides general Unicode string handling information.
#include <utils_i18n.h>
The Ustring module provides general Unicode string handling information.
The following example converts a byte string to a Unicode string and then converts it to uppercase letters.
char str_1[64] = {0,};
i18n_uchar uchar_str_1[64] = {0,};
i18n_uchar uchar_str_2[64] = {0,};
int uchar_len = 0;
i18n_error_code_e err_code = I18N_ERROR_NONE;
strcpy(str_1, "tizen");
dlog_print(DLOG_INFO, LOG_TAG, "str_1 is %s\n", str_1); // str_1 is tizen
// converts a byte string to a unicode string
i18n_ustring_copy_ua_n(uchar_str_1, str_1, strlen(str_1));
// converts to uppercase letters
i18n_ustring_to_upper(uchar_str_2, 64, uchar_str_1, i18n_ustring_get_length(uchar_str_1), "en_US", &err_code);
i18n_ustring_copy_au(str_1, uchar_str_2);
dlog_print(DLOG_INFO, LOG_TAG, "str_1 is %s\n", str_1); // str_1 is TIZEN
Functions | |
int32_t | i18n_ustring_get_length (const i18n_uchar *s) |
Determines the length of an array of i18n_uchar. | |
int32_t | i18n_ustring_count_char32 (const i18n_uchar *s, int32_t length) |
Counts Unicode code points in the length i18n_uchar code units of the string. | |
i18n_ubool | i18n_ustring_has_more_char32_than (const i18n_uchar *s, int32_t length, int32_t number) |
Checks if the string contains more Unicode code points than a certain number. | |
i18n_uchar * | i18n_ustring_cat (i18n_uchar *dest, const i18n_uchar *src) |
Concatenates two ustrings. | |
i18n_uchar * | i18n_ustring_cat_n (i18n_uchar *dest, const i18n_uchar *src, int32_t n) |
Concatenate two ustrings. | |
i18n_uchar * | i18n_ustring_string (const i18n_uchar *s, const i18n_uchar *sub_string) |
Finds the first occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_find_first (const i18n_uchar *s, int32_t length, const i18n_uchar *sub_string, int32_t sub_length) |
Finds the first occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_char (const i18n_uchar *s, i18n_uchar c) |
Finds the first occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_char32 (const i18n_uchar *s, i18n_uchar32 c) |
Finds the first occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_r_string (const i18n_uchar *s, const i18n_uchar *sub_string) |
Finds the last occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_find_last (const i18n_uchar *s, int32_t length, const i18n_uchar *sub_string, int32_t sub_length) |
Finds the last occurrence of a substring in a string. | |
i18n_uchar * | i18n_ustring_r_char (const i18n_uchar *s, i18n_uchar c) |
Finds the last occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_r_char32 (const i18n_uchar *s, i18n_uchar32 c) |
Finds the last occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_pbrk (const i18n_uchar *string, const i18n_uchar *match_set) |
Locates the first occurrence in the string of any of the characters in the string match_set. | |
int32_t | i18n_ustring_cspn (const i18n_uchar *string, const i18n_uchar *match_set) |
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in match_set. | |
int32_t | i18n_ustring_spn (const i18n_uchar *string, const i18n_uchar *match_set) |
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in match_set. | |
i18n_uchar * | i18n_ustring_tokenizer_r (i18n_uchar *src, const i18n_uchar *delim, i18n_uchar **save_state) |
The string tokenizer API allows an application to break a string into tokens. | |
int32_t | i18n_ustring_compare (const i18n_uchar *s1, const i18n_uchar *s2) |
Compares two Unicode strings for bitwise equality (code unit order). | |
int32_t | i18n_ustring_compare_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2) |
Compare two Unicode strings in code point order. | |
int32_t | i18n_ustring_compare_binary_order (const i18n_uchar *s1, int32_t length1, const i18n_uchar *s2, int32_t length2, i18n_ubool code_point_order) |
Compare two Unicode strings (binary order). | |
int32_t | i18n_ustring_case_compare_with_length (const i18n_uchar *s1, int32_t length1, const i18n_uchar *s2, int32_t length2, uint32_t options, i18n_error_code_e *error_code) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_compare_n (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n) |
Compare two ustrings for bitwise equality. | |
int32_t | i18n_ustring_compare_n_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n) |
Compare two Unicode strings in code point order. | |
int32_t | i18n_ustring_case_compare (const i18n_uchar *s1, const i18n_uchar *s2, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_case_compare_n (const i18n_uchar *s1, const i18n_uchar *s2, int32_t n, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
int32_t | i18n_ustring_mem_case_compare (const i18n_uchar *s1, const i18n_uchar *s2, int32_t length, uint32_t options) |
Compare two strings case-insensitively using full case folding. | |
i18n_uchar * | i18n_ustring_copy (i18n_uchar *dest, const i18n_uchar *src) |
Copies a ustring. Adds a NULL terminator. | |
i18n_uchar * | i18n_ustring_copy_n (i18n_uchar *dest, const i18n_uchar *src, int32_t n) |
Copies a ustring. | |
i18n_uchar * | i18n_ustring_copy_ua (i18n_uchar *dest, const char *src) |
Copies a byte string encoded in the default codepage to a ustring. | |
i18n_uchar * | i18n_ustring_copy_ua_n (i18n_uchar *dest, const char *src, int32_t n) |
Copies a byte string encoded in the default codepage to a ustring. | |
char * | i18n_ustring_copy_au (char *dest, const i18n_uchar *src) |
Copies a ustring to a byte string encoded in the default codepage. | |
char * | i18n_ustring_copy_au_n (char *dest, const i18n_uchar *src, int32_t n) |
Copies a ustring to a byte string encoded in the default codepage. | |
i18n_uchar * | i18n_ustring_mem_copy (i18n_uchar *dest, const i18n_uchar *src, int32_t count) |
Synonym for memcpy(), but with i18n_uchar characters only. | |
i18n_uchar * | i18n_ustring_mem_move (i18n_uchar *dest, const i18n_uchar *src, int32_t count) |
Synonym for memmove(), but with i18n_uchar characters only. | |
i18n_uchar * | i18n_ustring_mem_set (i18n_uchar *dest, const i18n_uchar c, int32_t count) |
Initialize count characters of dest to c. | |
int32_t | i18n_ustring_mem_compare (const i18n_uchar *buf1, const i18n_uchar *buf2, int32_t count) |
Compare the first count i18n_uchar characters of each buffer. | |
int32_t | i18n_ustring_mem_compare_code_point_order (const i18n_uchar *s1, const i18n_uchar *s2, int32_t count) |
Compare two Unicode strings in code point order. | |
i18n_uchar * | i18n_ustring_mem_char (const i18n_uchar *s, i18n_uchar c, int32_t count) |
Finds the first occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_mem_char32 (const i18n_uchar *s, i18n_uchar32 c, int32_t count) |
Finds the first occurrence of a code point in a string. | |
i18n_uchar * | i18n_ustring_mem_r_char (const i18n_uchar *s, i18n_uchar c, int32_t count) |
Finds the last occurrence of a BMP code point in a string. | |
i18n_uchar * | i18n_ustring_mem_r_char32 (const i18n_uchar *s, i18n_uchar32 c, int32_t count) |
Finds the last occurrence of a code point in a string. | |
int32_t | i18n_ustring_unescape (const char *src, i18n_uchar *dest, int32_t dest_capacity) |
Unescape a string of characters and write the resulting Unicode characters to the destination buffer. | |
i18n_uchar32 | i18n_ustring_unescape_at (i18n_ustring_unescape_char_at_cb char_at, int32_t *offset, int32_t length, void *context) |
Unescape a single sequence. | |
int32_t | i18n_ustring_to_upper (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, const char *locale, i18n_error_code_e *error_code) |
Uppercases the characters in a string. | |
int32_t | i18n_ustring_to_lower (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, const char *locale, i18n_error_code_e *error_code) |
Lowercase the characters in a string. | |
int32_t | i18n_ustring_to_title_new (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, i18n_ubreak_iterator_h title_iter, const char *locale) |
Titlecases a string. | |
int32_t | i18n_ustring_fold_case (i18n_uchar *dest, int32_t dest_capacity, const i18n_uchar *src, int32_t src_len, uint32_t options, i18n_error_code_e *error_code) |
Case-folds the characters in a string. | |
wchar_t * | i18n_ustring_to_WCS (wchar_t *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-16 string to a wchar_t string. | |
i18n_uchar * | i18n_ustring_from_WCS (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const wchar_t *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a wchar_t string to UTF-16. | |
char * | i18n_ustring_to_UTF8 (char *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Converts a UTF-16 string to UTF-8. | |
i18n_uchar * | i18n_ustring_from_UTF8 (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_error_code_e *error_code) |
Converts a UTF-8 string to UTF-16. | |
char * | i18n_ustring_to_UTF8_with_sub (char *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-8. Same as i18n_ustring_to_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code. | |
i18n_uchar * | i18n_ustring_from_UTF8_with_sub (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-8 string to UTF-16. | |
i18n_uchar * | i18n_ustring_from_UTF8_lenient (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const char *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-8 string to UTF-16. | |
i18n_uchar32 * | i18n_ustring_to_UTF32 (i18n_uchar32 *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-32. | |
i18n_uchar * | i18n_ustring_from_UTF32 (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar32 *src, int32_t src_len, i18n_error_code_e *error_code) |
Convert a UTF-32 string to UTF-16. | |
i18n_uchar32 * | i18n_ustring_to_UTF32_with_sub (i18n_uchar32 *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-16 string to UTF-32. | |
i18n_uchar * | i18n_ustring_from_UTF32_with_sub (i18n_uchar *dest, int32_t dest_capacity, int32_t *dest_len, const i18n_uchar32 *src, int32_t src_len, i18n_uchar32 sub_char, int32_t *num_substitutions, i18n_error_code_e *error_code) |
Convert a UTF-32 string to UTF-16. Same as i18n_ustring_from_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code. | |
Typedefs | |
typedef i18n_uchar(* | i18n_ustring_unescape_char_at_cb )(int32_t offset, void *context) |
Callback function for i18n_ustring_unescape_at() that returns a character of the source text given an offset and a context pointer. The context pointer will be whatever is passed into i18n_ustring_unescape_at(). | |
Defines | |
#define | I18N_USTRING_U_FOLD_CASE_DEFAULT 0 |
Option value for case folding: use default mappings defined in CaseFolding.txt. | |
#define | I18N_USTRING_U_COMPARE_CODE_POINT_ORDER 0x8000 |
Option bit for i18n_ustring_case_compare_with_length(), i18n_ustring_case_compare(), etc: Compare strings in code point order instead of code unit order. | |
#define | I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
Option value for case folding: Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az). Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings. Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings. |
#define I18N_USTRING_U_COMPARE_CODE_POINT_ORDER 0x8000 |
Option bit for i18n_ustring_case_compare_with_length(), i18n_ustring_case_compare(), etc: Compare strings in code point order instead of code unit order.
#define I18N_USTRING_U_FOLD_CASE_DEFAULT 0 |
Option value for case folding: use default mappings defined in CaseFolding.txt.
#define I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 |
Option value for case folding:
Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az).
Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings.
Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings.
typedef i18n_uchar(* i18n_ustring_unescape_char_at_cb)(int32_t offset, void *context) |
Callback function for i18n_ustring_unescape_at() that returns a character of the source text given an offset and a context pointer.
The context pointer will be whatever is passed into i18n_ustring_unescape_at().
[in] | offset | The offset into the source text of the character to return. |
[in] | context | an opaque pointer passed directly into i18n_ustring_unescape_at() |
character | the character represented by the escape sequence at offset |
int32_t i18n_ustring_case_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
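[in] | options | A bit set of options: I18N_USTRING_U_FOLD_CASE_DEFAULT (or 0) for default case folding with comparison in code unit order; I18N_USTRING_U_COMPARE_CODE_POINT_ORDER to compare in code point order instead of code unit order; I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I to use the Turkic-specific mappings for dotted I and dotless i. |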
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_case_compare_n | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
[in] | n | The maximum number of characters from each string to case-fold and then compare. |
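[in] | options | A bit set of options: I18N_USTRING_U_FOLD_CASE_DEFAULT (or 0) for default case folding with comparison in code unit order; I18N_USTRING_U_COMPARE_CODE_POINT_ORDER to compare in code point order instead of code unit order; I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I to use the Turkic-specific mappings for dotted I and dotless i. |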
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_case_compare_with_length | ( | const i18n_uchar * | s1, |
int32_t | length1, | ||
const i18n_uchar * | s2, | ||
int32_t | length2, | ||
uint32_t | options, | ||
i18n_error_code_e * | error_code | ||
) |
Compare two strings case-insensitively using full case folding.
The comparison can be done in UTF-16 code unit order or in code point order. They differ only when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths, unlike the ANSI C-like i18n_ustring_compare() and i18n_ustring_mem_compare() etc. NULL-terminated strings are possible with length arguments of -1.
[in] | s1 | First source string. |
[in] | length1 | Length of first source string, or -1 if NULL-terminated. |
[in] | s2 | Second source string. |
[in] | length2 | Length of second source string, or -1 if NULL-terminated. |
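[in] | options | A bit set of options: I18N_USTRING_U_FOLD_CASE_DEFAULT (or 0) for default case folding with comparison in code unit order; I18N_USTRING_U_COMPARE_CODE_POINT_ORDER to compare in code point order instead of code unit order; I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I to use the Turkic-specific mappings for dotted I and dotless i. |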
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_cat | ( | i18n_uchar * | dest, |
const i18n_uchar * | src | ||
) |
Concatenates two ustrings.
Appends a copy of src, including the NULL terminator, to dest. The initial copied character from src overwrites the NULL terminator in dest.
[out] | dest | The destination string. |
[in] | src | The source string. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_cat_n | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Concatenate two ustrings.
Appends at most n characters from src to dest and adds a NULL terminator. The initial copied character from src overwrites the NULL terminator in dest.
[out] | dest | The destination string. |
[in] | src | The source string. |
[in] | n | The maximum number of characters to append; no-op if <=0. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_char | ( | const i18n_uchar * | s, |
i18n_uchar | c | ||
) |
Finds the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (NULL-terminated). |
[in] | c | The BMP code point to find. |
Returns a pointer to the first occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c | ||
) |
Finds the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (NULL-terminated). |
[in] | c | The code point to find. |
Returns a pointer to the first occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2 | ||
) |
Compares two Unicode strings for bitwise equality (code unit order).
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_compare_binary_order | ( | const i18n_uchar * | s1, |
int32_t | length1, | ||
const i18n_uchar * | s2, | ||
int32_t | length2, | ||
i18n_ubool | code_point_order | ||
) |
Compare two Unicode strings (binary order).
The comparison can be done in code unit order or in code point order. They differ only in UTF-16 when comparing supplementary code points (U+10000..U+10ffff) to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). In code unit order, high BMP code points sort after supplementary code points because they are stored as pairs of surrogates which are at U+d800..U+dfff.
This function works with strings of different explicitly specified lengths, unlike the ANSI C-like i18n_ustring_compare() and i18n_ustring_mem_compare() etc. NULL-terminated strings are possible with length arguments of -1.
[in] | s1 | First source string. |
[in] | length1 | Length of first source string, or -1 if NULL-terminated. |
[in] | s2 | Second source string. |
[in] | length2 | Length of second source string, or -1 if NULL-terminated. |
[in] | code_point_order | Choose between code unit order (false) and code point order (true). |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_compare_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2 | ||
) |
Compare two Unicode strings in code point order.
See i18n_ustring_compare_binary_order() for details.
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_compare_n | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n | ||
) |
Compare two ustrings for bitwise equality.
Compares at most n characters.
[in] | s1 | A string to compare (can be NULL/invalid if n<=0). |
[in] | s2 | A string to compare (can be NULL/invalid if n<=0). |
[in] | n | The maximum number of characters to compare; always returns 0 if n<=0. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_compare_n_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | n | ||
) |
Compare two Unicode strings in code point order.
This is different in UTF-16 from i18n_ustring_compare_n() if supplementary characters are present. For details, see i18n_ustring_compare_binary_order().
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
[in] | n | The maximum number of characters to compare. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_copy | ( | i18n_uchar * | dest, |
const i18n_uchar * | src | ||
) |
Copies a ustring. Adds a NULL terminator.
[out] | dest | The destination string |
[in] | src | The source string |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
char* i18n_ustring_copy_au | ( | char * | dest, |
const i18n_uchar * | src | ||
) |
Copies a ustring to a byte string encoded in the default codepage.
Adds a NULL terminator. Performs an i18n_uchar to host byte conversion.
[out] | dest | The destination string |
[in] | src | The source string |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
char* i18n_ustring_copy_au_n | ( | char * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Copies a ustring to a byte string encoded in the default codepage.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n. Performs an i18n_uchar to host byte conversion.
[out] | dest | The destination string |
[in] | src | The source string |
[in] | n | The maximum number of characters to copy |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_copy_n | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | n | ||
) |
Copies a ustring.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n.
[out] | dest | The destination string |
[in] | src | The source string |
[in] | n | The maximum number of characters to copy |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_copy_ua | ( | i18n_uchar * | dest, |
const char * | src | ||
) |
Copies a byte string encoded in the default codepage to a ustring.
Adds a NULL terminator. Performs a host byte to i18n_uchar conversion.
[out] | dest | The destination string |
[in] | src | The source string |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_copy_ua_n | ( | i18n_uchar * | dest, |
const char * | src, | ||
int32_t | n | ||
) |
Copies a byte string encoded in the default codepage to a ustring.
Copies at most n characters. The result will be NULL terminated if the length of src is less than n. Performs a host byte to i18n_uchar conversion.
[out] | dest | The destination string |
[in] | src | The source string |
[in] | n | The maximum number of characters to copy |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_count_char32 | ( | const i18n_uchar * | s, |
int32_t | length | ||
) |
Counts Unicode code points in the length i18n_uchar code units of the string.
A code point may occupy either one or two i18n_uchar code units. Counting code points involves reading all code units.
[in] | s | The input string. |
[in] | length | The number of i18n_uchar code units to be checked, or -1 to count all code points before the first NULL (U+0000). |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_cspn | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in match_set.
Works just like C's strcspn but with Unicode.
[in] | string | The string in which to search, NULL-terminated. |
[in] | match_set | A NULL-terminated string defining a set of code points for which to search in the text string. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_find_first | ( | const i18n_uchar * | s, |
int32_t | length, | ||
const i18n_uchar * | sub_string, | ||
int32_t | sub_length | ||
) |
Finds the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
[in] | s | The string to search (NULL-terminated). |
[in] | length | The length of s (number of i18n_uchar characters), or -1 if it is NULL-terminated. |
[in] | sub_string | The substring to find (NULL-terminated). |
[in] | sub_length | The length of substring (number of i18n_uchar characters), or -1 if it is NULL-terminated. |
Returns a pointer to the first occurrence of sub_string in s, or NULL if sub_string is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_find_last | ( | const i18n_uchar * | s, |
int32_t | length, | ||
const i18n_uchar * | sub_string, | ||
int32_t | sub_length | ||
) |
Finds the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
[in] | s | The string to search. |
[in] | length | The length of s (number of i18n_uchar), or -1 if it is NULL-terminated. |
[in] | sub_string | The sub_string to find (NULL-terminated). |
[in] | sub_length | The length of sub_string (number of i18n_uchar), or -1 if it is NULL-terminated. |
Returns a pointer to the last occurrence of sub_string in s, or NULL if sub_string is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_fold_case | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
uint32_t | options, | ||
i18n_error_code_e * | error_code | ||
) |
Case-folds the characters in a string.
Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string. |
[in] | src | The original string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | options | Either I18N_USTRING_U_FOLD_CASE_DEFAULT or I18N_USTRING_U_FOLD_CASE_EXCLUDE_SPECIAL_I |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_UTF32 | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar32 * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-32 string to UTF-16.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_UTF32_with_sub | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar32 * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-32 string to UTF-16. Same as i18n_ustring_from_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters). If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | sub_char | The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER". |
[out] | num_substitutions | Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_UTF8 | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Converts a UTF-8 string to UTF-16.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_UTF8_lenient | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-8 string to UTF-16.
Same as i18n_ustring_from_UTF8() except that this function is designed to be very fast, which it achieves by being lenient about malformed UTF-8 sequences. This function is intended for use in environments where UTF-8 text is expected to be well-formed.
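Its semantics are:
- Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
- The function will not read beyond the input string, nor write beyond dest_capacity.
- Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not be well-formed UTF-16. The function will resynchronize to valid code point boundaries within a small number of code points after an illegal sequence.
- Non-shortest forms are not detected and will result in "illegal" Unicode scalar values.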
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). Unlike for other I18N functions, if src_len>=0 then it must be dest_capacity>=src_len. |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. Unlike for other I18N functions, if src_len>=0 but dest_capacity<src_len, then *dest_len will be set to src_len (and the I18N_ERROR_BUFFER_OVERFLOW error code will be set) regardless of the actual result length. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_UTF8_with_sub | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const char * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
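Convert a UTF-8 string to UTF-16. Same as i18n_ustring_from_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.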
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | sub_char | The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER". |
[out] | num_substitutions | Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_from_WCS | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const wchar_t * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a wchar_t string to UTF-16.
If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then this function simply calls the fast, dedicated function for that. Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters). If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string. |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_get_length | ( | const i18n_uchar * | s | ) |
Determines the length of an array of i18n_uchar.
[in] | s | The array of i18n_uchar characters, NULL (U+0000) terminated. |
Returns the number of i18n_uchar characters in s, minus the terminator.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_ubool i18n_ustring_has_more_char32_than | ( | const i18n_uchar * | s, |
int32_t | length, | ||
int32_t | number | ||
) |
Checks if the string contains more Unicode code points than a certain number.
This is more efficient than counting all code points in the entire string and comparing that number with a threshold. This function may not need to scan the string at all if the length is known (not -1 for NULL-termination) and falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to ( i18n_ustring_count_char32 (s, length) > number ). A Unicode code point may occupy either one or two i18n_uchar code units.
[in] | s | The input string. |
[in] | length | The length of the string, or -1 if it is NULL-terminated. |
[in] | number | The number of code points in the string is compared against the number parameter. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_mem_case_compare | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | length, | ||
uint32_t | options | ||
) |
Compare two strings case-insensitively using full case folding.
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
[in] | length | The number of characters in each string to case-fold and then compare. |
[in] | options | A bit set of options:
|
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_char | ( | const i18n_uchar * | s, |
i18n_uchar | c, | ||
int32_t | count | ||
) |
Finds the first occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (contains count i18n_uchar characters). |
[in] | c | The BMP code point to find. |
[in] | count | The length of the string. |
Returns a pointer to the first occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c, | ||
int32_t | count | ||
) |
Finds the first occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (contains count i18n_uchar characters). |
[in] | c | The code point to find. |
[in] | count | The length of the string. |
Returns a pointer to the first occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_mem_compare | ( | const i18n_uchar * | buf1, |
const i18n_uchar * | buf2, | ||
int32_t | count | ||
) |
Compare the first count i18n_uchar characters of each buffer.
[in] | buf1 | The first string to compare. |
[in] | buf2 | The second string to compare. |
[in] | count | The maximum number of i18n_uchar characters to compare. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_mem_compare_code_point_order | ( | const i18n_uchar * | s1, |
const i18n_uchar * | s2, | ||
int32_t | count | ||
) |
Compare two Unicode strings in code point order.
This is different in UTF-16 from i18n_ustring_mem_compare() if supplementary characters are present. For details, see i18n_ustring_compare_binary_order().
[in] | s1 | A string to compare. |
[in] | s2 | A string to compare. |
[in] | count | The maximum number of characters to compare. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_copy | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | count | ||
) |
Synonym for memcpy(), but with i18n_uchar characters only.
[out] | dest | The destination string |
[in] | src | The source string (can be NULL/invalid if count<=0) |
[in] | count | The number of characters to copy; no-op if <=0 |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_move | ( | i18n_uchar * | dest, |
const i18n_uchar * | src, | ||
int32_t | count | ||
) |
Synonym for memmove(), but with i18n_uchar characters only.
[out] | dest | The destination string |
[in] | src | The source string (can be NULL/invalid if count<=0) |
[in] | count | The number of characters to copy; no-op if <=0 |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_r_char | ( | const i18n_uchar * | s, |
i18n_uchar | c, | ||
int32_t | count | ||
) |
Finds the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (contains count i18n_uchar characters). |
[in] | c | The BMP code point to find. |
[in] | count | The length of the string. |
Returns a pointer to the last occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_r_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c, | ||
int32_t | count | ||
) |
Finds the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (contains count i18n_uchar characters). |
[in] | c | The code point to find. |
[in] | count | The length of the string. |
Returns a pointer to the last occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_mem_set | ( | i18n_uchar * | dest, |
const i18n_uchar | c, | ||
int32_t | count | ||
) |
Initialize count characters of dest to c.
[out] | dest | The destination string |
[in] | c | The character to initialize the string. |
[in] | count | The maximum number of characters to set. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_pbrk | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Locates the first occurrence in the string of any of the characters in the string match_set.
Works just like C's strpbrk but with Unicode.
[in] | string | The string in which to search, NULL-terminated. |
[in] | match_set | A NULL-terminated string defining a set of code points for which to search in the text string. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_r_char | ( | const i18n_uchar * | s, |
i18n_uchar | c | ||
) |
Finds the last occurrence of a BMP code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (NULL-terminated). |
[in] | c | The BMP code point to find. |
Returns a pointer to the last occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_r_char32 | ( | const i18n_uchar * | s, |
i18n_uchar32 | c | ||
) |
Finds the last occurrence of a code point in a string.
A surrogate code point is found only if its match in the text is not part of a surrogate pair. A NULL character is found at the string terminator.
[in] | s | The string to search (NULL-terminated). |
[in] | c | The code point to find. |
Returns a pointer to the last occurrence of c in s, or NULL if c is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_r_string | ( | const i18n_uchar * | s, |
const i18n_uchar * | sub_string | ||
) |
Finds the last occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
[in] | s | The string to search (NULL-terminated). |
[in] | sub_string | The substring to find (NULL-terminated). |
Returns a pointer to the last occurrence of sub_string in s, or NULL if sub_string is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_spn | ( | const i18n_uchar * | string, |
const i18n_uchar * | match_set | ||
) |
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in match_set.
Works just like C's strspn but with Unicode.
[in] | string | The string in which to search, NULL-terminated. |
[in] | match_set | A NULL-terminated string defining a set of code points for which to search in the text string. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_string | ( | const i18n_uchar * | s, |
const i18n_uchar * | sub_string | ||
) |
Finds the first occurrence of a substring in a string.
The substring is found at code point boundaries. That means that if the substring begins with a trail surrogate or ends with a lead surrogate, then it is found only if these surrogates stand alone in the text. Otherwise, the substring edge units would be matched against halves of surrogate pairs.
[in] | s | The string to search (NULL-terminated). |
[in] | sub_string | The substring to find (NULL-terminated). |
Returns a pointer to the first occurrence of sub_string in s, or NULL if sub_string is not in s.
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_to_lower | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const char * | locale, | ||
i18n_error_code_e * | error_code | ||
) |
Lowercase the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters) If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string. |
[in] | src | The original string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | locale | The locale to consider, or "" for the root locale or NULL for the default locale. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_to_title_new | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_ubreak_iterator_h | title_iter, | ||
const char * | locale | ||
) |
Titlecases a string.
Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.
The titlecase break iterator can be provided to customize arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. The standard titlecase iterator for the root locale implements the algorithm of Unicode TR 21.
The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters). If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string. |
[in] | src | The original string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | title_iter | A break iterator to find the first characters of words that are to be titlecased. If none are provided ( NULL ), then a standard titlecase break iterator is opened. |
[in] | locale | The locale to consider, or "" for the root locale or NULL for the default locale. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_to_upper | ( | i18n_uchar * | dest, |
int32_t | dest_capacity, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
const char * | locale, | ||
i18n_error_code_e * | error_code | ||
) |
Uppercases the characters in a string.
Casing is locale-dependent and context-sensitive. The result may be longer or shorter than the original. The source string and the destination buffer are allowed to overlap.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar characters) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string. |
[in] | src | The original string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | locale | The locale to consider, or "" for the root locale or NULL for the default locale. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar32* i18n_ustring_to_UTF32 | ( | i18n_uchar32 * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-32.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar32 characters) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar32* i18n_ustring_to_UTF32_with_sub | ( | i18n_uchar32 * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-32.
Same as i18n_ustring_to_UTF32() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of i18n_uchar32 characters). If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | sub_char | The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER". |
[out] | num_substitutions | Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
char* i18n_ustring_to_UTF8 | ( | char * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Converts a UTF-16 string to UTF-8.
If the input string is not well-formed, then the I18N_ERROR_INVALID_CHAR_FOUND error code is set.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of chars) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
char* i18n_ustring_to_UTF8_with_sub | ( | char * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_uchar32 | sub_char, | ||
int32_t * | num_substitutions, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to UTF-8. Same as i18n_ustring_to_UTF8() except for the additional sub_char which is output for illegal input sequences, instead of stopping with the I18N_ERROR_INVALID_CHAR_FOUND error code.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of chars) If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[in] | sub_char | The substitution character to use in place of an illegal input sequence, or U_SENTINEL if the function is to return with I18N_ERROR_INVALID_CHAR_FOUND instead. A substitution character can be any valid Unicode code point (up to U+10FFFF) except for surrogate code points (U+D800..U+DFFF). The recommended value is U+FFFD "REPLACEMENT CHARACTER". |
[out] | num_substitutions | Output parameter receiving the number of substitutions if sub_char>=0. Set to 0 if no substitutions occur or sub_char<0. num_substitutions can be NULL. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
wchar_t* i18n_ustring_to_WCS | ( | wchar_t * | dest, |
int32_t | dest_capacity, | ||
int32_t * | dest_len, | ||
const i18n_uchar * | src, | ||
int32_t | src_len, | ||
i18n_error_code_e * | error_code | ||
) |
Convert a UTF-16 string to a wchar_t string.
If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then this function simply calls the fast, dedicated function for that. Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
[out] | dest | A buffer for the result string. The result will be zero-terminated if the buffer is large enough. |
[in] | dest_capacity | The size of the buffer (number of wchar_t's). If it is 0 , then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). |
[out] | dest_len | A pointer to receive the number of units written to the destination. If dest_len!=NULL then *dest_len is always set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow. |
[in] | src | The original source string. |
[in] | src_len | The length of the original string. If -1 , then src must be zero-terminated. |
[out] | error_code | Must be a valid pointer to an error code value, which must not indicate a failure before the function call. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar* i18n_ustring_tokenizer_r | ( | i18n_uchar * | src, |
const i18n_uchar * | delim, | ||
i18n_uchar ** | save_state | ||
) |
The string tokenizer API allows an application to break a string into tokens.
Works just like C's strtok_r but with Unicode.
[in] | src | String containing token(s). This string will be modified. After the first call to i18n_ustring_tokenizer_r(), this argument must be NULL to get to the next token. |
[in] | delim | Set of delimiter characters (Unicode code points). |
[out] | save_state | The current pointer within the original string, which is set by this function. The save_state parameter should be the address of a local variable of type i18n_uchar *. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int32_t i18n_ustring_unescape | ( | const char * | src, |
i18n_uchar * | dest, | ||
int32_t | dest_capacity | ||
) |
Unescape a string of characters and write the resulting Unicode characters to the destination buffer.
The following escape sequences are recognized:
\uhhhh (4 hex digits; h in [0-9A-Fa-f]), \Uhhhhhhhh (8 hex digits), \xhh (1-2 hex digits), \x{h...} (1-8 hex digits), \ooo (1-3 octal digits; o in [0-7]), \cX (control-X; X is masked with 0x1F)
as well as the standard ANSI C escapes:
\a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
Anything else following a backslash is generically escaped. For example, "[a\-z]" returns "[a-z]".
If an escape sequence is ill-formed, this method returns an empty string. An example of an ill-formed sequence is "\\u" followed by fewer than 4 hex digits.
[in] | src | a zero-terminated string of invariant characters |
[out] | dest | pointer to buffer to receive converted and unescaped text and, if there is room, a zero terminator. May be NULL for preflighting, in which case no i18n_uchar characters will be written, but the return value will still be valid. On error, an empty string is stored here (if possible). |
[in] | dest_capacity | the number of i18n_uchar characters that may be written at dest. Ignored if dest == NULL. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
i18n_uchar32 i18n_ustring_unescape_at | ( | i18n_ustring_unescape_char_at_cb | char_at, |
int32_t * | offset, | ||
int32_t | length, | ||
void * | context | ||
) |
Unescape a single sequence.
The character at offset-1 is assumed (without checking) to be a backslash. This method takes a callback pointer to a function that returns the i18n_uchar at a given offset. By varying this callback, I18N functions are able to unescape char* strings, and UnicodeString objects.
If offset is out of range, or if the escape sequence is ill-formed, (i18n_uchar32)0xFFFFFFFF is returned. See documentation of i18n_ustring_unescape() for a list of recognized sequences.
[in] | char_at | callback function that returns a i18n_uchar of the source text given an offset and a context pointer. |
[in] | offset | pointer to the offset that will be passed to char_at. The offset value will be updated upon return to point after the last parsed character of the escape sequence. On error the offset is unchanged. |
[in] | length | the number of i18n_uchar characters in the source text. The last character of the source text is considered to be at offset length-1. |
[in] | context | an opaque pointer passed directly into char_at. |
I18N_ERROR_NONE | Success |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |