The Uchar module provides low-level access to the Unicode Character Database.

Required Header

#include <utils_i18n.h>

Overview

The Uchar module provides low-level access to the Unicode Character Database.

Sample Code 1

Gets the value of the enumerated property 'East Asian Width' for a character, and the Unicode allocation block that contains the character.

    int ret = I18N_ERROR_NONE;
    i18n_uchar32 code_point = 0;
    int property_value = 0;
    i18n_uchar_u_east_asian_width_e east_asian_width = I18N_UCHAR_U_EA_NEUTRAL;
    i18n_uchar_ublock_code_e block_code = I18N_UCHAR_UBLOCK_NO_BLOCK;

    // How to get the east asian width type for 's'
    code_point = 0x73;    // 's'
    ret = i18n_uchar_get_int_property_value(code_point, I18N_UCHAR_EAST_ASIAN_WIDTH, &property_value);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        east_asian_width = (i18n_uchar_u_east_asian_width_e)property_value;
        dlog_print(DLOG_INFO, LOG_TAG, "East Asian Width Type for ( %.4x ) is ( %d )\n", code_point, east_asian_width);
        // East Asian Width Type for ( 0073 ) is ( 4 ) which is I18N_UCHAR_U_EA_NARROW
    }

    // How to get the block code for 's'
    ret = i18n_uchar_get_ublock_code(code_point, &block_code);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        dlog_print(DLOG_INFO, LOG_TAG, "block name for ( %.4x ) is ( %d )\n", code_point, block_code);
        // block code for ( 0073 ) is ( 1 ) which is I18N_UCHAR_UBLOCK_BASIC_LATIN
    }

    // How to get the east asian width type for 'sung' as ideographs
    code_point = 0x661F;    // 'sung' as ideographs
    ret = i18n_uchar_get_int_property_value(code_point, I18N_UCHAR_EAST_ASIAN_WIDTH, &property_value);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        east_asian_width = (i18n_uchar_u_east_asian_width_e)property_value;
        dlog_print(DLOG_INFO, LOG_TAG, "East Asian Width Type for ( %.4x ) is ( %d )\n", code_point, east_asian_width);
        // East Asian Width Type for ( 661f ) is ( 5 ) which is I18N_UCHAR_U_EA_WIDE
    }

    // How to get the block code for 'sung' as ideographs
    ret = i18n_uchar_get_ublock_code(code_point, &block_code);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        dlog_print(DLOG_INFO, LOG_TAG, "block name for ( %.4x ) is ( %d )\n", code_point, block_code);
        // block code for ( 661f ) is ( 71 ) which is I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS
    }

    // How to get the east asian width type for 'sung' as hangul
    code_point = 0xC131;    // 'sung' as hangul
    ret = i18n_uchar_get_int_property_value(code_point, I18N_UCHAR_EAST_ASIAN_WIDTH, &property_value);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        east_asian_width = (i18n_uchar_u_east_asian_width_e)property_value;
        dlog_print(DLOG_INFO, LOG_TAG, "East Asian Width Type for ( %.4x ) is ( %d )\n", code_point, east_asian_width);
        // East Asian Width Type for ( c131 ) is ( 5 ) which is I18N_UCHAR_U_EA_WIDE
    }

    // How to get the block code for 'sung' as hangul
    ret = i18n_uchar_get_ublock_code(code_point, &block_code);
    if (ret != I18N_ERROR_NONE) {
        dlog_print(DLOG_INFO, LOG_TAG, "Error occurred!!\n");
    } else {
        dlog_print(DLOG_INFO, LOG_TAG, "block name for ( %.4x ) is ( %d )\n", code_point, block_code);
        // block code for ( c131 ) is ( 74 ) which is I18N_UCHAR_UBLOCK_HANGUL_SYLLABLES
    }

Functions
int	i18n_uchar_get_int_property_value (i18n_uchar32 c, i18n_uchar_uproperty_e which, int32_t *property_val)
	Gets the property value for an enumerated property for a code point.
int	i18n_uchar_get_ublock_code (i18n_uchar32 c, i18n_uchar_ublock_code_e *block_val)
	Gets the Unicode allocation block that contains the character.
int	i18n_uchar_has_binary_property (i18n_uchar32 c, i18n_uchar_uproperty_e which, i18n_ubool *has_binary_property)
	Checks a binary unicode property for a code point.
int	i18n_uchar_is_alphabetic (i18n_uchar32 c, i18n_ubool *is_alphabetic)
	Checks if a code point has the Alphabetic Unicode property.
int	i18n_uchar_is_lowercase (i18n_uchar32 c, i18n_ubool *is_lowercase)
	Checks if a code point has the Lowercase Unicode property.
int	i18n_uchar_is_uppercase (i18n_uchar32 c, i18n_ubool *is_uppercase)
	Checks if a code point has the Uppercase Unicode property.
int	i18n_uchar_is_white_space (i18n_uchar32 c, i18n_ubool *is_white_space)
	Checks if a code point has the White_Space Unicode property.
int	i18n_uchar_get_int_property_min_value (i18n_uchar_uproperty_e which, int32_t *int_min_value)
	Gets the minimum value for an enumerated/integer/binary Unicode property.
int	i18n_uchar_get_int_property_max_value (i18n_uchar_uproperty_e which, int32_t *int_max_value)
	Gets the maximum value for an enumerated/integer/binary Unicode property.
int	i18n_uchar_get_numeric_value (i18n_uchar32 c, double *numeric_val)
	Gets the numeric value for a Unicode code point as defined in the Unicode Character Database.
int	i18n_uchar_is_lower (i18n_uchar32 c, i18n_ubool *is_lower)
	Determines whether the specified code point has the general category "Ll" (lowercase letter).
int	i18n_uchar_is_upper (i18n_uchar32 c, i18n_ubool *is_upper)
	Determines whether the specified code point has the general category "Lu" (uppercase letter).
int	i18n_uchar_is_title (i18n_uchar32 c, i18n_ubool *is_title)
	Determines whether the specified code point is a titlecase letter.
int	i18n_uchar_is_digit (i18n_uchar32 c, i18n_ubool *is_digit)
	Determines whether the specified code point is a digit character according to Java.
int	i18n_uchar_is_alpha (i18n_uchar32 c, i18n_ubool *is_alpha)
	Determines whether the specified code point is a letter character.
int	i18n_uchar_is_alnum (i18n_uchar32 c, i18n_ubool *is_alnum)
	Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
int	i18n_uchar_is_xdigit (i18n_uchar32 c, i18n_ubool *is_xdigit)
	Determines whether the specified code point is a hexadecimal digit.
int	i18n_uchar_is_punct (i18n_uchar32 c, i18n_ubool *is_punct)
	Determines whether the specified code point is a punctuation character.
int	i18n_uchar_is_graph (i18n_uchar32 c, i18n_ubool *is_graph)
	Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
int	i18n_uchar_is_blank (i18n_uchar32 c, i18n_ubool *is_blank)
	Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
int	i18n_uchar_is_defined (i18n_uchar32 c, i18n_ubool *is_defined)
	Determines whether the specified code point is "defined", which usually means that it is assigned a character.
int	i18n_uchar_is_space (i18n_uchar32 c, i18n_ubool *is_space)
	Determines if the specified character is a space character or not.
int	i18n_uchar_is_java_space_char (i18n_uchar32 c, i18n_ubool *is_java_space_char)
	Determines if the specified code point is a space character according to Java.
int	i18n_uchar_is_whitespace (i18n_uchar32 c, i18n_ubool *is_whitespace)
	Determines if the specified code point is a whitespace character according to Java/ICU.
int	i18n_uchar_is_control (i18n_uchar32 c, i18n_ubool *is_control)
	Determines whether the specified code point is a control character (as defined by this function).
int	i18n_uchar_is_iso_control (i18n_uchar32 c, i18n_ubool *is_iso_control)
	Determines whether the specified code point is an ISO control code.
int	i18n_uchar_is_printable (i18n_uchar32 c, i18n_ubool *is_printable)
	Determines whether the specified code point is a printable character.
int	i18n_uchar_is_base (i18n_uchar32 c, i18n_ubool *is_base)
	Determines whether the specified code point is a base character.
int	i18n_uchar_char_direction (i18n_uchar32 c, i18n_uchar_direction_e *direction)
	Returns the bidirectional category value for the code point.
int	i18n_uchar_is_mirrored (i18n_uchar32 c, i18n_ubool *is_mirrored)
	Determines whether the code point has the Bidi_Mirrored property.
int	i18n_uchar_char_mirror (i18n_uchar32 c, i18n_uchar32 *char_mirror)
	Maps the specified character to a "mirror-image" character.
int	i18n_uchar_get_bidi_paired_bracket (i18n_uchar32 c, i18n_uchar32 *bidi_paired_bracket)
	Maps the specified character to its paired bracket character.
int	i18n_uchar_char_type (i18n_uchar32 c, int8_t *char_type)
	Returns the general category value for the code point.
int	i18n_uchar_enum_char_types (i18n_uchar_enum_char_type_cb cb, const void *user_data)
	Enumerates efficiently all code points with their unicode general categories.
int	i18n_uchar_get_combining_class (i18n_uchar32 c, uint8_t *combining_class)
	Returns the combining class of the code point.
int	i18n_uchar_char_digit_value (i18n_uchar32 c, int32_t *char_digit_value)
	Returns the decimal digit value of a decimal digit character.
int	i18n_uchar_char_name (i18n_uchar32 code, i18n_uchar_u_char_name_choice_e name_choice, char *buffer, int32_t buffer_length, int32_t *name_length)
	Retrieves the name of a unicode character.
int	i18n_uchar_char_from_name (i18n_uchar_u_char_name_choice_e name_choice, const char *name, i18n_uchar32 *char_from_name)
	Finds a unicode character by its name and return its code point value.
int	i18n_uchar_enum_char_names (i18n_uchar32 start, i18n_uchar32 limit, i18n_uchar_enum_char_name_cb cb, void *user_data, i18n_uchar_u_char_name_choice_e name_choice)
	Enumerates all assigned unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
int	i18n_uchar_get_property_name (i18n_uchar_uproperty_e property, i18n_uchar_u_property_name_choice_e name_choice, const char **name)
	Returns the unicode name for a given property, as given in the unicode database file.
int	i18n_uchar_get_property_enum (const char *alias, i18n_uchar_uproperty_e *property_enum)
	Returns the i18n_uchar_uproperty_e enum for a given property name, as specified in the unicode database.
int	i18n_uchar_get_property_value_name (i18n_uchar_uproperty_e property, int32_t value, i18n_uchar_u_property_name_choice_e name_choice, const char **name)
	Returns the unicode name for a given property value.
int	i18n_uchar_get_property_value_enum (i18n_uchar_uproperty_e property, const char *alias, int32_t *value_enum)
	Returns the property value integer for a given value name, as specified in the unicode database.
int	i18n_uchar_is_id_start (i18n_uchar32 c, i18n_ubool *is_id_start)
	Determines if the specified character is permissible as the first character in an identifier according to unicode (the unicode standard, version 3.0, chapter 5.16 identifiers).
int	i18n_uchar_is_id_part (i18n_uchar32 c, i18n_ubool *is_id_part)
	Determines if the specified character is permissible in an identifier according to Java.
int	i18n_uchar_is_id_ignorable (i18n_uchar32 c, i18n_ubool *is_id_ignorable)
	Determines if the specified character should be regarded as an ignorable character in an identifier, according to java.
int	i18n_uchar_is_java_id_start (i18n_uchar32 c, i18n_ubool *is_java_id_start)
	Determines if the specified character is permissible as the first character in a java identifier.
int	i18n_uchar_is_java_id_part (i18n_uchar32 c, i18n_ubool *is_java_id_part)
	Determines if the specified character is permissible in a java identifier.
int	i18n_uchar_to_lower (i18n_uchar32 c, i18n_uchar32 *lower)
	Maps the given character to its lowercase equivalent.
int	i18n_uchar_to_upper (i18n_uchar32 c, i18n_uchar32 *upper)
	Maps the given character to its uppercase equivalent.
int	i18n_uchar_to_title (i18n_uchar32 c, i18n_uchar32 *title)
	Maps the given character to its titlecase equivalent.
int	i18n_uchar_fold_case (i18n_uchar32 c, uint32_t options, i18n_uchar32 *code_point)
	Maps the given character to its case folding equivalent.
int	i18n_uchar_digit (i18n_uchar32 ch, int8_t radix, int32_t *numerical_value)
	Returns the decimal digit value of the code point in the specified radix.
int	i18n_uchar_for_digit (int32_t digit, int8_t radix, i18n_uchar32 *representation)
	Determines the character representation for a specific digit in the specified radix.
int	i18n_uchar_char_age (i18n_uchar32 c, i18n_uversion_info version_array)
	Gets the "age" of the code point.
int	i18n_uchar_get_unicode_version (i18n_uversion_info version_array)
	Gets the unicode version information.
int	i18n_uchar_get_fc_nfkc_closure (i18n_uchar32 c, i18n_uchar *dest, int32_t dest_capacity, int32_t *length)
	Gets the FC_NFKC_Closure property string for a character.
Typedefs
typedef uint16_t	i18n_uchar
	i18n_uchar.
typedef int32_t	i18n_uchar32
	i18n_uchar32.
typedef int8_t	i18n_ubool
	i18n_ubool.
typedef i18n_ubool(*	i18n_uchar_enum_char_type_cb )(const void *context, i18n_uchar32 start, i18n_uchar32 limit, i18n_uchar_category_e type)
	Callback from i18n_uchar_enum_char_types(), is called for each contiguous range of code points c (where `start <= c < limit` ) with the same Unicode general category ("character type").
typedef i18n_ubool(*	i18n_uchar_enum_char_name_cb )(void *context, i18n_uchar32 code, i18n_uchar_u_char_name_choice_e name_choice, const char *name, int32_t length)
	Type of a callback function for i18n_uchar_enum_char_names() that gets called for each Unicode character with the code point value and the character name.
Defines
#define	I18N_U_MASK(x) ((uint32_t)1 << (x))
#define	I18N_U_GC_CN_MASK I18N_U_MASK(I18N_UCHAR_U_GENERAL_OTHER_TYPES)
#define	I18N_U_GC_LU_MASK I18N_U_MASK(I18N_UCHAR_U_UPPERCASE_LETTER)
#define	I18N_U_GC_LL_MASK I18N_U_MASK(I18N_UCHAR_U_LOWERCASE_LETTER)
#define	I18N_U_GC_LT_MASK I18N_U_MASK(I18N_UCHAR_U_TITLECASE_LETTER)
#define	I18N_U_GC_LM_MASK I18N_U_MASK(I18N_UCHAR_U_MODIFIER_LETTER)
#define	I18N_U_GC_LO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_LETTER)
#define	I18N_U_GC_MN_MASK I18N_U_MASK(I18N_UCHAR_U_NON_SPACING_MARK)
#define	I18N_U_GC_ME_MASK I18N_U_MASK(I18N_UCHAR_U_ENCLOSING_MARK)
#define	I18N_U_GC_MC_MASK I18N_U_MASK(I18N_UCHAR_U_COMBINING_SPACING_MARK)
#define	I18N_U_GC_ND_MASK I18N_U_MASK(I18N_UCHAR_U_DECIMAL_DIGIT_NUMBER)
#define	I18N_U_GC_NL_MASK I18N_U_MASK(I18N_UCHAR_U_LETTER_NUMBER)
#define	I18N_U_GC_NO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_NUMBER)
#define	I18N_U_GC_ZS_MASK I18N_U_MASK(I18N_UCHAR_U_SPACE_SEPARATOR)
#define	I18N_U_GC_ZL_MASK I18N_U_MASK(I18N_UCHAR_U_LINE_SEPARATOR)
#define	I18N_U_GC_ZP_MASK I18N_U_MASK(I18N_UCHAR_U_PARAGRAPH_SEPARATOR)
#define	I18N_U_GC_CC_MASK I18N_U_MASK(I18N_UCHAR_U_CONTROL_CHAR)
#define	I18N_U_GC_CF_MASK I18N_U_MASK(I18N_UCHAR_U_FORMAT_CHAR)
#define	I18N_U_GC_CO_MASK I18N_U_MASK(I18N_UCHAR_U_PRIVATE_USE_CHAR)
#define	I18N_U_GC_CS_MASK I18N_U_MASK(I18N_UCHAR_U_SURROGATE)
#define	I18N_U_GC_PD_MASK I18N_U_MASK(I18N_UCHAR_U_DASH_PUNCTUATION)
#define	I18N_U_GC_PS_MASK I18N_U_MASK(I18N_UCHAR_U_START_PUNCTUATION)
#define	I18N_U_GC_PE_MASK I18N_U_MASK(I18N_UCHAR_U_END_PUNCTUATION)
#define	I18N_U_GC_PC_MASK I18N_U_MASK(I18N_UCHAR_U_CONNECTOR_PUNCTUATION)
#define	I18N_U_GC_PO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_PUNCTUATION)
#define	I18N_U_GC_SM_MASK I18N_U_MASK(I18N_UCHAR_U_MATH_SYMBOL)
#define	I18N_U_GC_SC_MASK I18N_U_MASK(I18N_UCHAR_U_CURRENCY_SYMBOL)
#define	I18N_U_GC_SK_MASK I18N_U_MASK(I18N_UCHAR_U_MODIFIER_SYMBOL)
#define	I18N_U_GC_SO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_SYMBOL)
#define	I18N_U_GC_PI_MASK I18N_U_MASK(I18N_UCHAR_U_INITIAL_PUNCTUATION)
#define	I18N_U_GC_PF_MASK I18N_U_MASK(I18N_UCHAR_U_FINAL_PUNCTUATION)
#define	I18N_U_GC_L_MASK (I18N_U_GC_LU_MASK|I18N_U_GC_LL_MASK|I18N_U_GC_LT_MASK|I18N_U_GC_LM_MASK|I18N_U_GC_LO_MASK)
#define	I18N_U_GC_LC_MASK (I18N_U_GC_LU_MASK|I18N_U_GC_LL_MASK|I18N_U_GC_LT_MASK)
#define	I18N_U_GC_M_MASK (I18N_U_GC_MN_MASK|I18N_U_GC_ME_MASK|I18N_U_GC_MC_MASK)
#define	I18N_U_GC_N_MASK (I18N_U_GC_ND_MASK|I18N_U_GC_NL_MASK|I18N_U_GC_NO_MASK)
#define	I18N_U_GC_Z_MASK (I18N_U_GC_ZS_MASK|I18N_U_GC_ZL_MASK|I18N_U_GC_ZP_MASK)
#define	I18N_U_GC_C_MASK (I18N_U_GC_CN_MASK|I18N_U_GC_CC_MASK|I18N_U_GC_CF_MASK|I18N_U_GC_CO_MASK|I18N_U_GC_CS_MASK)
#define	I18N_U_GC_P_MASK (I18N_U_GC_PD_MASK|I18N_U_GC_PS_MASK|I18N_U_GC_PE_MASK|I18N_U_GC_PC_MASK|I18N_U_GC_PO_MASK|I18N_U_GC_PI_MASK|I18N_U_GC_PF_MASK)
#define	I18N_U_GC_S_MASK (I18N_U_GC_SM_MASK|I18N_U_GC_SC_MASK|I18N_U_GC_SK_MASK|I18N_U_GC_SO_MASK)
#define	I18N_U_NO_NUMERIC_VALUE ((double)-123456789.)
#define	I18N_U_GET_GC_MASK(c) I18N_U_MASK(u_charType(c))
#define	I18N_U_FOLD_CASE_DEFAULT 0
	Option value for case folding: use default mappings defined in CaseFolding.txt.
#define	I18N_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
	Option value for case folding: Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az). Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings. Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings.
#define	I18N_USEARCH_DONE -1
	DONE is returned by i18n_usearch_previous() and i18n_usearch_next() after all valid matches have been returned, and by i18n_usearch_first() and i18n_usearch_last() if there are no matches at all.

Define Documentation

#define I18N_U_FOLD_CASE_DEFAULT 0

Option value for case folding: use default mappings defined in CaseFolding.txt.

Since :: 2.3.1

#define I18N_U_FOLD_CASE_EXCLUDE_SPECIAL_I 1

Option value for case folding:
Use the modified set of mappings provided in CaseFolding.txt to handle dotted I and dotless i appropriately for Turkic languages (tr, az).
Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that are to be included for default mappings and excluded for the Turkic-specific mappings.
Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that are to be excluded for default mappings and included for the Turkic-specific mappings.

Since :: 2.3.1

#define I18N_U_GC_C_MASK (I18N_U_GC_CN_MASK|I18N_U_GC_CC_MASK|I18N_U_GC_CF_MASK|I18N_U_GC_CO_MASK|I18N_U_GC_CS_MASK)

Mask constant for multiple i18n_uchar_category_e bits (C Others).

Since :: 2.3.1

#define I18N_U_GC_CC_MASK I18N_U_MASK(I18N_UCHAR_U_CONTROL_CHAR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_CF_MASK I18N_U_MASK(I18N_UCHAR_U_FORMAT_CHAR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_CN_MASK I18N_U_MASK(I18N_UCHAR_U_GENERAL_OTHER_TYPES)

U_GC_XX_MASK constants are bit flags corresponding to Unicode general category values.

Since :: 2.3.1

#define I18N_U_GC_CO_MASK I18N_U_MASK(I18N_UCHAR_U_PRIVATE_USE_CHAR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_CS_MASK I18N_U_MASK(I18N_UCHAR_U_SURROGATE)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_L_MASK (I18N_U_GC_LU_MASK|I18N_U_GC_LL_MASK|I18N_U_GC_LT_MASK|I18N_U_GC_LM_MASK|I18N_U_GC_LO_MASK)

Mask constant for multiple i18n_uchar_category_e bits (L Letters).

Since :: 2.3.1

#define I18N_U_GC_LC_MASK (I18N_U_GC_LU_MASK|I18N_U_GC_LL_MASK|I18N_U_GC_LT_MASK)

Mask constant for multiple i18n_uchar_category_e bits (LC Cased Letters).

Since :: 2.3.1

#define I18N_U_GC_LL_MASK I18N_U_MASK(I18N_UCHAR_U_LOWERCASE_LETTER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_LM_MASK I18N_U_MASK(I18N_UCHAR_U_MODIFIER_LETTER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_LO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_LETTER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_LT_MASK I18N_U_MASK(I18N_UCHAR_U_TITLECASE_LETTER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_LU_MASK I18N_U_MASK(I18N_UCHAR_U_UPPERCASE_LETTER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_M_MASK (I18N_U_GC_MN_MASK|I18N_U_GC_ME_MASK|I18N_U_GC_MC_MASK)

Mask constant for multiple i18n_uchar_category_e bits (M Marks).

Since :: 2.3.1

#define I18N_U_GC_MC_MASK I18N_U_MASK(I18N_UCHAR_U_COMBINING_SPACING_MARK)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_ME_MASK I18N_U_MASK(I18N_UCHAR_U_ENCLOSING_MARK)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_MN_MASK I18N_U_MASK(I18N_UCHAR_U_NON_SPACING_MARK)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_N_MASK (I18N_U_GC_ND_MASK|I18N_U_GC_NL_MASK|I18N_U_GC_NO_MASK)

Mask constant for multiple i18n_uchar_category_e bits (N Numbers).

Since :: 2.3.1

#define I18N_U_GC_ND_MASK I18N_U_MASK(I18N_UCHAR_U_DECIMAL_DIGIT_NUMBER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_NL_MASK I18N_U_MASK(I18N_UCHAR_U_LETTER_NUMBER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_NO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_NUMBER)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

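#define I18N_U_GC_P_MASK (I18N_U_GC_PD_MASK|I18N_U_GC_PS_MASK|I18N_U_GC_PE_MASK|I18N_U_GC_PC_MASK|I18N_U_GC_PO_MASK|I18N_U_GC_PI_MASK|I18N_U_GC_PF_MASK)

Mask constant for multiple i18n_uchar_category_e bits (P Punctuation).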

Since :: 2.3.1

#define I18N_U_GC_PC_MASK I18N_U_MASK(I18N_UCHAR_U_CONNECTOR_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PD_MASK I18N_U_MASK(I18N_UCHAR_U_DASH_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PE_MASK I18N_U_MASK(I18N_UCHAR_U_END_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PF_MASK I18N_U_MASK(I18N_UCHAR_U_FINAL_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PI_MASK I18N_U_MASK(I18N_UCHAR_U_INITIAL_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_PS_MASK I18N_U_MASK(I18N_UCHAR_U_START_PUNCTUATION)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_S_MASK (I18N_U_GC_SM_MASK|I18N_U_GC_SC_MASK|I18N_U_GC_SK_MASK|I18N_U_GC_SO_MASK)

Mask constant for multiple i18n_uchar_category_e bits (S Symbols).

Since :: 2.3.1

#define I18N_U_GC_SC_MASK I18N_U_MASK(I18N_UCHAR_U_CURRENCY_SYMBOL)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_SK_MASK I18N_U_MASK(I18N_UCHAR_U_MODIFIER_SYMBOL)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_SM_MASK I18N_U_MASK(I18N_UCHAR_U_MATH_SYMBOL)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_SO_MASK I18N_U_MASK(I18N_UCHAR_U_OTHER_SYMBOL)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_Z_MASK (I18N_U_GC_ZS_MASK|I18N_U_GC_ZL_MASK|I18N_U_GC_ZP_MASK)

Mask constant for multiple i18n_uchar_category_e bits (Z Separators).

Since :: 2.3.1

#define I18N_U_GC_ZL_MASK I18N_U_MASK(I18N_UCHAR_U_LINE_SEPARATOR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_ZP_MASK I18N_U_MASK(I18N_UCHAR_U_PARAGRAPH_SEPARATOR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GC_ZS_MASK I18N_U_MASK(I18N_UCHAR_U_SPACE_SEPARATOR)

Mask constant for a i18n_uchar_category_e.

Since :: 2.3.1

#define I18N_U_GET_GC_MASK(c) I18N_U_MASK(u_charType(c))

Get a single-bit bit set for the general category of a character.

Since :: 2.3.1

#define I18N_U_MASK(x) ((uint32_t)1 << (x))

Get a single-bit bit set (a flag) from a bit number 0..31.

Since :: 2.3.1

#define I18N_U_NO_NUMERIC_VALUE ((double)-123456789.)

Special value that is returned by i18n_uchar_get_numeric_value() when no numeric value is defined for a code point.

Since :: 2.3.1

#define I18N_USEARCH_DONE -1

DONE is returned by i18n_usearch_previous() and i18n_usearch_next() after all valid matches have been returned, and by i18n_usearch_first() and i18n_usearch_last() if there are no matches at all.

Since :: 2.3.1

Typedef Documentation

typedef int8_t i18n_ubool

i18n_ubool.

Since :: 2.3.1

typedef uint16_t i18n_uchar

i18n_uchar.

Since :: 2.3.1 (U_ICU_VERSION_MAJOR_NUM >= 59)

typedef int32_t i18n_uchar32

i18n_uchar32.

Since :: 2.3.1

typedef i18n_ubool(* i18n_uchar_enum_char_name_cb)(void *context, i18n_uchar32 code, i18n_uchar_u_char_name_choice_e name_choice, const char *name, int32_t length)

Type of a callback function for i18n_uchar_enum_char_names() that gets called for each Unicode character with the code point value and the character name.

The callback function can stop the enumeration by returning false.

Since :: 4.0

Parameters:

[in]	context	The context pointer that was passed to i18n_uchar_enum_char_names()
[in]	code	The Unicode code point for the character with this name
[in]	name_choice	Selector for which kind of names is enumerated
[in]	name	The character's name, zero-terminated. Can be used only in the callback. To use outside, make a copy.
[in]	length	The length of the name

Returns:: true if the enumeration should continue, false to stop it

typedef i18n_ubool(* i18n_uchar_enum_char_type_cb)(const void *context, i18n_uchar32 start, i18n_uchar32 limit, i18n_uchar_category_e type)

Callback from i18n_uchar_enum_char_types(), is called for each contiguous range of code points c (where start <= c < limit ) with the same Unicode general category ("character type").

The callback function can stop the enumeration by returning false.

Since :: 4.0

Parameters:

[in]	context	The context pointer that was passed to i18n_uchar_enum_char_types()
[in]	start	The first code point in a contiguous range with value
[in]	limit	One past the last code point in a contiguous range with value
[in]	type	The general category for all code points in [start..limit)

Returns:: true if the enumeration should continue, false to stop it

Enumeration Type Documentation

enum i18n_uchar_category_e

Enumeration for Unicode general category types.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_UNASSIGNED	Non-category for unassigned and non-character code points
I18N_UCHAR_U_GENERAL_OTHER_TYPES	Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as I18N_UCHAR_U_UNASSIGNED!)
I18N_UCHAR_U_UPPERCASE_LETTER	Lu
I18N_UCHAR_U_LOWERCASE_LETTER	Ll
I18N_UCHAR_U_TITLECASE_LETTER	Lt
I18N_UCHAR_U_MODIFIER_LETTER	Lm
I18N_UCHAR_U_OTHER_LETTER	Lo
I18N_UCHAR_U_NON_SPACING_MARK	Mn
I18N_UCHAR_U_ENCLOSING_MARK	Me
I18N_UCHAR_U_COMBINING_SPACING_MARK	Mc
I18N_UCHAR_U_DECIMAL_DIGIT_NUMBER	Nd
I18N_UCHAR_U_LETTER_NUMBER	Nl
I18N_UCHAR_U_OTHER_NUMBER	No
I18N_UCHAR_U_SPACE_SEPARATOR	Zs
I18N_UCHAR_U_LINE_SEPARATOR	Zl
I18N_UCHAR_U_PARAGRAPH_SEPARATOR	Zp
I18N_UCHAR_U_CONTROL_CHAR	Cc
I18N_UCHAR_U_FORMAT_CHAR	Cf
I18N_UCHAR_U_PRIVATE_USE_CHAR	Co
I18N_UCHAR_U_SURROGATE	Cs
I18N_UCHAR_U_DASH_PUNCTUATION	Pd
I18N_UCHAR_U_START_PUNCTUATION	Ps
I18N_UCHAR_U_END_PUNCTUATION	Pe
I18N_UCHAR_U_CONNECTOR_PUNCTUATION	Pc
I18N_UCHAR_U_OTHER_PUNCTUATION	Po
I18N_UCHAR_U_MATH_SYMBOL	Sm
I18N_UCHAR_U_CURRENCY_SYMBOL	Sc
I18N_UCHAR_U_MODIFIER_SYMBOL	Sk
I18N_UCHAR_U_OTHER_SYMBOL	So
I18N_UCHAR_U_INITIAL_PUNCTUATION	Pi
I18N_UCHAR_U_FINAL_PUNCTUATION	Pf
I18N_UCHAR_U_CHAR_CATEGORY_COUNT	One higher than the last enum i18n_uchar_category_e constant

enum i18n_uchar_direction_e

Enumeration for the language directional property of a character set.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_LEFT_TO_RIGHT	L
I18N_UCHAR_U_RIGHT_TO_LEFT	R
I18N_UCHAR_U_EUROPEAN_NUMBER	EN
I18N_UCHAR_U_EUROPEAN_NUMBER_SEPARATOR	ES
I18N_UCHAR_U_EUROPEAN_NUMBER_TERMINATOR	ET
I18N_UCHAR_U_ARABIC_NUMBER	AN
I18N_UCHAR_U_COMMON_NUMBER_SEPARATOR	CS
I18N_UCHAR_U_BLOCK_SEPARATOR	B
I18N_UCHAR_U_SEGMENT_SEPARATOR	S
I18N_UCHAR_U_WHITE_SPACE_NEUTRAL	WS
I18N_UCHAR_U_OTHER_NEUTRAL	ON
I18N_UCHAR_U_LEFT_TO_RIGHT_EMBEDDING	LRE
I18N_UCHAR_U_LEFT_TO_RIGHT_OVERRIDE	LRO
I18N_UCHAR_U_RIGHT_TO_LEFT_ARABIC	AL
I18N_UCHAR_U_RIGHT_TO_LEFT_EMBEDDING	RLE
I18N_UCHAR_U_RIGHT_TO_LEFT_OVERRIDE	RLO
I18N_UCHAR_U_POP_DIRECTIONAL_FORMAT	PDF
I18N_UCHAR_U_DIR_NON_SPACING_MARK	NSM
I18N_UCHAR_U_BOUNDARY_NEUTRAL	BN
I18N_UCHAR_U_FIRST_STRONG_ISOLATE	FSI (Since 3.0)
I18N_UCHAR_U_LEFT_TO_RIGHT_ISOLATE	LRI (Since 3.0)
I18N_UCHAR_U_RIGHT_TO_LEFT_ISOLATE	RLI (Since 3.0)
I18N_UCHAR_U_POP_DIRECTIONAL_ISOLATE	PDI (Since 3.0)
I18N_UCHAR_U_CHAR_DIRECTION_COUNT	Count

enum i18n_uchar_u_bidi_paired_bracket_type_e

Bidi Paired Bracket Type constants.

Since :: 3.0

See also:: I18N_UCHAR_BIDI_PAIRED_BRACKET_TYPE

Enumerator:

I18N_UCHAR_U_BPT_NONE	Not a paired bracket.
I18N_UCHAR_U_BPT_OPEN	Open paired bracket.
I18N_UCHAR_U_BPT_CLOSE	Close paired bracket.
I18N_UCHAR_U_BPT_COUNT	Count

enum i18n_uchar_u_char_name_choice_e

Enumeration for i18n_uchar_char_name() constants.

Since :: 4.0

Enumerator:

I18N_UCHAR_U_UNICODE_CHAR_NAME	Unicode character name (Name property).
I18N_UCHAR_U_EXTENDED_CHAR_NAME	Standard or synthetic character name.
I18N_UCHAR_U_CHAR_NAME_ALIAS	Corrected name from NameAliases.txt.

enum i18n_uchar_u_decomposition_type_e

Enumeration for Decomposition Type constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_DT_NONE	[none]
I18N_UCHAR_U_DT_CANONICAL	[can]
I18N_UCHAR_U_DT_COMPAT	[com]
I18N_UCHAR_U_DT_CIRCLE	[enc]
I18N_UCHAR_U_DT_FINAL	[fin]
I18N_UCHAR_U_DT_FONT	[font]
I18N_UCHAR_U_DT_FRACTION	[fra]
I18N_UCHAR_U_DT_INITIAL	[init]
I18N_UCHAR_U_DT_ISOLATED	[iso]
I18N_UCHAR_U_DT_MEDIAL	[med]
I18N_UCHAR_U_DT_NARROW	[nar]
I18N_UCHAR_U_DT_NOBREAK	[nb]
I18N_UCHAR_U_DT_SMALL	[sml]
I18N_UCHAR_U_DT_SQUARE	[sqr]
I18N_UCHAR_U_DT_SUB	[sub]
I18N_UCHAR_U_DT_SUPER	[sup]
I18N_UCHAR_U_DT_VERTICAL	[vert]
I18N_UCHAR_U_DT_WIDE	[wide]
I18N_UCHAR_U_DT_COUNT	18

enum i18n_uchar_u_east_asian_width_e

Enumeration for East Asian Width constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_EA_NEUTRAL	[N]
I18N_UCHAR_U_EA_AMBIGUOUS	[A]
I18N_UCHAR_U_EA_HALFWIDTH	[H]
I18N_UCHAR_U_EA_FULLWIDTH	[F]
I18N_UCHAR_U_EA_NARROW	[Na]
I18N_UCHAR_U_EA_WIDE	[W]

enum i18n_uchar_u_grapheme_cluster_break_e

Enumeration for Grapheme Cluster Break constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_GCB_OTHER	[XX]
I18N_UCHAR_U_GCB_CONTROL	[CN]
I18N_UCHAR_U_GCB_CR	[CR]
I18N_UCHAR_U_GCB_EXTEND	[EX]
I18N_UCHAR_U_GCB_L	[L]
I18N_UCHAR_U_GCB_LF	[LF]
I18N_UCHAR_U_GCB_LV	[LV]
I18N_UCHAR_U_GCB_LVT	[LVT]
I18N_UCHAR_U_GCB_T	[T]
I18N_UCHAR_U_GCB_V	[V]
I18N_UCHAR_U_GCB_SPACING_MARK	[SM]
I18N_UCHAR_U_GCB_PREPEND	[PP]
I18N_UCHAR_U_GCB_REGIONAL_INDICATOR	[RI] (Since 4.0)
I18N_UCHAR_U_GCB_E_BASE	[EB] (Since 4.0)
I18N_UCHAR_U_GCB_E_BASE_GAZ	[EBG] (Since 4.0)
I18N_UCHAR_U_GCB_E_MODIFIER	[EM] (Since 4.0)
I18N_UCHAR_U_GCB_GLUE_AFTER_ZWJ	[GAZ] (Since 4.0)
I18N_UCHAR_U_GCB_ZWJ	[ZWJ] (Since 4.0)
I18N_UCHAR_UCHAR_U_GCB_COUNT

enum i18n_uchar_u_hangul_syllable_type_e

Enumeration for Hangul Syllable Type constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_HST_NOT_APPLICABLE	[NA]
I18N_UCHAR_U_HST_LEADING_JAMO	[L]
I18N_UCHAR_U_HST_VOWEL_JAMO	[V]
I18N_UCHAR_U_HST_TRAILING_JAMO	[T]
I18N_UCHAR_U_HST_LV_SYLLABLE	[LV]
I18N_UCHAR_U_HST_LVT_SYLLABLE	[LVT]
I18N_UCHAR_U_HST_COUNT

enum i18n_uchar_u_joining_group_e

Enumeration for Joining Group constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_JG_NO_JOINING_GROUP	No Joining Group
I18N_UCHAR_U_JG_AIN	Ain
I18N_UCHAR_U_JG_ALAPH	Alaph
I18N_UCHAR_U_JG_ALEF	Alef
I18N_UCHAR_U_JG_BEH	Beh
I18N_UCHAR_U_JG_BETH	Beth
I18N_UCHAR_U_JG_DAL	Dal
I18N_UCHAR_U_JG_DALATH_RISH	Dalath Rish
I18N_UCHAR_U_JG_E	E
I18N_UCHAR_U_JG_FEH	Feh
I18N_UCHAR_U_JG_FINAL_SEMKATH	Final Semkath
I18N_UCHAR_U_JG_GAF	Gaf
I18N_UCHAR_U_JG_GAMAL	Gamal
I18N_UCHAR_U_JG_HAH	Hah
I18N_UCHAR_U_JG_TEH_MARBUTA_GOAL	Teh Marbuta Goal
I18N_UCHAR_U_JG_HAMZA_ON_HEH_GOAL	Hamza On Heh Goal
I18N_UCHAR_U_JG_HE	He
I18N_UCHAR_U_JG_HEH	Heh
I18N_UCHAR_U_JG_HEH_GOAL	Heh Goal
I18N_UCHAR_U_JG_HETH	Heth
I18N_UCHAR_U_JG_KAF	Kaf
I18N_UCHAR_U_JG_KAPH	Kaph
I18N_UCHAR_U_JG_KNOTTED_HEH	Knotted Heh
I18N_UCHAR_U_JG_LAM	Lam
I18N_UCHAR_U_JG_LAMADH	Lamadh
I18N_UCHAR_U_JG_MEEM	Meem
I18N_UCHAR_U_JG_MIM	Mim
I18N_UCHAR_U_JG_NOON	Noon
I18N_UCHAR_U_JG_NUN	Nun
I18N_UCHAR_U_JG_PE	Pe
I18N_UCHAR_U_JG_QAF	Qaf
I18N_UCHAR_U_JG_QAPH	Qaph
I18N_UCHAR_U_JG_REH	Reh
I18N_UCHAR_U_JG_REVERSED_PE	Reversed Pe
I18N_UCHAR_U_JG_SAD	Sad
I18N_UCHAR_U_JG_SADHE	Sadhe
I18N_UCHAR_U_JG_SEEN	Seen
I18N_UCHAR_U_JG_SEMKATH	Semkath
I18N_UCHAR_U_JG_SHIN	Shin
I18N_UCHAR_U_JG_SWASH_KAF	Swash Kaf
I18N_UCHAR_U_JG_SYRIAC_WAW	Syriac Waw
I18N_UCHAR_U_JG_TAH	Tah
I18N_UCHAR_U_JG_TAW	Taw
I18N_UCHAR_U_JG_TEH_MARBUTA	Teh Marbuta
I18N_UCHAR_U_JG_TETH	Teth
I18N_UCHAR_U_JG_WAW	Waw
I18N_UCHAR_U_JG_YEH	Yeh
I18N_UCHAR_U_JG_YEH_BARREE	Yeh Barree
I18N_UCHAR_U_JG_YEH_WITH_TAIL	Yeh With Tail
I18N_UCHAR_U_JG_YUDH	Yudh
I18N_UCHAR_U_JG_YUDH_HE	Yudh He
I18N_UCHAR_U_JG_ZAIN	Zain
I18N_UCHAR_U_JG_FE	Fe
I18N_UCHAR_U_JG_KHAPH	Khaph
I18N_UCHAR_U_JG_ZHAIN	Zhain
I18N_UCHAR_U_JG_BURUSHASKI_YEH_BARREE	Burushaski Yeh Barree
I18N_UCHAR_U_JG_FARSI_YEH	Farsi Yeh
I18N_UCHAR_U_JG_NYA	Nya
I18N_UCHAR_U_JG_ROHINGYA_YEH	Rohingya Yeh
I18N_UCHAR_U_JG_MANICHAEAN_ALEPH	Manichaean Aleph (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_AYIN	Manichaean Ayin (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_BETH	Manichaean Beth (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_DALETH	Manichaean Daleth (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_DHAMEDH	Manichaean Dhamedh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_FIVE	Manichaean Five (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_GIMEL	Manichaean Gimel (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_HETH	Manichaean Heth (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_HUNDRED	Manichaean Hundred (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_KAPH	Manichaean Kaph (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_LAMEDH	Manichaean Lamedh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_MEM	Manichaean Mem (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_NUN	Manichaean Nun (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_ONE	Manichaean One (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_PE	Manichaean Pe (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_QOPH	Manichaean Qoph (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_RESH	Manichaean Resh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_SADHE	Manichaean Sadhe (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_SAMEKH	Manichaean Samekh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_TAW	Manichaean Taw (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_TEN	Manichaean Ten (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_TETH	Manichaean Teth (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_THAMEDH	Manichaean Thamedh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_TWENTY	Manichaean Twenty (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_WAW	Manichaean Waw (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_YODH	Manichaean Yodh (Since 3.0)
I18N_UCHAR_U_JG_MANICHAEAN_ZAYIN	Manichaean Zayin (Since 3.0)
I18N_UCHAR_U_JG_STRAIGHT_WAW	Straight Waw (Since 3.0)
I18N_UCHAR_U_JG_AFRICAN_FEH	African Feh (Since 4.0)
I18N_UCHAR_U_JG_AFRICAN_NOON	African Noon (Since 4.0)
I18N_UCHAR_U_JG_AFRICAN_QAF	African Qaf (Since 4.0)
I18N_UCHAR_U_JG_COUNT	Count

enum i18n_uchar_u_joining_type_e

Enumeration for Joining Type constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_JT_NON_JOINING	[U]
I18N_UCHAR_U_JT_JOIN_CAUSING	[C]
I18N_UCHAR_U_JT_DUAL_JOINING	[D]
I18N_UCHAR_U_JT_LEFT_JOINING	[L]
I18N_UCHAR_U_JT_RIGHT_JOINING	[R]
I18N_UCHAR_U_JT_TRANSPARENT	[T]
I18N_UCHAR_U_JT_COUNT	6

enum i18n_uchar_u_line_break_e

Enumeration for Line Break constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_LB_UNKNOWN	[XX]
I18N_UCHAR_U_LB_AMBIGUOUS	[AI]
I18N_UCHAR_U_LB_ALPHABETIC	[AL]
I18N_UCHAR_U_LB_BREAK_BOTH	[B2]
I18N_UCHAR_U_LB_BREAK_AFTER	[BA]
I18N_UCHAR_U_LB_BREAK_BEFORE	[BB]
I18N_UCHAR_U_LB_MANDATORY_BREAK	[BK]
I18N_UCHAR_U_LB_CONTINGENT_BREAK	[CB]
I18N_UCHAR_U_LB_CLOSE_PUNCTUATION	[CL]
I18N_UCHAR_U_LB_COMBINING_MARK	[CM]
I18N_UCHAR_U_LB_CARRIAGE_RETURN	[CR]
I18N_UCHAR_U_LB_EXCLAMATION	[EX]
I18N_UCHAR_U_LB_GLUE	[GL]
I18N_UCHAR_U_LB_HYPHEN	[HY]
I18N_UCHAR_U_LB_IDEOGRAPHIC	[ID]
I18N_UCHAR_U_LB_INSEPARABLE	[IN]
I18N_UCHAR_U_LB_INFIX_NUMERIC	[IS]
I18N_UCHAR_U_LB_LINE_FEED	[LF]
I18N_UCHAR_U_LB_NONSTARTER	[NS]
I18N_UCHAR_U_LB_NUMERIC	[NU]
I18N_UCHAR_U_LB_OPEN_PUNCTUATION	[OP]
I18N_UCHAR_U_LB_POSTFIX_NUMERIC	[PO]
I18N_UCHAR_U_LB_PREFIX_NUMERIC	[PR]
I18N_UCHAR_U_LB_QUOTATION	[QU]
I18N_UCHAR_U_LB_COMPLEX_CONTEXT	[SA]
I18N_UCHAR_U_LB_SURROGATE	[SG]
I18N_UCHAR_U_LB_SPACE	[SP]
I18N_UCHAR_U_LB_BREAK_SYMBOLS	[SY]
I18N_UCHAR_U_LB_ZWSPACE	[ZW]
I18N_UCHAR_U_LB_NEXT_LINE	[NL]
I18N_UCHAR_U_LB_WORD_JOINER	[WJ]
I18N_UCHAR_U_LB_H2	[H2]
I18N_UCHAR_U_LB_H3	[H3]
I18N_UCHAR_U_LB_JL	[JL]
I18N_UCHAR_U_LB_JT	[JT]
I18N_UCHAR_U_LB_JV	[JV]
I18N_UCHAR_U_LB_CLOSE_PARENTHESIS	[CP]
I18N_UCHAR_U_LB_CONDITIONAL_JAPANESE_STARTER	[CJ] (Since 4.0)
I18N_UCHAR_U_LB_HEBREW_LETTER	[HL] (Since 4.0)
I18N_UCHAR_U_LB_REGIONAL_INDICATOR	[RI] (Since 4.0)
I18N_UCHAR_U_LB_E_BASE	[EB] (Since 4.0)
I18N_UCHAR_U_LB_E_MODIFIER	[EM] (Since 4.0)
I18N_UCHAR_U_LB_ZWJ	[ZWJ] (Since 4.0)

enum i18n_uchar_u_numeric_type_e

Enumeration for Numeric Type constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_NT_NONE	[None]
I18N_UCHAR_U_NT_DECIMAL	[de]
I18N_UCHAR_U_NT_DIGIT	[di]
I18N_UCHAR_U_NT_NUMERIC	[nu]
I18N_UCHAR_U_NT_COUNT

enum i18n_uchar_u_property_name_choice_e

Enumeration for i18n_uchar_get_property_name() and i18n_uchar_get_property_value_name() constants.

Since :: 4.0

Enumerator:

I18N_UCHAR_U_SHORT_PROPERTY_NAME	This selector is used to return short name for a given property or value.
I18N_UCHAR_U_LONG_PROPERTY_NAME	This selector is used to return long name for a given property or value.

enum i18n_uchar_u_sentence_break_e

Enumeration for Sentence Break constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_SB_OTHER	[XX]
I18N_UCHAR_U_SB_ATERM	[AT]
I18N_UCHAR_U_SB_CLOSE	[CL]
I18N_UCHAR_U_SB_FORMAT	[FO]
I18N_UCHAR_U_SB_LOWER	[LO]
I18N_UCHAR_U_SB_NUMERIC	[NU]
I18N_UCHAR_U_SB_OLETTER	[LE]
I18N_UCHAR_U_SB_SEP	[SE]
I18N_UCHAR_U_SB_SP	[SP]
I18N_UCHAR_U_SB_STERM	[ST]
I18N_UCHAR_U_SB_UPPER	[UP]
I18N_UCHAR_U_SB_CR	[CR]
I18N_UCHAR_U_SB_EXTEND	[EX]
I18N_UCHAR_U_SB_LF	[LF]
I18N_UCHAR_U_SB_SCONTINUE	[SC]
I18N_UCHAR_U_SB_COUNT

enum i18n_uchar_u_word_break_values_e

Enumeration for Word Break constants.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_U_WB_OTHER	[XX]
I18N_UCHAR_U_WB_ALETTER	[LE]
I18N_UCHAR_U_WB_FORMAT	[FO]
I18N_UCHAR_U_WB_KATAKANA	[KA]
I18N_UCHAR_U_WB_MIDLETTER	[ML]
I18N_UCHAR_U_WB_MIDNUM	[MN]
I18N_UCHAR_U_WB_NUMERIC	[NU]
I18N_UCHAR_U_WB_EXTENDNUMLET	[EX]
I18N_UCHAR_U_WB_CR	[CR]
I18N_UCHAR_U_WB_EXTEND	[Extend]
I18N_UCHAR_U_WB_LF	[LF]
I18N_UCHAR_U_WB_MIDNUMLET	[MB]
I18N_UCHAR_U_WB_NEWLINE	[NL]
I18N_UCHAR_U_WB_REGIONAL_INDICATOR	[RI] (Since 3.0)
I18N_UCHAR_U_WB_HEBREW_LETTER	[HL] (Since 3.0)
I18N_UCHAR_U_WB_SINGLE_QUOTE	[SQ] (Since 3.0)
I18N_UCHAR_U_WB_DOUBLE_QUOTE	[DQ] (Since 3.0)
I18N_UCHAR_U_WB_E_BASE	[EB] (Since 4.0)
I18N_UCHAR_U_WB_E_BASE_GAZ	[EBG] (Since 4.0)
I18N_UCHAR_U_WB_E_MODIFIER	[EM] (Since 4.0)
I18N_UCHAR_U_WB_GLUE_AFTER_ZWJ	[GAZ] (Since 4.0)
I18N_UCHAR_U_WB_ZWJ	[ZWJ] (Since 4.0)
I18N_UCHAR_U_WB_COUNT

enum i18n_uchar_ublock_code_e

Constants for Unicode blocks, see the Unicode Data file Blocks.txt.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_UBLOCK_NO_BLOCK	No Block
I18N_UCHAR_UBLOCK_BASIC_LATIN	Basic Latin
I18N_UCHAR_UBLOCK_LATIN_1_SUPPLEMENT	Latin-1 Supplement
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_A	Latin Extended A
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_B	Latin Extended B
I18N_UCHAR_UBLOCK_IPA_EXTENSIONS	IPA Extensions
I18N_UCHAR_UBLOCK_SPACING_MODIFIER_LETTERS	Spacing Modifier Letters
I18N_UCHAR_UBLOCK_COMBINING_DIACRITICAL_MARKS	Combining Diacritical Marks
I18N_UCHAR_UBLOCK_GREEK	Greek
I18N_UCHAR_UBLOCK_CYRILLIC	Cyrillic
I18N_UCHAR_UBLOCK_ARMENIAN	Armenian
I18N_UCHAR_UBLOCK_HEBREW	Hebrew
I18N_UCHAR_UBLOCK_ARABIC	Arabic
I18N_UCHAR_UBLOCK_SYRIAC	Syriac
I18N_UCHAR_UBLOCK_THAANA	Thaana
I18N_UCHAR_UBLOCK_DEVANAGARI	Devanagari
I18N_UCHAR_UBLOCK_BENGALI	Bengali
I18N_UCHAR_UBLOCK_GURMUKHI	Gurmukhi
I18N_UCHAR_UBLOCK_GUJARATI	Gujarati
I18N_UCHAR_UBLOCK_ORIYA	Oriya
I18N_UCHAR_UBLOCK_TAMIL	Tamil
I18N_UCHAR_UBLOCK_TELUGU	Telugu
I18N_UCHAR_UBLOCK_KANNADA	Kannada
I18N_UCHAR_UBLOCK_MALAYALAM	Malayalam
I18N_UCHAR_UBLOCK_SINHALA	Sinhala
I18N_UCHAR_UBLOCK_THAI	Thai
I18N_UCHAR_UBLOCK_LAO	Lao
I18N_UCHAR_UBLOCK_TIBETAN	Tibetan
I18N_UCHAR_UBLOCK_MYANMAR	Myanmar
I18N_UCHAR_UBLOCK_GEORGIAN	Georgian
I18N_UCHAR_UBLOCK_HANGUL_JAMO	Hangul Jamo
I18N_UCHAR_UBLOCK_ETHIOPIC	Ethiopic
I18N_UCHAR_UBLOCK_CHEROKEE	Cherokee
I18N_UCHAR_UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS	Unified Canadian Aboriginal Syllabics
I18N_UCHAR_UBLOCK_OGHAM	Ogham
I18N_UCHAR_UBLOCK_RUNIC	Runic
I18N_UCHAR_UBLOCK_KHMER	Khmer
I18N_UCHAR_UBLOCK_MONGOLIAN	Mongolian
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_ADDITIONAL	Latin Extended Additional
I18N_UCHAR_UBLOCK_GREEK_EXTENDED	Greek Extended
I18N_UCHAR_UBLOCK_GENERAL_PUNCTUATION	General Punctuation
I18N_UCHAR_UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS	Superscripts And Subscripts
I18N_UCHAR_UBLOCK_CURRENCY_SYMBOLS	Currency Symbols
I18N_UCHAR_UBLOCK_COMBINING_MARKS_FOR_SYMBOLS	Combining Marks For Symbols
I18N_UCHAR_UBLOCK_LETTERLIKE_SYMBOLS	Letterlike Symbols
I18N_UCHAR_UBLOCK_NUMBER_FORMS	Number Forms
I18N_UCHAR_UBLOCK_ARROWS	Arrows
I18N_UCHAR_UBLOCK_MATHEMATICAL_OPERATORS	Mathematical Operators
I18N_UCHAR_UBLOCK_MISCELLANEOUS_TECHNICAL	Miscellaneous Technical
I18N_UCHAR_UBLOCK_CONTROL_PICTURES	Control Pictures
I18N_UCHAR_UBLOCK_OPTICAL_CHARACTER_RECOGNITION	Optical Character Recognition
I18N_UCHAR_UBLOCK_ENCLOSED_ALPHANUMERICS	Enclosed Alphanumerics
I18N_UCHAR_UBLOCK_BOX_DRAWING	Box Drawing
I18N_UCHAR_UBLOCK_BLOCK_ELEMENTS	Block Elements
I18N_UCHAR_UBLOCK_GEOMETRIC_SHAPES	Geometric Shapes
I18N_UCHAR_UBLOCK_MISCELLANEOUS_SYMBOLS	Miscellaneous Symbols
I18N_UCHAR_UBLOCK_DINGBATS	Dingbats
I18N_UCHAR_UBLOCK_BRAILLE_PATTERNS	Braille Patterns
I18N_UCHAR_UBLOCK_CJK_RADICALS_SUPPLEMENT	CJK Radicals Supplement
I18N_UCHAR_UBLOCK_KANGXI_RADICALS	Kangxi Radicals
I18N_UCHAR_UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS	Ideographic Description Characters
I18N_UCHAR_UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION	CJK Symbols And Punctuation
I18N_UCHAR_UBLOCK_HIRAGANA	Hiragana
I18N_UCHAR_UBLOCK_KATAKANA	Katakana
I18N_UCHAR_UBLOCK_BOPOMOFO	Bopomofo
I18N_UCHAR_UBLOCK_HANGUL_COMPATIBILITY_JAMO	Hangul Compatibility Jamo
I18N_UCHAR_UBLOCK_KANBUN	Kanbun
I18N_UCHAR_UBLOCK_BOPOMOFO_EXTENDED	Bopomofo Extended
I18N_UCHAR_UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS	Enclosed CJK Letters And Months
I18N_UCHAR_UBLOCK_CJK_COMPATIBILITY	CJK Compatibility
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A	CJK Unified Ideographs Extension A
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS	CJK Unified Ideographs
I18N_UCHAR_UBLOCK_YI_SYLLABLES	Yi Syllables
I18N_UCHAR_UBLOCK_YI_RADICALS	Yi Radicals
I18N_UCHAR_UBLOCK_HANGUL_SYLLABLES	Hangul Syllables
I18N_UCHAR_UBLOCK_HIGH_SURROGATES	High Surrogates
I18N_UCHAR_UBLOCK_HIGH_PRIVATE_USE_SURROGATES	High Private Use Surrogates
I18N_UCHAR_UBLOCK_LOW_SURROGATES	Low Surrogates
I18N_UCHAR_UBLOCK_PRIVATE_USE_AREA	Private Use Area
I18N_UCHAR_UBLOCK_PRIVATE_USE	Private Use
I18N_UCHAR_UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS	CJK Compatibility Ideographs
I18N_UCHAR_UBLOCK_ALPHABETIC_PRESENTATION_FORMS	Alphabetic Presentation Forms
I18N_UCHAR_UBLOCK_ARABIC_PRESENTATION_FORMS_A	Arabic Presentation Forms A
I18N_UCHAR_UBLOCK_COMBINING_HALF_MARKS	Combining Half Marks
I18N_UCHAR_UBLOCK_CJK_COMPATIBILITY_FORMS	CJK Compatibility Forms
I18N_UCHAR_UBLOCK_SMALL_FORM_VARIANTS	Small Form Variants
I18N_UCHAR_UBLOCK_ARABIC_PRESENTATION_FORMS_B	Arabic Presentation Forms B
I18N_UCHAR_UBLOCK_SPECIALS	Specials
I18N_UCHAR_UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS	Halfwidth And Fullwidth Forms
I18N_UCHAR_UBLOCK_OLD_ITALIC	Old Italic
I18N_UCHAR_UBLOCK_GOTHIC	Gothic
I18N_UCHAR_UBLOCK_DESERET	Deseret
I18N_UCHAR_UBLOCK_BYZANTINE_MUSICAL_SYMBOLS	Byzantine Musical Symbols
I18N_UCHAR_UBLOCK_MUSICAL_SYMBOLS	Musical Symbols
I18N_UCHAR_UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS	Mathematical Alphanumeric Symbols
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B	CJK Unified Ideographs Extension B
I18N_UCHAR_UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT	CJK Compatibility Ideographs Supplement
I18N_UCHAR_UBLOCK_TAGS	Tags
I18N_UCHAR_UBLOCK_CYRILLIC_SUPPLEMENT	Cyrillic Supplement
I18N_UCHAR_UBLOCK_CYRILLIC_SUPPLEMENTARY	Cyrillic Supplementary
I18N_UCHAR_UBLOCK_TAGALOG	Tagalog
I18N_UCHAR_UBLOCK_HANUNOO	Hanunoo
I18N_UCHAR_UBLOCK_BUHID	Buhid
I18N_UCHAR_UBLOCK_TAGBANWA	Tagbanwa
I18N_UCHAR_UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A	Miscellaneous Mathematical Symbols A
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_ARROWS_A	Supplemental Arrows A
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_ARROWS_B	Supplemental Arrows B
I18N_UCHAR_UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B	Miscellaneous Mathematical Symbols B
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS	Supplemental Mathematical Operators
I18N_UCHAR_UBLOCK_KATAKANA_PHONETIC_EXTENSIONS	Katakana Phonetic Extensions
I18N_UCHAR_UBLOCK_VARIATION_SELECTORS	Variation Selectors
I18N_UCHAR_UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A	Supplementary Private Use Area A
I18N_UCHAR_UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B	Supplementary Private Use Area B
I18N_UCHAR_UBLOCK_LIMBU	Limbu
I18N_UCHAR_UBLOCK_TAI_LE	Tai Le
I18N_UCHAR_UBLOCK_KHMER_SYMBOLS	Khmer Symbols
I18N_UCHAR_UBLOCK_PHONETIC_EXTENSIONS	Phonetic Extensions
I18N_UCHAR_UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS	Miscellaneous Symbols And Arrows
I18N_UCHAR_UBLOCK_YIJING_HEXAGRAM_SYMBOLS	Yijing Hexagram Symbols
I18N_UCHAR_UBLOCK_LINEAR_B_SYLLABARY	Linear B Syllabary
I18N_UCHAR_UBLOCK_LINEAR_B_IDEOGRAMS	Linear B Ideograms
I18N_UCHAR_UBLOCK_AEGEAN_NUMBERS	Aegean Numbers
I18N_UCHAR_UBLOCK_UGARITIC	Ugaritic
I18N_UCHAR_UBLOCK_SHAVIAN	Shavian
I18N_UCHAR_UBLOCK_OSMANYA	Osmanya
I18N_UCHAR_UBLOCK_CYPRIOT_SYLLABARY	Cypriot Syllabary
I18N_UCHAR_UBLOCK_TAI_XUAN_JING_SYMBOLS	Tai Xuan Jing Symbols
I18N_UCHAR_UBLOCK_VARIATION_SELECTORS_SUPPLEMENT	Variation Selectors Supplement
I18N_UCHAR_UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION	Ancient Greek Musical Notation
I18N_UCHAR_UBLOCK_ANCIENT_GREEK_NUMBERS	Ancient Greek Numbers
I18N_UCHAR_UBLOCK_ARABIC_SUPPLEMENT	Arabic Supplement
I18N_UCHAR_UBLOCK_BUGINESE	Buginese
I18N_UCHAR_UBLOCK_CJK_STROKES	CJK Strokes
I18N_UCHAR_UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT	Combining Diacritical Marks Supplement
I18N_UCHAR_UBLOCK_COPTIC	Coptic
I18N_UCHAR_UBLOCK_ETHIOPIC_EXTENDED	Ethiopic Extended
I18N_UCHAR_UBLOCK_ETHIOPIC_SUPPLEMENT	Ethiopic Supplement
I18N_UCHAR_UBLOCK_GEORGIAN_SUPPLEMENT	Georgian Supplement
I18N_UCHAR_UBLOCK_GLAGOLITIC	Glagolitic
I18N_UCHAR_UBLOCK_KHAROSHTHI	Kharoshthi
I18N_UCHAR_UBLOCK_MODIFIER_TONE_LETTERS	Modifier Tone Letters
I18N_UCHAR_UBLOCK_NEW_TAI_LUE	New Tai Lue
I18N_UCHAR_UBLOCK_OLD_PERSIAN	Old Persian
I18N_UCHAR_UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT	Phonetic Extensions Supplement
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_PUNCTUATION	Supplemental Punctuation
I18N_UCHAR_UBLOCK_SYLOTI_NAGRI	Syloti Nagri
I18N_UCHAR_UBLOCK_TIFINAGH	Tifinagh
I18N_UCHAR_UBLOCK_VERTICAL_FORMS	Vertical Forms
I18N_UCHAR_UBLOCK_NKO	Nko
I18N_UCHAR_UBLOCK_BALINESE	Balinese
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_C	Latin Extended C
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_D	Latin Extended D
I18N_UCHAR_UBLOCK_PHAGS_PA	Phags Pa
I18N_UCHAR_UBLOCK_PHOENICIAN	Phoenician
I18N_UCHAR_UBLOCK_CUNEIFORM	Cuneiform
I18N_UCHAR_UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION	Cuneiform Numbers And Punctuation
I18N_UCHAR_UBLOCK_COUNTING_ROD_NUMERALS	Counting Rod Numerals
I18N_UCHAR_UBLOCK_SUNDANESE	Sundanese
I18N_UCHAR_UBLOCK_LEPCHA	Lepcha
I18N_UCHAR_UBLOCK_OL_CHIKI	Ol Chiki
I18N_UCHAR_UBLOCK_CYRILLIC_EXTENDED_A	Cyrillic Extended A
I18N_UCHAR_UBLOCK_VAI	Vai
I18N_UCHAR_UBLOCK_CYRILLIC_EXTENDED_B	Cyrillic Extended B
I18N_UCHAR_UBLOCK_SAURASHTRA	Saurashtra
I18N_UCHAR_UBLOCK_KAYAH_LI	Kayah Li
I18N_UCHAR_UBLOCK_REJANG	Rejang
I18N_UCHAR_UBLOCK_CHAM	Cham
I18N_UCHAR_UBLOCK_ANCIENT_SYMBOLS	Ancient Symbols
I18N_UCHAR_UBLOCK_PHAISTOS_DISC	Phaistos Disc
I18N_UCHAR_UBLOCK_LYCIAN	Lycian
I18N_UCHAR_UBLOCK_CARIAN	Carian
I18N_UCHAR_UBLOCK_LYDIAN	Lydian
I18N_UCHAR_UBLOCK_MAHJONG_TILES	Mahjong Tiles
I18N_UCHAR_UBLOCK_DOMINO_TILES	Domino Tiles
I18N_UCHAR_UBLOCK_SAMARITAN	Samaritan
I18N_UCHAR_UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED	Unified Canadian Aboriginal Syllabics Extended
I18N_UCHAR_UBLOCK_TAI_THAM	Tai Tham
I18N_UCHAR_UBLOCK_VEDIC_EXTENSIONS	Vedic Extensions
I18N_UCHAR_UBLOCK_LISU	Lisu
I18N_UCHAR_UBLOCK_BAMUM	Bamum
I18N_UCHAR_UBLOCK_COMMON_INDIC_NUMBER_FORMS	Common Indic Number Forms
I18N_UCHAR_UBLOCK_DEVANAGARI_EXTENDED	Devanagari Extended
I18N_UCHAR_UBLOCK_HANGUL_JAMO_EXTENDED_A	Hangul Jamo Extended A
I18N_UCHAR_UBLOCK_JAVANESE	Javanese
I18N_UCHAR_UBLOCK_MYANMAR_EXTENDED_A	Myanmar Extended A
I18N_UCHAR_UBLOCK_TAI_VIET	Tai Viet
I18N_UCHAR_UBLOCK_MEETEI_MAYEK	Meetei Mayek
I18N_UCHAR_UBLOCK_HANGUL_JAMO_EXTENDED_B	Hangul Jamo Extended B
I18N_UCHAR_UBLOCK_IMPERIAL_ARAMAIC	Imperial Aramaic
I18N_UCHAR_UBLOCK_OLD_SOUTH_ARABIAN	Old South Arabian
I18N_UCHAR_UBLOCK_AVESTAN	Avestan
I18N_UCHAR_UBLOCK_INSCRIPTIONAL_PARTHIAN	Inscriptional Parthian
I18N_UCHAR_UBLOCK_INSCRIPTIONAL_PAHLAVI	Inscriptional Pahlavi
I18N_UCHAR_UBLOCK_OLD_TURKIC	Old Turkic
I18N_UCHAR_UBLOCK_RUMI_NUMERAL_SYMBOLS	Rumi Numeral Symbols
I18N_UCHAR_UBLOCK_KAITHI	Kaithi
I18N_UCHAR_UBLOCK_EGYPTIAN_HIEROGLYPHS	Egyptian Hieroglyphs
I18N_UCHAR_UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT	Enclosed Alphanumeric Supplement
I18N_UCHAR_UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT	Enclosed Ideographic Supplement
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C	CJK Unified Ideographs Extension C
I18N_UCHAR_UBLOCK_MANDAIC	Mandaic
I18N_UCHAR_UBLOCK_BATAK	Batak
I18N_UCHAR_UBLOCK_ETHIOPIC_EXTENDED_A	Ethiopic Extended A
I18N_UCHAR_UBLOCK_BRAHMI	Brahmi
I18N_UCHAR_UBLOCK_BAMUM_SUPPLEMENT	Bamum Supplement
I18N_UCHAR_UBLOCK_KANA_SUPPLEMENT	Kana Supplement
I18N_UCHAR_UBLOCK_PLAYING_CARDS	Playing Cards
I18N_UCHAR_UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	Miscellaneous Symbols And Pictographs
I18N_UCHAR_UBLOCK_EMOTICONS	Emoticons
I18N_UCHAR_UBLOCK_TRANSPORT_AND_MAP_SYMBOLS	Transport And Map Symbols
I18N_UCHAR_UBLOCK_ALCHEMICAL_SYMBOLS	Alchemical Symbols
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D	CJK Unified Ideographs Extension D
I18N_UCHAR_UBLOCK_ARABIC_EXTENDED_A	Arabic Extended A (Since 4.0)
I18N_UCHAR_UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS	Arabic Mathematical Alphabetic Symbols (Since 4.0)
I18N_UCHAR_UBLOCK_CHAKMA	Chakma (Since 4.0)
I18N_UCHAR_UBLOCK_MEETEI_MAYEK_EXTENSIONS	Meetei Mayek Extensions (Since 4.0)
I18N_UCHAR_UBLOCK_MEROITIC_CURSIVE	Meroitic Cursive (Since 4.0)
I18N_UCHAR_UBLOCK_MEROITIC_HIEROGLYPHS	Meroitic Hieroglyphs (Since 4.0)
I18N_UCHAR_UBLOCK_MIAO	Miao (Since 4.0)
I18N_UCHAR_UBLOCK_SHARADA	Sharada (Since 4.0)
I18N_UCHAR_UBLOCK_SORA_SOMPENG	Sora Sompeng (Since 4.0)
I18N_UCHAR_UBLOCK_SUNDANESE_SUPPLEMENT	Sundanese Supplement (Since 4.0)
I18N_UCHAR_UBLOCK_TAKRI	Takri (Since 4.0)
I18N_UCHAR_UBLOCK_BASSA_VAH	Bassa Vah (Since 3.0)
I18N_UCHAR_UBLOCK_CAUCASIAN_ALBANIAN	Caucasian Albanian (Since 3.0)
I18N_UCHAR_UBLOCK_COPTIC_EPACT_NUMBERS	Coptic Epact Numbers (Since 3.0)
I18N_UCHAR_UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED	Combining Diacritical Marks Extended (Since 3.0)
I18N_UCHAR_UBLOCK_DUPLOYAN	Duployan (Since 3.0)
I18N_UCHAR_UBLOCK_ELBASAN	Elbasan (Since 3.0)
I18N_UCHAR_UBLOCK_GEOMETRIC_SHAPES_EXTENDED	Geometric Shapes Extended (Since 3.0)
I18N_UCHAR_UBLOCK_GRANTHA	Grantha (Since 3.0)
I18N_UCHAR_UBLOCK_KHOJKI	Khojki (Since 3.0)
I18N_UCHAR_UBLOCK_KHUDAWADI	Khudawadi (Since 3.0)
I18N_UCHAR_UBLOCK_LATIN_EXTENDED_E	Latin Extended E (Since 3.0)
I18N_UCHAR_UBLOCK_LINEAR_A	Linear A (Since 3.0)
I18N_UCHAR_UBLOCK_MAHAJANI	Mahajani (Since 3.0)
I18N_UCHAR_UBLOCK_MANICHAEAN	Manichaean (Since 3.0)
I18N_UCHAR_UBLOCK_MENDE_KIKAKUI	Mende Kikakui (Since 3.0)
I18N_UCHAR_UBLOCK_MODI	Modi (Since 3.0)
I18N_UCHAR_UBLOCK_MRO	Mro (Since 3.0)
I18N_UCHAR_UBLOCK_MYANMAR_EXTENDED_B	Myanmar Extended B (Since 3.0)
I18N_UCHAR_UBLOCK_NABATAEAN	Nabataean (Since 3.0)
I18N_UCHAR_UBLOCK_OLD_NORTH_ARABIAN	Old North Arabian (Since 3.0)
I18N_UCHAR_UBLOCK_OLD_PERMIC	Old Permic (Since 3.0)
I18N_UCHAR_UBLOCK_ORNAMENTAL_DINGBATS	Ornamental Dingbats (Since 3.0)
I18N_UCHAR_UBLOCK_PAHAWH_HMONG	Pahawh Hmong (Since 3.0)
I18N_UCHAR_UBLOCK_PALMYRENE	Palmyrene (Since 3.0)
I18N_UCHAR_UBLOCK_PAU_CIN_HAU	Pau Cin Hau (Since 3.0)
I18N_UCHAR_UBLOCK_PSALTER_PAHLAVI	Psalter Pahlavi (Since 3.0)
I18N_UCHAR_UBLOCK_SHORTHAND_FORMAT_CONTROLS	Shorthand Format Controls (Since 3.0)
I18N_UCHAR_UBLOCK_SIDDHAM	Siddham (Since 3.0)
I18N_UCHAR_UBLOCK_SINHALA_ARCHAIC_NUMBERS	Sinhala Archaic Numbers (Since 3.0)
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_ARROWS_C	Supplemental Arrows C (Since 3.0)
I18N_UCHAR_UBLOCK_TIRHUTA	Tirhuta (Since 3.0)
I18N_UCHAR_UBLOCK_WARANG_CITI	Warang Citi (Since 3.0)
I18N_UCHAR_UBLOCK_AHOM	Ahom (Since 4.0)
I18N_UCHAR_ANATOLIAN_HIEROGLYPHS	Anatolian Hieroglyphs (Since 4.0)
I18N_UCHAR_UBLOCK_CHEROKEE_SUPPLEMENT	Cherokee Supplement (Since 4.0)
I18N_UCHAR_UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E	CJK Unified Ideographs Extension E (Since 4.0)
I18N_UCHAR_UBLOCK_EARLY_DYNASTIC_CUNEIFORM	Early Dynastic Cuneiform (Since 4.0)
I18N_UCHAR_UBLOCK_HATRAN	Hatran (Since 4.0)
I18N_UCHAR_UBLOCK_MULTANI	Multani (Since 4.0)
I18N_UCHAR_UBLOCK_OLD_HUNGARIAN	Old Hungarian (Since 4.0)
I18N_UCHAR_UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS	Supplemental Symbols And Pictographs (Since 4.0)
I18N_UCHAR_UBLOCK_SUTTON_SIGNWRITING	Sutton SignWriting (Since 4.0)
I18N_UCHAR_UBLOCK_ADLAM	Adlam (Since 4.0)
I18N_UCHAR_UBLOCK_BHAIKSUKI	Bhaiksuki (Since 4.0)
I18N_UCHAR_UBLOCK_CYRILLIC_EXTENDED_C	Cyrillic Extended C (Since 4.0)
I18N_UCHAR_UBLOCK_GLAGOLITIC_SUPPLEMENT	Glagolitic Supplement (Since 4.0)
I18N_UCHAR_UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION	Ideographic Symbols And Punctuation (Since 4.0)
I18N_UCHAR_UBLOCK_MARCHEN	Marchen (Since 4.0)
I18N_UCHAR_UBLOCK_MONGOLIAN_SUPPLEMENT	Mongolian Supplement (Since 4.0)
I18N_UCHAR_UBLOCK_NEWA	Newa (Since 4.0)
I18N_UCHAR_UBLOCK_OSAGE	Osage (Since 4.0)
I18N_UCHAR_UBLOCK_TANGUT	Tangut (Since 4.0)
I18N_UCHAR_UBLOCK_TANGUT_COMPONENTS	Tangut Components (Since 4.0)
I18N_UCHAR_UBLOCK_COUNT	Count
I18N_UCHAR_UBLOCK_INVALID_CODE	Invalid Code

enum i18n_uchar_uproperty_e

Enumeration of constants for Unicode properties. The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see http://www.unicode.org/ucd/ . For names of Unicode properties see the UCD file PropertyAliases.txt.

Since :: 2.3.1

Enumerator:

I18N_UCHAR_ALPHABETIC	Binary property Alphabetic. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic
I18N_UCHAR_BINARY_START	First constant for binary Unicode properties.
I18N_UCHAR_ASCII_HEX_DIGIT	Binary property ASCII_Hex_Digit. 0-9 A-F a-f
I18N_UCHAR_BIDI_CONTROL	Binary property Bidi_Control. Format controls which have specific functions in the Bidi Algorithm.
I18N_UCHAR_BIDI_MIRRORED	Binary property Bidi_Mirrored. Characters that may change display in RTL text. See Bidi Algorithm, UTR 9.
I18N_UCHAR_DASH	Binary property Dash. Variations of dashes.
I18N_UCHAR_DEFAULT_IGNORABLE_CODE_POINT	Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space)
I18N_UCHAR_DEPRECATED	Binary property Deprecated (new in Unicode 3.2). The usage of deprecated characters is strongly discouraged.
I18N_UCHAR_DIACRITIC	Binary property Diacritic. Characters that linguistically modify the meaning of another character to which they apply.
I18N_UCHAR_EXTENDER	Binary property Extender. Extend the value or shape of a preceding alphabetic character, e.g. length and iteration marks.
I18N_UCHAR_FULL_COMPOSITION_EXCLUSION	Binary property Full_Composition_Exclusion. CompositionExclusions.txt+Singleton Decompositions+ Non-Starter Decompositions.
I18N_UCHAR_GRAPHEME_BASE	Binary property Grapheme_Base (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ
I18N_UCHAR_GRAPHEME_EXTEND	Binary property Grapheme_Extend (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ
I18N_UCHAR_GRAPHEME_LINK	Binary property Grapheme_Link (new in Unicode 3.2). For programmatic determination of grapheme cluster boundaries.
I18N_UCHAR_HEX_DIGIT	Binary property Hex_Digit. Characters commonly used for hexadecimal numbers.
I18N_UCHAR_HYPHEN	Binary property Hyphen. Dashes used to mark connections between pieces of words, plus the Katakana middle dot.
I18N_UCHAR_ID_CONTINUE	Binary property ID_Continue. Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc
I18N_UCHAR_ID_START	Binary property ID_Start. Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl
I18N_UCHAR_IDEOGRAPHIC	Binary property Ideographic. CJKV ideographs.
I18N_UCHAR_IDS_BINARY_OPERATOR	Binary property IDS_Binary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
I18N_UCHAR_IDS_TRINARY_OPERATOR	Binary property IDS_Trinary_Operator (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
I18N_UCHAR_JOIN_CONTROL	Binary property Join_Control. Format controls for cursive joining and ligation.
I18N_UCHAR_LOGICAL_ORDER_EXCEPTION	Binary property Logical_Order_Exception (new in Unicode 3.2). Characters that do not use logical order and require special handling in most processing.
I18N_UCHAR_LOWERCASE	Binary property Lowercase. Ll+Other_Lowercase
I18N_UCHAR_MATH	Binary property Math. Sm+Other_Math
I18N_UCHAR_NONCHARACTER_CODE_POINT	Binary property Noncharacter_Code_Point. Code points that are explicitly defined as illegal for the encoding of characters.
I18N_UCHAR_QUOTATION_MARK	Binary property Quotation_Mark.
I18N_UCHAR_RADICAL	Binary property Radical (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
I18N_UCHAR_SOFT_DOTTED	Binary property Soft_Dotted (new in Unicode 3.2). Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear.
I18N_UCHAR_TERMINAL_PUNCTUATION	Binary property Terminal_Punctuation. Punctuation characters that generally mark the end of textual units.
I18N_UCHAR_UNIFIED_IDEOGRAPH	Binary property Unified_Ideograph (new in Unicode 3.2). For programmatic determination of Ideographic Description Sequences.
I18N_UCHAR_UPPERCASE	Binary property Uppercase. Lu+Other_Uppercase
I18N_UCHAR_WHITE_SPACE	Binary property White_Space. Space characters+TAB+CR+LF-ZWSP-ZWNBSP
I18N_UCHAR_XID_CONTINUE	Binary property XID_Continue. ID_Continue modified to allow closure under normalization forms NFKC and NFKD.
I18N_UCHAR_XID_START	Binary property XID_Start. ID_Start modified to allow closure under normalization forms NFKC and NFKD.
I18N_UCHAR_CASE_SENSITIVE	Binary property Case_Sensitive. Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter.
I18N_UCHAR_S_TERM	Binary property STerm (new in Unicode 4.0.1).
I18N_UCHAR_VARIATION_SELECTOR	Binary property Variation_Selector (new in Unicode 4.0.1). Indicates all those characters that qualify as Variation Selectors.
I18N_UCHAR_NFD_INERT	Binary property NFD_Inert. ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters.
I18N_UCHAR_NFKD_INERT	Binary property NFKD_Inert. ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
I18N_UCHAR_NFC_INERT	Binary property NFC_Inert. ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters.
I18N_UCHAR_NFKC_INERT	Binary property NFKC_Inert. ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
I18N_UCHAR_SEGMENT_STARTER	Binary property Segment_Starter. Property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
I18N_UCHAR_PATTERN_SYNTAX	Binary property Pattern_Syntax (new in Unicode 4.1).
I18N_UCHAR_PATTERN_WHITE_SPACE	Binary property Pattern_White_Space (new in Unicode 4.1).
I18N_UCHAR_POSIX_ALNUM	Binary property alnum (a C/POSIX character class).
I18N_UCHAR_POSIX_BLANK	Binary property blank (a C/POSIX character class).
I18N_UCHAR_POSIX_GRAPH	Binary property graph (a C/POSIX character class).
I18N_UCHAR_POSIX_PRINT	Binary property print (a C/POSIX character class).
I18N_UCHAR_POSIX_XDIGIT	Binary property xdigit (a C/POSIX character class).
I18N_UCHAR_CASED	Binary property Cased. For Lowercase, Uppercase and Titlecase characters.
I18N_UCHAR_CASE_IGNORABLE	Binary property Case_Ignorable. Used in context-sensitive case mappings.
I18N_UCHAR_CHANGES_WHEN_LOWERCASED	Binary property Changes_When_Lowercased.
I18N_UCHAR_CHANGES_WHEN_UPPERCASED	Binary property Changes_When_Uppercased.
I18N_UCHAR_CHANGES_WHEN_TITLECASED	Binary property Changes_When_Titlecased.
I18N_UCHAR_CHANGES_WHEN_CASEFOLDED	Binary property Changes_When_Casefolded.
I18N_UCHAR_CHANGES_WHEN_CASEMAPPED	Binary property Changes_When_Casemapped.
I18N_UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED	Binary property Changes_When_NFKC_Casefolded.
I18N_UCHAR_EMOJI	Binary property Emoji (Since 4.0).
I18N_UCHAR_EMOJI_PRESENTATION	Binary property Emoji Presentation (Since 4.0).
I18N_UCHAR_EMOJI_MODIFIER	Binary property Emoji Modifier (Since 4.0).
I18N_UCHAR_EMOJI_MODIFIER_BASE	Binary property Emoji Modifier Base (Since 4.0).
I18N_UCHAR_BINARY_LIMIT	One more than the last constant for binary Unicode properties.
I18N_UCHAR_BIDI_CLASS	Enumerated property Bidi_Class. Same as u_charDirection, returns i18n_uchar_direction_e values.
I18N_UCHAR_INT_START	First constant for enumerated/integer Unicode properties.
I18N_UCHAR_BLOCK	Enumerated property Block. Returns i18n_uchar_ublock_code_e values.
I18N_UCHAR_CANONICAL_COMBINING_CLASS	Enumerated property Canonical_Combining_Class. Returns 8-bit numeric values.
I18N_UCHAR_DECOMPOSITION_TYPE	Enumerated property Decomposition_Type. Returns i18n_uchar_u_decomposition_type_e values.
I18N_UCHAR_EAST_ASIAN_WIDTH	Enumerated property East_Asian_Width. Returns i18n_uchar_u_east_asian_width_e values.
I18N_UCHAR_GENERAL_CATEGORY	Enumerated property General_Category. Returns i18n_uchar_category_e values.
I18N_UCHAR_JOINING_GROUP	Enumerated property Joining_Group. Returns i18n_uchar_u_joining_group_e values.
I18N_UCHAR_JOINING_TYPE	Enumerated property Joining_Type. Returns i18n_uchar_u_joining_type_e values.
I18N_UCHAR_LINE_BREAK	Enumerated property Line_Break. Returns i18n_uchar_u_line_break_e values.
I18N_UCHAR_NUMERIC_TYPE	Enumerated property Numeric_Type. Returns i18n_uchar_u_numeric_type_e values.
I18N_UCHAR_SCRIPT	Enumerated property Script. Returns i18n_uscript_code_e values.
I18N_UCHAR_HANGUL_SYLLABLE_TYPE	Enumerated property Hangul_Syllable_Type, new in Unicode 4. Returns i18n_uchar_u_hangul_syllable_type_e values.
I18N_UCHAR_NFD_QUICK_CHECK	Enumerated property NFD_Quick_Check. Returns i18n_unormalization_check_result_e values.
I18N_UCHAR_NFKD_QUICK_CHECK	Enumerated property NFKD_Quick_Check. Returns i18n_unormalization_check_result_e values.
I18N_UCHAR_NFC_QUICK_CHECK	Enumerated property NFC_Quick_Check. Returns i18n_unormalization_check_result_e values.
I18N_UCHAR_NFKC_QUICK_CHECK	Enumerated property NFKC_Quick_Check. Returns i18n_unormalization_check_result_e values.
I18N_UCHAR_LEAD_CANONICAL_COMBINING_CLASS	Enumerated property Lead_Canonical_Combining_Class. Returns 8-bit numeric values.
I18N_UCHAR_TRAIL_CANONICAL_COMBINING_CLASS	Enumerated property Trail_Canonical_Combining_Class. Returns 8-bit numeric values.
I18N_UCHAR_GRAPHEME_CLUSTER_BREAK	Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). Returns i18n_uchar_u_grapheme_cluster_break_e values.
I18N_UCHAR_SENTENCE_BREAK	Enumerated property Sentence_Break (new in Unicode 4.1). Returns i18n_uchar_u_sentence_break_e values.
I18N_UCHAR_WORD_BREAK	Enumerated property Word_Break (new in Unicode 4.1). Returns i18n_uchar_u_word_break_values_e values.
I18N_UCHAR_BIDI_PAIRED_BRACKET_TYPE	Enumerated property Bidi_Paired_Bracket_Type. Returns i18n_uchar_u_bidi_paired_bracket_type_e values.
I18N_UCHAR_INT_LIMIT	One more than the last constant for enumerated/integer Unicode properties.
I18N_UCHAR_GENERAL_CATEGORY_MASK	Bitmask property General_Category_Mask. Mask values should be cast to uint32_t.
I18N_UCHAR_MASK_START	First constant for bit-mask Unicode properties.
I18N_UCHAR_MASK_LIMIT	One more than the last constant for bit-mask Unicode properties.
I18N_UCHAR_NUMERIC_VALUE	Double property Numeric_Value.
I18N_UCHAR_DOUBLE_START	First constant for double Unicode properties.
I18N_UCHAR_DOUBLE_LIMIT	One more than the last constant for double Unicode properties.
I18N_UCHAR_AGE	String property Age.
I18N_UCHAR_STRING_START	First constant for string Unicode properties.
I18N_UCHAR_BIDI_MIRRORING_GLYPH	String property Bidi_Mirroring_Glyph.
I18N_UCHAR_CASE_FOLDING	String property Case_Folding.
I18N_UCHAR_LOWERCASE_MAPPING	String property Lowercase_Mapping.
I18N_UCHAR_NAME	String property Name.
I18N_UCHAR_SIMPLE_CASE_FOLDING	String property Simple_Case_Folding.
I18N_UCHAR_SIMPLE_LOWERCASE_MAPPING	String property Simple_Lowercase_Mapping.
I18N_UCHAR_SIMPLE_TITLECASE_MAPPING	String property Simple_Titlecase_Mapping.
I18N_UCHAR_SIMPLE_UPPERCASE_MAPPING	String property Simple_Uppercase_Mapping.
I18N_UCHAR_TITLECASE_MAPPING	String property Titlecase_Mapping.
I18N_UCHAR_UPPERCASE_MAPPING	String property Uppercase_Mapping.
I18N_UCHAR_BIDI_PAIRED_BRACKET	String property Bidi_Paired_Bracket.
I18N_UCHAR_STRING_LIMIT	One more than the last constant for string Unicode properties.
I18N_UCHAR_SCRIPT_EXTENSIONS	Provisional property Script_Extensions (new in Unicode 6.0).
I18N_UCHAR_OTHER_PROPERTY_START	First constant for Unicode properties with unusual value types.
I18N_UCHAR_OTHER_PROPERTY_LIMIT	One more than the last constant for Unicode properties with unusual value types.
I18N_UCHAR_INVALID_CODE	Represents a nonexistent or invalid property or property value.

enum i18n_uscript_code_e

Constants for ISO 15924 script codes.

Since :: 2.4

Enumerator:

I18N_USCRIPT_COMMON	Zyyy
I18N_USCRIPT_INHERITED	Zinh, "Code for inherited script", for non-spacing combining marks; also Qaai
I18N_USCRIPT_ARABIC	Arab
I18N_USCRIPT_ARMENIAN	Armn
I18N_USCRIPT_BENGALI	Beng
I18N_USCRIPT_BOPOMOFO	Bopo
I18N_USCRIPT_CHEROKEE	Cher
I18N_USCRIPT_COPTIC	Copt
I18N_USCRIPT_CYRILLIC	Cyrl
I18N_USCRIPT_DESERET	Dsrt
I18N_USCRIPT_DEVANAGARI	Deva
I18N_USCRIPT_ETHIOPIC	Ethi
I18N_USCRIPT_GEORGIAN	Geor
I18N_USCRIPT_GOTHIC	Goth
I18N_USCRIPT_GREEK	Grek
I18N_USCRIPT_GUJARATI	Gujr
I18N_USCRIPT_GURMUKHI	Guru
I18N_USCRIPT_HAN	Hani
I18N_USCRIPT_HANGUL	Hang
I18N_USCRIPT_HEBREW	Hebr
I18N_USCRIPT_HIRAGANA	Hira
I18N_USCRIPT_KANNADA	Knda
I18N_USCRIPT_KATAKANA	Kana
I18N_USCRIPT_KHMER	Khmr
I18N_USCRIPT_LAO	Laoo
I18N_USCRIPT_LATIN	Latn
I18N_USCRIPT_MALAYALAM	Mlym
I18N_USCRIPT_MONGOLIAN	Mong
I18N_USCRIPT_MYANMAR	Mymr
I18N_USCRIPT_OGHAM	Ogam
I18N_USCRIPT_OLD_ITALIC	Ital
I18N_USCRIPT_ORIYA	Orya
I18N_USCRIPT_RUNIC	Runr
I18N_USCRIPT_SINHALA	Sinh
I18N_USCRIPT_SYRIAC	Syrc
I18N_USCRIPT_TAMIL	Taml
I18N_USCRIPT_TELUGU	Telu
I18N_USCRIPT_THAANA	Thaa
I18N_USCRIPT_THAI	Thai
I18N_USCRIPT_TIBETAN	Tibt
I18N_USCRIPT_CANADIAN_ABORIGINAL	Cans, Canadian_Aboriginal script.
I18N_USCRIPT_UCAS	Canadian_Aboriginal script (alias).
I18N_USCRIPT_YI	Yiii
I18N_USCRIPT_TAGALOG	Tglg
I18N_USCRIPT_HANUNOO	Hano
I18N_USCRIPT_BUHID	Buhd
I18N_USCRIPT_TAGBANWA	Tagb
I18N_USCRIPT_BRAILLE	Brai
I18N_USCRIPT_CYPRIOT	Cprt
I18N_USCRIPT_LIMBU	Limb
I18N_USCRIPT_LINEAR_B	Linb
I18N_USCRIPT_OSMANYA	Osma
I18N_USCRIPT_SHAVIAN	Shaw
I18N_USCRIPT_TAI_LE	Tale
I18N_USCRIPT_UGARITIC	Ugar
I18N_USCRIPT_KATAKANA_OR_HIRAGANA	Hrkt
I18N_USCRIPT_BUGINESE	Bugi
I18N_USCRIPT_GLAGOLITIC	Glag
I18N_USCRIPT_KHAROSHTHI	Khar
I18N_USCRIPT_SYLOTI_NAGRI	Sylo
I18N_USCRIPT_NEW_TAI_LUE	Talu
I18N_USCRIPT_TIFINAGH	Tfng
I18N_USCRIPT_OLD_PERSIAN	Xpeo
I18N_USCRIPT_BALINESE	Bali
I18N_USCRIPT_BATAK	Batk
I18N_USCRIPT_BLISSYMBOLS	Blis
I18N_USCRIPT_BRAHMI	Brah
I18N_USCRIPT_CHAM	Cham
I18N_USCRIPT_CIRTH	Cirt
I18N_USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC	Cyrs
I18N_USCRIPT_DEMOTIC_EGYPTIAN	Egyd
I18N_USCRIPT_HIERATIC_EGYPTIAN	Egyh
I18N_USCRIPT_EGYPTIAN_HIEROGLYPHS	Egyp
I18N_USCRIPT_KHUTSURI	Geok
I18N_USCRIPT_SIMPLIFIED_HAN	Hans
I18N_USCRIPT_TRADITIONAL_HAN	Hant
I18N_USCRIPT_PAHAWH_HMONG	Hmng
I18N_USCRIPT_OLD_HUNGARIAN	Hung
I18N_USCRIPT_HARAPPAN_INDUS	Inds
I18N_USCRIPT_JAVANESE	Java
I18N_USCRIPT_KAYAH_LI	Kali
I18N_USCRIPT_LATIN_FRAKTUR	Latf
I18N_USCRIPT_LATIN_GAELIC	Latg
I18N_USCRIPT_LEPCHA	Lepc
I18N_USCRIPT_LINEAR_A	Lina
I18N_USCRIPT_MANDAIC	Mand
I18N_USCRIPT_MAYAN_HIEROGLYPHS	Maya
I18N_USCRIPT_MEROITIC_HIEROGLYPHS	Mero
I18N_USCRIPT_NKO	Nkoo
I18N_USCRIPT_ORKHON	Orkh
I18N_USCRIPT_OLD_PERMIC	Perm
I18N_USCRIPT_PHAGS_PA	Phag
I18N_USCRIPT_PHOENICIAN	Phnx
I18N_USCRIPT_PHONETIC_POLLARD	Plrd
I18N_USCRIPT_RONGORONGO	Roro
I18N_USCRIPT_SARATI	Sara
I18N_USCRIPT_ESTRANGELO_SYRIAC	Syre
I18N_USCRIPT_WESTERN_SYRIAC	Syrj
I18N_USCRIPT_EASTERN_SYRIAC	Syrn
I18N_USCRIPT_TENGWAR	Teng
I18N_USCRIPT_VAI	Vaii
I18N_USCRIPT_VISIBLE_SPEECH	Visp
I18N_USCRIPT_CUNEIFORM	Xsux
I18N_USCRIPT_UNWRITTEN_LANGUAGES	Zxxx
I18N_USCRIPT_UNKNOWN	Zzzz, Unknown="Code for uncoded script", for unassigned code points
I18N_USCRIPT_CARIAN	Cari
I18N_USCRIPT_JAPANESE	Jpan
I18N_USCRIPT_LANNA	Lana
I18N_USCRIPT_LYCIAN	Lyci
I18N_USCRIPT_LYDIAN	Lydi
I18N_USCRIPT_OL_CHIKI	Olck
I18N_USCRIPT_REJANG	Rjng
I18N_USCRIPT_SAURASHTRA	Saur
I18N_USCRIPT_SIGN_WRITING	Sgnw
I18N_USCRIPT_SUNDANESE	Sund
I18N_USCRIPT_MOON	Moon
I18N_USCRIPT_MEITEI_MAYEK	Mtei
I18N_USCRIPT_IMPERIAL_ARAMAIC	Armi
I18N_USCRIPT_AVESTAN	Avst
I18N_USCRIPT_CHAKMA	Cakm
I18N_USCRIPT_KOREAN	Kore
I18N_USCRIPT_KAITHI	Kthi
I18N_USCRIPT_MANICHAEAN	Mani
I18N_USCRIPT_INSCRIPTIONAL_PAHLAVI	Phli
I18N_USCRIPT_PSALTER_PAHLAVI	Phlp
I18N_USCRIPT_BOOK_PAHLAVI	Phlv
I18N_USCRIPT_INSCRIPTIONAL_PARTHIAN	Prti
I18N_USCRIPT_SAMARITAN	Samr
I18N_USCRIPT_TAI_VIET	Tavt
I18N_USCRIPT_MATHEMATICAL_NOTATION	Zmth
I18N_USCRIPT_SYMBOLS	Zsym
I18N_USCRIPT_BAMUM	Bamu
I18N_USCRIPT_LISU	Lisu
I18N_USCRIPT_NAKHI_GEBA	Nkgb
I18N_USCRIPT_OLD_SOUTH_ARABIAN	Sarb
I18N_USCRIPT_BASSA_VAH	Bass
I18N_USCRIPT_DUPLOYAN_SHORTAND	Dupl
I18N_USCRIPT_ELBASAN	Elba
I18N_USCRIPT_GRANTHA	Gran
I18N_USCRIPT_KPELLE	Kpel
I18N_USCRIPT_LOMA	Loma
I18N_USCRIPT_MENDE	Mend
I18N_USCRIPT_MEROITIC_CURSIVE	Merc
I18N_USCRIPT_OLD_NORTH_ARABIAN	Narb
I18N_USCRIPT_NABATAEAN	Nbat
I18N_USCRIPT_PALMYRENE	Palm
I18N_USCRIPT_SINDHI	Sind
I18N_USCRIPT_WARANG_CITI	Wara
I18N_USCRIPT_AFAKA	Afak
I18N_USCRIPT_JURCHEN	Jurc
I18N_USCRIPT_MRO	Mroo
I18N_USCRIPT_NUSHU	Nshu
I18N_USCRIPT_SHARADA	Shrd
I18N_USCRIPT_SORA_SOMPENG	Sora
I18N_USCRIPT_TAKRI	Takr
I18N_USCRIPT_TANGUT	Tang
I18N_USCRIPT_WOLEAI	Wole
I18N_USCRIPT_ANATOLIAN_HIEROGLYPHS	Hluw
I18N_USCRIPT_KHOJKI	Khoj
I18N_USCRIPT_TIRHUTA	Tirh
I18N_USCRIPT_CODE_LIMIT	Count of i18n_uscript_code_e enumerators

Function Documentation

int i18n_uchar_char_age	(	i18n_uchar32	c,
		i18n_uversion_info	version_array
	)

Gets the "age" of the code point.

The "age" is the Unicode version when the code point was first designated (as a non-character or for private use) or assigned a character. This can be useful to avoid emitting code points to receiving processes that do not accept newer characters.

Since :: 4.0

Parameters:

[in]	c	The code point
[out]	version_array	The Unicode version number array, to be filled in

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_char_digit_value	(	i18n_uchar32	c,
		int32_t *	char_digit_value
	)

Returns the decimal digit value of a decimal digit character.

Such characters have the general category "Nd" (decimal digit numbers) and an i18n_uchar_u_numeric_type_e of I18N_UCHAR_U_NT_DECIMAL. Unlike ICU releases before 2.6, no digit values are returned for any Han characters because Han number characters are often used with a special Chinese-style number format (with characters for powers of 10 in between) instead of in decimal-positional notation. Unicode 4 explicitly assigns Han number characters the i18n_uchar_u_numeric_type_e I18N_UCHAR_U_NT_NUMERIC instead of I18N_UCHAR_U_NT_DECIMAL. See jitterbug 1483 for more details. Use i18n_uchar_get_int_property_value() and i18n_uchar_get_numeric_value() for complete numeric Unicode properties.

Since :: 4.0

Parameters:

[in]	c	The code point for which to get the decimal digit value
[out]	char_digit_value	The decimal digit value of c, or `-1` if c is not a decimal digit character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_get_numeric_value()

int i18n_uchar_char_direction	(	i18n_uchar32	c,
		i18n_uchar_direction_e *	direction
	)

Returns the bidirectional category value for the code point.

This value is used in the Unicode bidirectional algorithm (UAX #9 Unicode reports). Note that some unassigned code points have bidi values of R or AL because they are in blocks that are reserved for right-to-left scripts. Same as java.lang.Character.getDirectionality().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	direction	The bidirectional category (i18n_uchar_direction_e) value

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_direction_e

int i18n_uchar_char_from_name	(	i18n_uchar_u_char_name_choice_e	name_choice,
		const char *	name,
		i18n_uchar32 *	char_from_name
	)

Finds a Unicode character by its name and returns its code point value.

The name is matched exactly and completely. If the name does not correspond to a code point, I18N_ERROR_INVALID_CHAR_FOUND is returned. A unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase. Extended names are lowercase followed by an uppercase hexadecimal number, and within angle brackets.

Since :: 4.0

Parameters:

[in]	name_choice	Selector for which name to match
[in]	name	The name to match
[out]	char_from_name	The Unicode value of the code point with the given name, or an undefined value if there is no such code point

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_enum_char_names(); i18n_uchar_char_name(); i18n_uchar_u_char_name_choice_e

int i18n_uchar_char_mirror	(	i18n_uchar32	c,
		i18n_uchar32 *	char_mirror
	)

Maps the specified character to a "mirror-image" character.

For characters with the bidi_mirrored property, implementations sometimes need a "poor man's" mapping to another unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[out]	char_mirror	Another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the Bidi_Mirrored property

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_BIDI_MIRRORED; i18n_uchar_is_mirrored()

int i18n_uchar_char_name	(	i18n_uchar32	code,
		i18n_uchar_u_char_name_choice_e	name_choice,
		char *	buffer,
		int32_t	buffer_length,
		int32_t *	name_length
	)

Retrieves the name of a unicode character.

Depending on name_choice, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like a-z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them.

Since :: 4.0

Parameters:

[in]	code	The character (code point) for which to get the name. It must be `0 <= code <= 0x10ffff`.
[in]	name_choice	Selector for which name to get.
[out]	buffer	Destination buffer for copying the name. The name will always be zero-terminated. If there is no name, then the buffer will be set to the empty string.
[in]	buffer_length	The length of the buffer
[out]	name_length	The length of the name, or 0 if there is no name for this character. If buffer_length is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_char_from_name(); i18n_uchar_enum_char_names(); i18n_uchar_u_char_name_choice_e

int i18n_uchar_char_type	(	i18n_uchar32	c,
		int8_t *	char_type
	)

Returns the general category value for the code point.

Same as java.lang.Character.getType().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	char_type	The general category (i18n_uchar_category_e) value

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_category_e

int i18n_uchar_digit	(	i18n_uchar32	ch,
		int8_t	radix,
		int32_t *	numerical_value
	)

Returns the decimal digit value of the code point in the specified radix.

If the radix is not in the range 2 <= radix <= 36 or if the value of ch is not a valid digit in the specified radix, -1 is returned. A character is a valid digit if at least one of the following is true:

The character has a decimal digit value. Such characters have the general category "Nd" (decimal digit numbers) and a I18N_UCHAR_NUMERIC_TYPE of I18N_UCHAR_U_NT_DECIMAL. In this case the value is the character's decimal digit value.
The character is one of the uppercase Latin letters 'A' through 'Z'. In this case the value is ch - 'A' + 10.
The character is one of the lowercase Latin letters 'a' through 'z'. In this case the value is ch - 'a' + 10.
Latin letters from both the ASCII range (0061..007A, 0041..005A) as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) are recognized.

Same as java.lang.Character.digit().

Since :: 4.0

Parameters:

[in]	ch	The code point to be tested
[in]	radix	The radix
[out]	numerical_value	The numeric value represented by the character in the specified radix, or `-1` if there is no value or if the value exceeds the radix

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_NUMERIC_TYPE; i18n_uchar_for_digit(); i18n_uchar_char_digit_value(); i18n_uchar_is_digit()

int i18n_uchar_enum_char_names	(	i18n_uchar32	start,
		i18n_uchar32	limit,
		i18n_uchar_enum_char_name_cb	cb,
		void *	user_data,
		i18n_uchar_u_char_name_choice_e	name_choice
	)

Enumerates all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and calls a function for each, passing the code point value and the character name.

For unicode 1.0 names, only those are enumerated that differ from the modern names.

Since :: 4.0

Parameters:

[in]	start	The first code point in the enumeration range
[in]	limit	One more than the last code point in the enumeration range (the first one after the range)
[in]	cb	The function that is to be called for each character name
[in]	user_data	An arbitrary pointer that is passed to the function
[in]	name_choice	Selector for which kind of names to enumerate

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_char_name(); i18n_uchar_u_char_name_choice_e

int i18n_uchar_enum_char_types	(	i18n_uchar_enum_char_type_cb	cb,
		const void *	user_data
	)

Enumerates efficiently all code points with their unicode general categories.

This is useful for building data structures (e.g., UnicodeSet's), for enumerating all assigned code points (type != I18N_UCHAR_U_UNASSIGNED), etc. For each contiguous range of code points with a given general category ("character type"), the i18n_uchar_enum_char_type_cb() function is called. Adjacent ranges have different types. The Unicode standard guarantees that the numeric value of the type is 0..31.

Since :: 4.0

Parameters:

[in]	cb	A pointer to a function that is called for each contiguous range of code points with the same general category
[in]	user_data	An opaque pointer that is passed on to the callback function

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_enum_char_type_cb; i18n_uchar_category_e

int i18n_uchar_fold_case	(	i18n_uchar32	c,
		uint32_t	options,
		i18n_uchar32 *	code_point
	)

Maps the given character to its case folding equivalent.

If the character has no case folding equivalent, the character itself is returned. This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see utils_i18n_ustring.h. See also the user guide chapter on c/posix migration: User Guide: Case Mappings

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[in]	options	Either I18N_U_FOLD_CASE_DEFAULT or I18N_U_FOLD_CASE_EXCLUDE_SPECIAL_I
[out]	code_point	The I18N_UCHAR_SIMPLE_CASE_FOLDING of the code point, if any; otherwise the code point itself.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_for_digit	(	int32_t	digit,
		int8_t	radix,
		i18n_uchar32 *	representation
	)

Determines the character representation for a specific digit in the specified radix.

If the value of radix is not a valid radix, or the value of digit is not a valid digit in the specified radix, the null character (U+0000) is returned. The radix argument is valid if it is greater than or equal to 2 and less than or equal to 36. The digit argument is valid if 0 <= digit < radix. If the digit is less than 10, then '0' + digit is returned. Otherwise, the value 'a' + digit - 10 is returned. Same as java.lang.Character.forDigit().

Since :: 4.0

Parameters:

[in]	digit	The number to convert to a character
[in]	radix	The radix
[out]	representation	The char representation of the specified digit in the specified radix

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_digit(); i18n_uchar_char_digit_value(); i18n_uchar_is_digit()

int i18n_uchar_get_bidi_paired_bracket	(	i18n_uchar32	c,
		i18n_uchar32 *	bidi_paired_bracket
	)

Maps the specified character to its paired bracket character.

For Bidi_Paired_Bracket_Type!=None, this is the same as i18n_uchar_char_mirror(). Otherwise c itself is returned. See Unicode report.

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[out]	bidi_paired_bracket	The paired bracket code point, or c itself if there is no such mapping (Bidi_Paired_Bracket_Type=None)

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_BIDI_PAIRED_BRACKET; I18N_UCHAR_BIDI_PAIRED_BRACKET_TYPE; i18n_uchar_char_mirror()

int i18n_uchar_get_combining_class	(	i18n_uchar32	c,
		uint8_t *	combining_class
	)

Returns the combining class of the code point.

Since :: 4.0

Parameters:

[in]	c	The code point of the character
[out]	combining_class	The combining class of the character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_get_fc_nfkc_closure	(	i18n_uchar32	c,
		i18n_uchar *	dest,
		int32_t	dest_capacity,
		int32_t *	length
	)

Gets the FC_NFKC_Closure property string for a character.

See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" or for "FNC": Unicode reports.

Since :: 4.0

Parameters:

[in]	c	The character (code point) for which to get the FC_NFKC_Closure string. It must be `0 <= c <= 0x10ffff`.
[out]	dest	The destination buffer for copying the string. The string will be zero-terminated if possible. If there is no FC_NFKC_Closure string, then the buffer will be set to the empty string.
[in]	dest_capacity	The capacity of the destination buffer
[out]	length	The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. If the dest_capacity is less than or equal to the length, then the buffer contains the truncated name and the length indicates the full length of the name. The length does not include the zero-termination.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_get_int_property_max_value	(	i18n_uchar_uproperty_e	which,
		int32_t *	int_max_value
	)

Gets the maximum value for an enumerated/integer/binary Unicode property.

Can be used together with i18n_uchar_get_int_property_min_value() to allocate arrays of UnicodeSet or similar. Examples for min/max values (for Unicode 3.2): for undefined i18n_uchar_uproperty_e constant values, min/max values will be 0/-1.

Since :: 4.0

Parameters:

[in]	which	i18n_uchar_uproperty_e selector constant, identifies which binary property to check. Must be `I18N_UCHAR_BINARY_START <= which < I18N_UCHAR_BINARY_LIMIT` or `I18N_UCHAR_INT_START <= which < I18N_UCHAR_INT_LIMIT`.
[out]	int_max_value	Maximum value returned by i18n_uchar_get_int_property_value() for a Unicode property. int_max_value will be set to value lower than or equal to 0 if the property selector is out of range.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e; i18n_uchar_has_binary_property(); i18n_uchar_get_unicode_version(); i18n_uchar_get_int_property_min_value(); i18n_uchar_get_int_property_value()

int i18n_uchar_get_int_property_min_value	(	i18n_uchar_uproperty_e	which,
		int32_t *	int_min_value
	)

Gets the minimum value for an enumerated/integer/binary Unicode property.

Since :: 4.0

Parameters:

[in]	which	i18n_uchar_uproperty_e selector constant, identifies which binary property to check. Must be `I18N_UCHAR_BINARY_START <= which < I18N_UCHAR_BINARY_LIMIT` or `I18N_UCHAR_INT_START <= which < I18N_UCHAR_INT_LIMIT`.
[out]	int_min_value	Minimum value returned by i18n_uchar_get_int_property_value() for a Unicode property. `0` if the property selector is out of range.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e; i18n_uchar_has_binary_property(); i18n_uchar_get_unicode_version(); i18n_uchar_get_int_property_max_value(); i18n_uchar_get_int_property_value()

int i18n_uchar_get_int_property_value	(	i18n_uchar32	c,
		i18n_uchar_uproperty_e	which,
		int32_t *	property_val
	)

Gets the property value for an enumerated property for a code point.

    int property_value;
    i18n_uchar_u_east_asian_width_e east_asian_width;
    i18n_uchar_get_int_property_value(c, I18N_UCHAR_EAST_ASIAN_WIDTH, &property_value);
    east_asian_width = (i18n_uchar_u_east_asian_width_e)property_value;

    int property_value;
    bool is_ideographic;
    i18n_uchar_get_int_property_value(c, I18N_UCHAR_IDEOGRAPHIC, &property_value);
    is_ideographic = (bool)property_value;

Since :: 2.3.1

Parameters:

[in]	c	The code point to test.
[in]	which	The i18n_uchar_uproperty_e selector constant, identifies which property to check. Must be `I18N_UCHAR_BINARY_START <= which < I18N_UCHAR_BINARY_LIMIT` or `I18N_UCHAR_INT_START <= which < I18N_UCHAR_INT_LIMIT` or `I18N_UCHAR_MASK_START <= which < I18N_UCHAR_MASK_LIMIT`.
[out]	property_val	The numeric value that is directly the property value or, for enumerated properties, corresponds to the numeric value of the enumerated constant of the respective property value enumeration type (cast to enum type if necessary). Returns `0` or `1` (for false/true) for binary Unicode properties. Returns a bit-mask for mask properties. Returns `0` if which is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_get_numeric_value	(	i18n_uchar32	c,
		double *	numeric_val
	)

Gets the numeric value for a Unicode code point as defined in the Unicode Character Database.

A "double" param type is necessary because some numeric values are fractions, negative, or too large for int32_t. For characters without any numeric values in the Unicode Character Database, numeric_val will be equal to I18N_U_NO_NUMERIC_VALUE. Note: This is different from the Unicode Standard which specifies NaN as the default value. (NaN is not available on all platforms) This function is similar to java.lang.Character.getNumericValue(), but i18n_uchar_get_numeric_value() also supports negative values, large values, and fractions, while Java's getNumericValue() returns values 10..35 for ASCII letters.

Since :: 4.0

Parameters:

[in]	c	The code point to get the numeric value for
[out]	numeric_val	The numeric value of c or I18N_U_NO_NUMERIC_VALUE if none is defined

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_U_NO_NUMERIC_VALUE

int i18n_uchar_get_property_enum	(	const char *	alias,
		i18n_uchar_uproperty_e *	property_enum
	)

Returns the i18n_uchar_uproperty_e enum for a given property name, as specified in the unicode database.

Short, long, and any other variants are recognized. In addition, this function maps the synthetic names "gcm" / "General_Category_Mask" to the property I18N_UCHAR_GENERAL_CATEGORY_MASK.

Since :: 4.0

Parameters:

[in]	alias	The property name to be matched. The name is compared using "loose matching"
[out]	property_enum	A i18n_uchar_uproperty_e enum, or I18N_UCHAR_INVALID_CODE if the given name does not match any property

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e

int i18n_uchar_get_property_name	(	i18n_uchar_uproperty_e	property,
		i18n_uchar_u_property_name_choice_e	name_choice,
		const char **	name
	)

Returns the unicode name for a given property, as given in the unicode database file.

In addition, this function maps the property I18N_UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / "General_Category_Mask".

Since :: 4.0

Parameters:

[in]	property	i18n_uchar_uproperty_e selector other than I18N_UCHAR_INVALID_CODE. If out of range, NULL is returned.
[in]	name_choice	Selector for which name to get. If out of range, NULL is returned. All properties have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by `I18N_UCHAR_U_LONG_PROPERTY_NAME + i`, where `i = 1, 2,..`.
[out]	name	A pointer to the name, or NULL if either the property or the name_choice is out of range. If a given name_choice returns NULL, then all larger values of name_choice will return NULL, with one exception: if NULL is returned for I18N_UCHAR_U_SHORT_PROPERTY_NAME, then I18N_UCHAR_U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until i18n_uclean_cleanup() (not implemented) is called.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e; i18n_uchar_u_property_name_choice_e

int i18n_uchar_get_property_value_enum	(	i18n_uchar_uproperty_e	property,
		const char *	alias,
		int32_t *	value_enum
	)

Returns the property value integer for a given value name, as specified in the unicode database.

Short, long, and any other variants are recognized. Some of the names will only be recognized with I18N_UCHAR_GENERAL_CATEGORY_MASK, not I18N_UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "Lc" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

Since :: 4.0

Parameters:

[in]	property	i18n_uchar_uproperty_e selector constant. Must be `I18N_UCHAR_BINARY_START <= property < I18N_UCHAR_BINARY_LIMIT` or `I18N_UCHAR_INT_START <= property < I18N_UCHAR_INT_LIMIT` or `I18N_UCHAR_MASK_START <= property < I18N_UCHAR_MASK_LIMIT`. If out of range, I18N_UCHAR_INVALID_CODE is returned.
[in]	alias	The value name to be matched. The name is compared using "loose matching"
[out]	value_enum	A value integer or I18N_UCHAR_INVALID_CODE if the given name does not match any value of the given property, or if the property is invalid. Note: I18N_UCHAR_GENERAL_CATEGORY_MASK values are not values of i18n_uchar_category_e, but rather mask values produced by I18N_U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e

int i18n_uchar_get_property_value_name	(	i18n_uchar_uproperty_e	property,
		int32_t	value,
		i18n_uchar_u_property_name_choice_e	name_choice,
		const char **	name
	)

Returns the unicode name for a given property value.

Note: some of the names can only be retrieved using I18N_UCHAR_GENERAL_CATEGORY_MASK, not I18N_UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / "Letter", "Lc" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" / "Punctuation", "S" / "Symbol", and "Z" / "Separator".

Since :: 4.0

Parameters:

[in]	property	i18n_uchar_uproperty_e selector constant. Must be `I18N_UCHAR_BINARY_START <= property < I18N_UCHAR_BINARY_LIMIT` or `I18N_UCHAR_INT_START <= property < I18N_UCHAR_INT_LIMIT` or `I18N_UCHAR_MASK_START <= property < I18N_UCHAR_MASK_LIMIT`. If out of range, NULL is returned.
[in]	value	Selector for a value for the given property. If out of range, NULL is returned. In general, valid values range from `0` up to some maximum. There are a few exceptions: I18N_UCHAR_BLOCK values begin at the non-zero value I18N_UCHAR_UBLOCK_BASIC_LATIN. I18N_UCHAR_CANONICAL_COMBINING_CLASS values are not contiguous and range from 0..240. I18N_UCHAR_GENERAL_CATEGORY_MASK values are not values of i18n_uchar_category_e, but rather mask values produced by I18N_U_GET_GC_MASK(). This allows grouped categories such as [:L:] to be represented. Mask values range non-contiguously from 1..I18N_U_GC_P_MASK.
[in]	name_choice	Selector for which name to get. If out of range, NULL is returned. All values have a long name. Most have a short name, but some do not. Unicode allows for additional names; if present these will be returned by `I18N_UCHAR_U_LONG_PROPERTY_NAME + i`, where `i = 1, 2,..`.
[out]	name	A pointer to the name, or NULL if either the property or the name_choice is out of range. If a given name_choice returns NULL, then all larger values of name_choice will return NULL, with one exception: if NULL is returned for I18N_UCHAR_U_SHORT_PROPERTY_NAME, then I18N_UCHAR_U_LONG_PROPERTY_NAME (and higher) may still return a non-NULL value. The returned pointer is valid until i18n_uclean_cleanup() (not implemented) is called.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_u_property_name_choice_e; i18n_uchar_uproperty_e

int i18n_uchar_get_ublock_code	(	i18n_uchar32	c,
		i18n_uchar_ublock_code_e *	block_val
	)

Gets the Unicode allocation block that contains the character.

Since :: 2.3.1

Parameters:

[in]	c	The code point to test
[out]	block_val	The block value for the code point

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_get_unicode_version ( i18n_uversion_info version_array )

Gets the unicode version information.

The version array is filled in with the version information for the Unicode standard that is currently used by ICU. For example, Unicode version 3.1.1 is represented as an array with the values { 3, 1, 1, 0 }.

Since :: 4.0

Parameters:

[out] version_array An output array that will be filled in with the Unicode version number

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_has_binary_property	(	i18n_uchar32	c,
		i18n_uchar_uproperty_e	which,
		i18n_ubool *	has_binary_property
	)

Checks a binary unicode property for a code point.

The properties APIs are intended to reflect unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). For details about the properties see Unicode.org.

Since :: 4.0

Parameters:

[in]	c	Code point to test
[in]	which	i18n_uchar_uproperty_e selector constant, identifies which binary property to check. Must be `I18N_UCHAR_BINARY_START <= which < I18N_UCHAR_BINARY_LIMIT`.
[out]	has_binary_property	`true` or `false` according to the binary Unicode property value for c. Also `false` if which is out of bounds or if the Unicode version does not have data for the property at all, or not for this code point.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_uproperty_e; i18n_uchar_get_int_property_value(); i18n_uchar_get_unicode_version()

int i18n_uchar_is_alnum	(	i18n_uchar32	c,
		i18n_ubool *	is_alnum
	)

Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.

True for characters with general categories "L" (letters) and "Nd" (decimal digit numbers). Same as java.lang.Character.isLetterOrDigit(). In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_alnum	`true` if the code point is an alphanumeric character according to Character.isLetterOrDigit()

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_is_alpha	(	i18n_uchar32	c,
		i18n_ubool *	is_alpha
	)

Determines whether the specified code point is a letter character.

True for general categories "L" (letters). Same as java.lang.Character.isLetter(). In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_alpha	`true` if the code point is a letter character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_digit(); i18n_uchar_is_alnum()

int i18n_uchar_is_alphabetic	(	i18n_uchar32	c,
		i18n_ubool *	is_alphabetic
	)

Checks if a code point has the Alphabetic Unicode property.

Same as i18n_uchar_has_binary_property(c, I18N_UCHAR_ALPHABETIC). This is different from the i18n_uchar_is_alpha() function.

Since :: 4.0

Parameters:

[in]	c	Code point to test
[out]	is_alphabetic	`true` if the code point has the I18N_UCHAR_ALPHABETIC Unicode property, `false` otherwise

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_ALPHABETIC; i18n_uchar_is_alpha(); i18n_uchar_has_binary_property()

int i18n_uchar_is_base	(	i18n_uchar32	c,
		i18n_ubool *	is_base
	)

Determines whether the specified code point is a base character.

True for general categories "L" (letters), "N" (numbers), "Mc" (spacing combining marks), and "Me" (enclosing marks). Note that this is different from the Unicode definition in chapter 3.5, conformance clause D13, which defines base characters to be all characters (not Cn) that do not graphically combine with preceding characters (M) and that are neither control (Cc) nor format (Cf) characters.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_base	`true` if the code point is a base character according to this function

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_alpha(); i18n_uchar_is_digit()

int i18n_uchar_is_blank	(	i18n_uchar32	c,
		i18n_ubool *	is_blank
	)

Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.

The following are equivalent definitions: True for Unicode White_Space characters except for "vertical space controls", where "vertical space controls" are the following characters: U+000A (LF), U+000B (VT), U+000C (FF), U+000D (CR), U+0085 (NEL), U+2028 (LS), U+2029 (PS). Equivalently, true for U+0009 (TAB) and characters with general category "Zs" (space separators) except Zero Width Space (ZWSP, U+200B). Note: There are several ICU whitespace functions; please see the utils_i18n_uchar.h file documentation for a detailed comparison. This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_blank	`true` if the code point is a "blank"

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_is_control	(	i18n_uchar32	c,
		i18n_ubool *	is_control
	)

Determines whether the specified code point is a control character (as defined by this function).

A control character is one of the following:

ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
I18N_UCHAR_U_CONTROL_CHAR (Cc)
I18N_UCHAR_U_FORMAT_CHAR (Cf)
I18N_UCHAR_U_LINE_SEPARATOR (Zl)
I18N_UCHAR_U_PARAGRAPH_SEPARATOR (Zp)

This is a c/posix migration function. See the comments about c/posix character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_control	`true` if the code point is a control character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_DEFAULT_IGNORABLE_CODE_POINT; i18n_uchar_is_printable()

int i18n_uchar_is_defined	(	i18n_uchar32	c,
		i18n_ubool *	is_defined
	)

Determines whether the specified code point is "defined", which usually means that it is assigned a character.

True for general categories other than "Cn" (other, not assigned), i.e., true for all code points mentioned in UnicodeData.txt. Note that non-character code points (e.g., U+FDD0) are not "defined" (they are Cn), but surrogate code points are "defined" (Cs). Same as java.lang.Character.isDefined().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_defined	`true` if the code point is assigned a character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_digit(); i18n_uchar_is_alpha(); i18n_uchar_is_alnum(); i18n_uchar_is_upper(); i18n_uchar_is_lower(); i18n_uchar_is_title()

int i18n_uchar_is_digit	(	i18n_uchar32	c,
		i18n_ubool *	is_digit
	)

Determines whether the specified code point is a digit character according to Java.

True for characters with general category "Nd" (decimal digit numbers). Beginning with Unicode 4, this is the same as testing for the I18N_UCHAR_NUMERIC_TYPE of I18N_UCHAR_U_NT_DECIMAL. Same as java.lang.Character.isDigit(). In addition to being equivalent to a Java function, this also serves as a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_digit	`true` if the code point is a digit character according to Character.isDigit()

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_is_graph	(	i18n_uchar32	c,
		i18n_ubool *	is_graph
	)

Determines whether the specified code point is a "graphic" character (printable, excluding spaces).

true for all characters except those with general categories "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), "Cn" (unassigned), and "Z" (separators). This is a c/posix migration function. See the comments about c/posix character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_graph	`true` if the code point is a "graphic" character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_is_id_ignorable	(	i18n_uchar32	c,
		i18n_ubool *	is_id_ignorable
	)

Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.

True for characters with general category "Cf" (format controls) as well as non-whitespace ISO controls (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). Same as java.lang.Character.isIdentifierIgnorable(). Note that Unicode just recommends to ignore Cf (format controls).

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_id_ignorable	`true` if the code point is ignorable in identifiers according to Java

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_DEFAULT_IGNORABLE_CODE_POINT; i18n_uchar_is_id_start(); i18n_uchar_is_id_part()

int i18n_uchar_is_id_part	(	i18n_uchar32	c,
		i18n_ubool *	is_id_part
	)

Determines if the specified character is permissible in an identifier according to Java.

True for characters with general categories "L" (letters), "Nl" (letter numbers), "Nd" (decimal digits), "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and i18n_uchar_is_id_ignorable(). Same as java.lang.Character.isUnicodeIdentifierPart(). Almost the same as Unicode's ID_Continue (I18N_UCHAR_ID_CONTINUE), except that Unicode recommends to ignore only Cf, which is a smaller set than what i18n_uchar_is_id_ignorable() accepts.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_id_part	`true` if the code point may occur in an identifier according to Java

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_id_start(); i18n_uchar_is_id_ignorable(); I18N_UCHAR_ID_CONTINUE

int i18n_uchar_is_id_start	(	i18n_uchar32	c,
		i18n_ubool *	is_id_start
	)

Determines if the specified character is permissible as the first character in an identifier according to Unicode (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).

True for characters with general categories "L" (letters) and "Nl" (letter numbers). Same as java.lang.Character.isUnicodeIdentifierStart(). Same as I18N_UCHAR_ID_START.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_id_start	`true` if the code point may start an identifier

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_alpha(); i18n_uchar_is_id_part(); I18N_UCHAR_ID_START

int i18n_uchar_is_iso_control	(	i18n_uchar32	c,
		i18n_ubool *	is_iso_control
	)

Determines whether the specified code point is an ISO control code.

True for U+0000..U+001F and U+007F..U+009F (general category "Cc"). Same as java.lang.Character.isISOControl().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_iso_control	`true` if the code point is an ISO control code

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_control()

int i18n_uchar_is_java_id_part	(	i18n_uchar32	c,
		i18n_ubool *	is_java_id_part
	)

Determines if the specified character is permissible in a Java identifier.

In addition to i18n_uchar_is_id_part(), true for characters with general category "Sc" (currency symbols). Same as java.lang.Character.isJavaIdentifierPart().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_java_id_part	`true` if the code point may occur in a Java identifier

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_id_ignorable(); i18n_uchar_is_java_id_start(); i18n_uchar_is_alpha(); i18n_uchar_is_digit(); i18n_uchar_is_id_part()

int i18n_uchar_is_java_id_start	(	i18n_uchar32	c,
		i18n_ubool *	is_java_id_start
	)

Determines if the specified character is permissible as the first character in a Java identifier.

In addition to i18n_uchar_is_id_start(), true for characters with general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). Same as java.lang.Character.isJavaIdentifierStart().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_java_id_start	`true` if the code point may start a Java identifier

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_java_id_part(); i18n_uchar_is_alpha(); i18n_uchar_is_id_start()

int i18n_uchar_is_java_space_char	(	i18n_uchar32	c,
		i18n_ubool *	is_java_space_char
	)

Determines if the specified code point is a space character according to Java.

True for characters with general categories "Z" (separators), which does not include control codes (e.g., TAB or Line Feed). Same as java.lang.Character.isSpaceChar(). Note: There are several ICU whitespace functions; please see the utils_i18n_uchar.h file documentation for a detailed comparison.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_java_space_char	`true` if the code point is a space character according to Character.isSpaceChar()

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_space(); i18n_uchar_is_whitespace(); i18n_uchar_is_white_space()

int i18n_uchar_is_lower	(	i18n_uchar32	c,
		i18n_ubool *	is_lower
	)

Determines whether the specified code point has the general category "Ll" (lowercase letter).

Same as java.lang.Character.isLowerCase(). This misses some characters that are also lowercase but have a different general category value. In order to include those, use I18N_UCHAR_LOWERCASE. In addition to being equivalent to a Java function, this also serves as a c/posix migration function. See the comments about c/posix character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_lower	`true` if the code point is an Ll lowercase letter

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_LOWERCASE; i18n_uchar_is_upper(); i18n_uchar_is_title()

int i18n_uchar_is_lowercase	(	i18n_uchar32	c,
		i18n_ubool *	is_lowercase
	)

Checks if a code point has the Lowercase Unicode property.

Same as i18n_uchar_has_binary_property(c, I18N_UCHAR_LOWERCASE). This is different from i18n_uchar_is_lower() function.

Since :: 4.0

Parameters:

[in]	c	Code point to test
[out]	is_lowercase	`true` if the code point has the I18N_UCHAR_LOWERCASE Unicode property, `false` otherwise

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_LOWERCASE; i18n_uchar_is_lower(); i18n_uchar_has_binary_property()

int i18n_uchar_is_mirrored	(	i18n_uchar32	c,
		i18n_ubool *	is_mirrored
	)

Determines whether the code point has the Bidi_Mirrored property.

This property is set for characters that are commonly used in right-to-left contexts and need to be displayed with a "mirrored" glyph. Same as java.lang.Character.isMirrored(). Same as I18N_UCHAR_BIDI_MIRRORED.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_mirrored	`true` if the character has the Bidi_Mirrored property

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_BIDI_MIRRORED

int i18n_uchar_is_printable	(	i18n_uchar32	c,
		i18n_ubool *	is_printable
	)

Determines whether the specified code point is a printable character.

True for general categories other than "C" (controls). This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_printable	`true` if the code point is a printable character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_DEFAULT_IGNORABLE_CODE_POINT; i18n_uchar_is_control()

int i18n_uchar_is_punct	(	i18n_uchar32	c,
		i18n_ubool *	is_punct
	)

Determines whether the specified code point is a punctuation character.

True for characters with general categories "P" (punctuation). This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_punct	`true` if the code point is a punctuation character

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_is_space	(	i18n_uchar32	c,
		i18n_ubool *	is_space
	)

Determines if the specified character is a space character or not.

Note: There are several ICU whitespace functions; please see the utils_i18n_uchar.h file documentation for a detailed comparison. This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The character to be tested
[out]	is_space	`true` if the character is a space character; `false` otherwise.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_java_space_char(); i18n_uchar_is_whitespace(); i18n_uchar_is_white_space()

int i18n_uchar_is_title	(	i18n_uchar32	c,
		i18n_ubool *	is_title
	)

Determines whether the specified code point is a titlecase letter.

True for general category "Lt" (titlecase letter). Same as java.lang.Character.isTitleCase().

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_title	`true` if the code point is an Lt titlecase letter

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_upper(); i18n_uchar_is_lower(); i18n_uchar_to_title()

int i18n_uchar_is_upper	(	i18n_uchar32	c,
		i18n_ubool *	is_upper
	)

Determines whether the specified code point has the general category "Lu" (uppercase letter).

Same as java.lang.Character.isUpperCase(). This misses some characters that are also uppercase but have a different general category value. In order to include those, use I18N_UCHAR_UPPERCASE. In addition to being equivalent to a Java function, this also serves as a c/posix migration function. See the comments about c/posix character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_upper	`true` if the code point is an Lu uppercase letter

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_UPPERCASE; i18n_uchar_is_lower(); i18n_uchar_is_title(); i18n_uchar_to_lower()

int i18n_uchar_is_uppercase	(	i18n_uchar32	c,
		i18n_ubool *	is_uppercase
	)

Checks if a code point has the Uppercase Unicode property.

Same as i18n_uchar_has_binary_property(c, I18N_UCHAR_UPPERCASE). This is different from i18n_uchar_is_upper() function.

Since :: 4.0

Parameters:

[in]	c	Code point to test
[out]	is_uppercase	`true` if the code point has the I18N_UCHAR_UPPERCASE Unicode property, `false` otherwise

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_UPPERCASE; i18n_uchar_is_upper(); i18n_uchar_has_binary_property()

int i18n_uchar_is_white_space	(	i18n_uchar32	c,
		i18n_ubool *	is_white_space
	)

Checks if a code point has the White_Space Unicode property.

Same as i18n_uchar_has_binary_property(c, I18N_UCHAR_WHITE_SPACE). This is different from both the i18n_uchar_is_space() and i18n_uchar_is_whitespace() functions. Note: There are several ICU whitespace functions.

Since :: 4.0

Parameters:

[in]	c	Code point to test
[out]	is_white_space	`true` if the code point has the I18N_UCHAR_WHITE_SPACE Unicode property, `false` otherwise.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: I18N_UCHAR_WHITE_SPACE; i18n_uchar_is_whitespace(); i18n_uchar_is_space(); i18n_uchar_is_java_space_char(); i18n_uchar_has_binary_property()

int i18n_uchar_is_whitespace	(	i18n_uchar32	c,
		i18n_ubool *	is_whitespace
	)

Determines if the specified code point is a whitespace character according to Java/ICU.

A character is considered to be a Java whitespace character if and only if it satisfies one of the following criteria:

It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
It is U+0009 HORIZONTAL TABULATION.
It is U+000A LINE FEED.
It is U+000B VERTICAL TABULATION.
It is U+000C FORM FEED.
It is U+000D CARRIAGE RETURN.
It is U+001C FILE SEPARATOR.
It is U+001D GROUP SEPARATOR.
It is U+001E RECORD SEPARATOR.
It is U+001F UNIT SEPARATOR.

This API tries to sync with the semantics of Java's java.lang.Character.isWhitespace(), but it may not return the exact same results because of the Unicode version difference. Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) to a Format Control (Cf). Since then, i18n_uchar_is_whitespace(0x200b) returns false. See Unicode 4.0.1. Note: There are several ICU whitespace functions. Please see the uchar.h file documentation for a detailed comparison.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_whitespace	`true` if the code point is a whitespace character according to Java/ICU

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

See also:: i18n_uchar_is_space(); i18n_uchar_is_java_space_char(); i18n_uchar_is_white_space()

int i18n_uchar_is_xdigit	(	i18n_uchar32	c,
		i18n_ubool *	is_xdigit
	)

Determines whether the specified code point is a hexadecimal digit.

This is equivalent to i18n_uchar_digit(c, 16) >= 0. True for characters with general category "Nd" (decimal digit numbers) as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. (That is, for letters with code points 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)

In order to narrow the definition of hexadecimal digits to only ASCII characters, use (c <= 0x7f && i18n_uchar_is_xdigit(c)). This is a C/POSIX migration function. See the comments about C/POSIX character classification functions in the documentation at the top of this header file.

Since :: 4.0

Parameters:

[in]	c	The code point to be tested
[out]	is_xdigit	`true` if the code point is a hexadecimal digit

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_to_lower	(	i18n_uchar32	c,
		i18n_uchar32 *	lower
	)

Maps the given character to its lowercase equivalent.

If the character has no lowercase equivalent, the character itself is returned. This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see utils_i18n_ustring.h. See also the User Guide chapter on C/POSIX migration: User Guide: Case Mappings. Same as java.lang.Character.toLowerCase().

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[out]	lower	The I18N_UCHAR_SIMPLE_LOWERCASE_MAPPING of the code point, if any; otherwise the code point itself.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_to_title	(	i18n_uchar32	c,
		i18n_uchar32 *	title
	)

Maps the given character to its titlecase equivalent.

If none is defined, the character itself is returned. This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see utils_i18n_ustring.h. See also the User Guide chapter on C/POSIX migration: User Guide: Case Mappings. Same as java.lang.Character.toTitleCase().

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[out]	title	The I18N_UCHAR_SIMPLE_TITLECASE_MAPPING of the code point, if any; otherwise the code point itself.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

int i18n_uchar_to_upper	(	i18n_uchar32	c,
		i18n_uchar32 *	upper
	)

Maps the given character to its uppercase equivalent.

If the character has no uppercase equivalent, the character itself is returned. This function only returns the simple, single-code point case mapping. Full case mappings should be used whenever possible because they produce better results by working on whole strings. They take into account the string context and the language and can map to a result string with a different length as appropriate. Full case mappings are applied by the string case mapping functions, see utils_i18n_ustring.h. See also the User Guide chapter on C/POSIX migration: User Guide: Case Mappings. Same as java.lang.Character.toUpperCase().

Since :: 4.0

Parameters:

[in]	c	The code point to be mapped
[out]	upper	The I18N_UCHAR_SIMPLE_UPPERCASE_MAPPING of the code point, if any; otherwise the code point itself.

Returns:: 0 on success, otherwise a negative error value

Return values:

I18N_ERROR_NONE	Successful
I18N_ERROR_INVALID_PARAMETER	Invalid function parameter

Required Header

Overview

Sample Code 1

Functions

Typedefs

Defines

Define Documentation

Typedef Documentation

Enumeration Type Documentation

Function Documentation