Tizen Native API  9.0
Ucollator

The Ucollator module performs locale-sensitive string comparison.

Required Header

#include <utils_i18n.h>

Overview

The Ucollator module performs locale-sensitive string comparison. It builds searching and sorting routines for natural language text and provides correct sorting orders for most locales supported.

Sample Code 1

Converts two different byte strings to two different unicode strings and compares the unicode strings to check if the strings are equal to each other.

    i18n_uchar uchar_src[64] = {0,};
    i18n_uchar uchar_target[64] = {0,};
    char *src = "tizen";
    char *target = "bada";
    int uchar_src_len = 0;
    int uchar_target_len = 0;
    i18n_ucollator_h coll = NULL;
    i18n_ubool result = NULL;

    i18n_ustring_from_UTF8( uchar_src, 64, NULL, src, -1 );
    i18n_ustring_from_UTF8( uchar_target, 64, NULL, target, -1 );

    // creates a collator
    i18n_ucollator_create( "en_US", &coll );

    // sets strength for coll
    i18n_ucollator_set_strength( coll, I18N_UCOLLATOR_PRIMARY );

    // compares uchar_src with uchar_target
    i18n_ustring_get_length( uchar_src, &uchar_src_len );
    i18n_ustring_get_length( uchar_target, &uchar_target_len );
    i18n_ucollator_equal( coll, uchar_src, uchar_src_len, uchar_target, uchar_target_len, &result );
    dlog_print(DLOG_INFO, LOG_TAG, "%s %s %s\n",
    src, result == 1 ? "is equal to" : "is not equal to", target ); // tizen is not equal to bada

    // destroys the collator
    i18n_ucollator_destroy( coll );

Sample Code 2

Sorts the given data in ascending order using i18n_ucollator_str_collator().

    i18n_ucollator_h coll = NULL;
    char *src[3] = { "cat", "banana", "airplane" };
    // number of strings to sort, as a signed value so it compares cleanly with the int indices
    int count = (int)(sizeof(src) / sizeof(src[0]));
    char *tmp = NULL;
    i18n_uchar buf_01[16] = {0,};
    i18n_uchar buf_02[16] = {0,};
    i18n_ucollator_result_e result = I18N_UCOLLATOR_EQUAL;
    int i = 0, j = 0;
    int ret = I18N_ERROR_NONE;
    int buf_01_len = 0, buf_02_len = 0;

    for (i = 0; i < count; i++) {
        dlog_print(DLOG_INFO, LOG_TAG, "%s\n", src[i]);
    }    // cat    banana    airplane

    // creates a collator
    ret = i18n_ucollator_create("en_US", &coll);

    // compares and sorts in ascending order (bubble sort over adjacent pairs)
    if (ret == I18N_ERROR_NONE) {
        i18n_ucollator_set_strength(coll, I18N_UCOLLATOR_TERTIARY);
        for (i = 0; i < count - 1; i++) {
            for (j = 0; j < count - 1 - i; j++) {
                i18n_ustring_copy_ua(buf_01, src[j]);
                i18n_ustring_copy_ua(buf_02, src[j+1]);
                i18n_ustring_get_length(buf_01, &buf_01_len);
                i18n_ustring_get_length(buf_02, &buf_02_len);
                // compares buf_01 with buf_02; swaps only when the comparison itself succeeded
                ret = i18n_ucollator_str_collator(coll, buf_01, buf_01_len, buf_02, buf_02_len, &result);
                if (ret == I18N_ERROR_NONE && result == I18N_UCOLLATOR_GREATER) {
                    tmp = src[j];
                    src[j] = src[j+1];
                    src[j+1] = tmp;
                }
            }
        }
        // destroys the collator (only a successfully created collator is destroyed)
        i18n_ucollator_destroy( coll );    // deallocate memory for collator
    }

    for (i = 0; i < count; i++) {
        dlog_print(DLOG_INFO, LOG_TAG, "%s\n", src[i]);
    }    // airplane    banana    cat

Functions

int i18n_ucollator_create (const char *locale, i18n_ucollator_h *collator)
 Creates a i18n_ucollator_h for comparing strings.
int i18n_ucollator_create_rules (const i18n_uchar *rules, int32_t rules_length, i18n_ucollator_attribute_value_e normalization_mode, i18n_ucollator_strength_e strength, i18n_uparse_error_s *parse_error, i18n_ucollator_h *collator)
 Produces an i18n_ucollator_h instance according to the rules supplied.
int i18n_ucollator_get_contractions_and_expansions (const i18n_ucollator_h collator, i18n_ubool add_prefixes, i18n_uset_h contractions, i18n_uset_h expansions)
 Gets a set containing the expansions defined by the collator.
int i18n_ucollator_destroy (i18n_ucollator_h collator)
 Destroys a i18n_ucollator_h.
int i18n_ucollator_str_collator (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ucollator_result_e *result)
 Compares two strings.
int i18n_ucollator_str_collator_utf8 (const i18n_ucollator_h collator, const char *src, int32_t src_len, const char *target, int32_t target_len, i18n_ucollator_result_e *result)
 Compares two strings in UTF-8.
int i18n_ucollator_greater (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *is_greater)
 Determines if one string is greater than another.
int i18n_ucollator_greater_or_equal (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *result)
 Determines if one string is greater than or equal to another.
int i18n_ucollator_equal (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_len, const i18n_uchar *target, int32_t target_len, i18n_ubool *equal)
 Compares two strings for equality.
int i18n_ucollator_str_collator_iter (const i18n_ucollator_h collator, const i18n_uchar_iter_h *src_iter, const i18n_uchar_iter_h *dest_iter, i18n_ucollator_result_e *result)
 Compares two UTF-8 encoded strings.
int i18n_ucollator_get_strength (const i18n_ucollator_h collator, i18n_ucollator_strength_e *strength)
 Gets the collation strength used in an i18n_ucollator_h.
int i18n_ucollator_set_strength (i18n_ucollator_h collator, i18n_ucollator_strength_e strength)
 Sets the collation strength used in a collator.
int i18n_ucollator_get_reorder_codes (const i18n_ucollator_h collator, int32_t dest_size, int32_t *dest, int32_t *n_codes)
 Retrieves the reordering codes for this collator.
int i18n_ucollator_set_reorder_codes (i18n_ucollator_h collator, const int32_t *reorder_codes, int32_t codes_length)
 Sets the reordering codes for this collator.
int i18n_ucollator_get_equivalent_reorder_codes (int32_t reorder_code, int32_t dest_size, int32_t *dest, int32_t *n_codes)
 Retrieves the reorder codes that are grouped with the given reorder code.
int i18n_ucollator_get_display_name (const char *obj_locale, const char *disp_locale, int32_t dest_size, i18n_uchar *dest, int32_t *display_size)
 Gets the display name for an i18n_ucollator_h.
int i18n_ucollator_get_available (int32_t locale_index, const char **locale)
 Gets a locale for which collation rules are available.
int i18n_ucollator_count_available (int32_t *n_available)
 Determines how many locales have collation rules available.
int i18n_ucollator_create_available_locales (i18n_uenumeration_h *locales)
 Creates a string enumerator of all locales for which a valid collator may be created.
int i18n_ucollator_get_keywords (i18n_uenumeration_h *keywords)
 Creates a string enumerator of all possible keywords that are relevant to collation.
int i18n_ucollator_get_keyword_values (const char *keyword, i18n_uenumeration_h *keywords)
 Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
int i18n_ucollator_get_keyword_values_for_locale (const char *key, const char *locale, i18n_ubool commonly_used, i18n_uenumeration_h *keywords)
 Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
int i18n_ucollator_get_functional_equivalent (const char *keyword, const char *locale, int32_t dest_size, char *dest, i18n_ubool *is_available, int32_t *buffer_size)
 Returns the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
int i18n_ucollator_get_rules (const i18n_ucollator_h collator, int32_t *length, const i18n_uchar **rules)
 Gets the collation tailoring rules from a i18n_ucollator_h.
int i18n_ucollator_get_sort_key (const i18n_ucollator_h collator, const i18n_uchar *src, int32_t src_length, int32_t dest_size, uint8_t *dest, int32_t *result_length)
 Gets a sort key for a string from a i18n_ucollator_h.
int i18n_ucollator_next_sort_key_part (const i18n_ucollator_h collator, i18n_uchar_iter_h *iter, uint32_t state[2], uint8_t *dest, int32_t count, int32_t *result_length)
 Gets the next count bytes of a sort key.
int i18n_ucollator_get_bound (const uint8_t *src, int32_t src_length, i18n_ucollator_bound_mode_e mode, uint32_t n_levels, uint8_t *dest, int32_t dest_length, int32_t *needed_size)
 Produces a bound for a given sortkey and a number of levels.
int i18n_ucollator_get_version (const i18n_ucollator_h collator, i18n_uversion_info info)
 Gets the version information for a i18n_ucollator_h.
int i18n_ucollator_get_uca_version (const i18n_ucollator_h collator, i18n_uversion_info info)
 Gets the UCA version information for a i18n_ucollator_h.
int i18n_ucollator_merge_sort_keys (const uint8_t *src1, int32_t src1_length, const uint8_t *src2, int32_t src2_length, int32_t dest_size, uint8_t *dest, int32_t *merged_length)
 Merges two sort keys.
int i18n_ucollator_set_attribute (i18n_ucollator_h collator, i18n_ucollator_attribute_e attr, i18n_ucollator_attribute_value_e val)
 Sets an attribute's value.
int i18n_ucollator_get_attribute (i18n_ucollator_h collator, i18n_ucollator_attribute_e attr, i18n_ucollator_attribute_value_e *val)
 Gets an attribute's value.
int i18n_ucollator_set_max_variable (i18n_ucollator_h collator, i18n_ucollator_reorder_code_e group)
 Sets the variable top to the top of the specified reordering group.
int i18n_ucollator_get_max_variable (i18n_ucollator_h collator, i18n_ucollator_reorder_code_e *group)
 Returns the maximum reordering group whose characters are affected by I18N_UCOLLATOR_ALTERNATE_HANDLING.
int i18n_ucollator_get_variable_top (i18n_ucollator_h collator, uint32_t *weight)
 Gets the variable top value of a i18n_ucollator_h.
int i18n_ucollator_safe_clone (i18n_ucollator_h collator, i18n_ucollator_h *clone)
 Clones the given collator, the cloning is thread-safe.
int i18n_ucollator_get_rules_ex (i18n_ucollator_h collator, i18n_ucollator_rule_option_e option, int32_t dest_size, i18n_uchar *dest, int32_t *current_rules)
 Returns the current rules.
int i18n_ucollator_get_locale_by_type (i18n_ucollator_h collator, i18n_ulocale_data_locale_type_e type, const char **locale)
 Gets the locale name of the collator.
int i18n_ucollator_get_tailored_set (i18n_ucollator_h collator, i18n_uset_h *uset)
 Gets a Unicode set that contains all the characters and sequences tailored in this collator.
int i18n_ucollator_clone_binary (i18n_ucollator_h collator, int32_t dest_size, uint8_t *dest, int32_t *image_size)
 Creates a binary image of a collator.
int i18n_ucollator_create_binary (const uint8_t *bin, int32_t length, i18n_ucollator_h base, i18n_ucollator_h *collator)
 Creates a collator from a collator binary image created using i18n_ucollator_clone_binary().

Typedefs

typedef void * i18n_ucollator_h
 Structure representing a collator object instance.
typedef i18n_ucollator_attribute_value_e i18n_ucollator_strength_e
 Enumeration in which the base letter represents a primary difference. Set comparison level to I18N_UCOLLATOR_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of primary difference, "abc" < "abd". Diacritical differences on the same base letter represent a secondary difference. Set comparison level to I18N_UCOLLATOR_SECONDARY to ignore tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of secondary difference, "ä" >> "a". Uppercase and lowercase versions of the same character represent a tertiary difference. Set comparison level to I18N_UCOLLATOR_TERTIARY to include all comparison differences. Use this to set the strength of an i18n_ucollator_h. Example of tertiary difference, "abc" <<< "ABC". Two characters are considered "identical" (I18N_UCOLLATOR_IDENTICAL) when they have the same Unicode spellings. For example, "ä" == "ä". i18n_ucollator_strength_e is also used to determine the strength of sort keys generated from i18n_ucollator_h. These values can now be found in the i18n_ucollator_attribute_value_e enum.

Typedef Documentation

typedef void* i18n_ucollator_h

Structure representing a collator object instance.

Since :
2.3

Enumeration in which the base letter represents a primary difference. Set comparison level to I18N_UCOLLATOR_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of primary difference, "abc" < "abd". Diacritical differences on the same base letter represent a secondary difference. Set comparison level to I18N_UCOLLATOR_SECONDARY to ignore tertiary differences. Use this to set the strength of an i18n_ucollator_h. Example of secondary difference, "ä" >> "a". Uppercase and lowercase versions of the same character represent a tertiary difference. Set comparison level to I18N_UCOLLATOR_TERTIARY to include all comparison differences. Use this to set the strength of an i18n_ucollator_h. Example of tertiary difference, "abc" <<< "ABC". Two characters are considered "identical" (I18N_UCOLLATOR_IDENTICAL) when they have the same Unicode spellings. For example, "ä" == "ä". i18n_ucollator_strength_e is also used to determine the strength of sort keys generated from i18n_ucollator_h. These values can now be found in the i18n_ucollator_attribute_value_e enum.

Since :
2.3

Enumeration Type Documentation

Enumeration for attributes that collation service understands. All the attributes can take I18N_UCOLLATOR_DEFAULT value, as well as the values specific to each one.

Since :
2.3
Enumerator:
I18N_UCOLLATOR_FRENCH_COLLATION 

Attribute for direction of secondary weights - used in Canadian French. Acceptable values are I18N_UCOLLATOR_ON, which results in secondary weights being considered backwards, and I18N_UCOLLATOR_OFF which treats secondary weights in the order they appear

I18N_UCOLLATOR_ALTERNATE_HANDLING 

Attribute for handling variable elements. Acceptable values are I18N_UCOLLATOR_NON_IGNORABLE (default) which treats all the codepoints with non-ignorable primary weights in the same way, and I18N_UCOLLATOR_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored at the primary level and moved to the quaternary level

I18N_UCOLLATOR_CASE_FIRST 

Controls the ordering of upper and lower case letters. Acceptable values are I18N_UCOLLATOR_OFF (default), which orders upper and lower case letters in accordance to their tertiary weights, I18N_UCOLLATOR_UPPER_FIRST which forces upper case letters to sort before lower case letters, and I18N_UCOLLATOR_LOWER_FIRST which does the opposite

I18N_UCOLLATOR_CASE_LEVEL 

Controls whether an extra case level (positioned before the third level) is generated or not. Acceptable values are I18N_UCOLLATOR_OFF (default), when case level is not generated, and I18N_UCOLLATOR_ON which causes the case level to be generated. Contents of the case level are affected by the value of the I18N_UCOLLATOR_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to I18N_UCOLLATOR_PRIMARY and enable case level

I18N_UCOLLATOR_NORMALIZATION_MODE 

Controls whether the normalization check and necessary normalizations are performed. When set to I18N_UCOLLATOR_OFF (default) no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to I18N_UCOLLATOR_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed

I18N_UCOLLATOR_DECOMPOSITION_MODE 

An alias for the I18N_UCOLLATOR_NORMALIZATION_MODE attribute

I18N_UCOLLATOR_STRENGTH 

The strength attribute. Can be either I18N_UCOLLATOR_PRIMARY, I18N_UCOLLATOR_SECONDARY, I18N_UCOLLATOR_TERTIARY, I18N_UCOLLATOR_QUATERNARY, or I18N_UCOLLATOR_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted setting for the alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string. Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string

I18N_UCOLLATOR_NUMERIC_COLLATION 

When turned on, this attribute makes substrings of digits sort according to their numeric values. This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring. A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, and so on. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, and so on

I18N_UCOLLATOR_ATTRIBUTE_COUNT 

The number of i18n_ucollator_attribute_e constants

Enumeration containing attribute values for controlling collation behavior. Here are all the allowable values. Not every attribute can take every value. The only universal value is I18N_UCOLLATOR_DEFAULT, which resets the attribute value to the predefined value for that locale.

Since :
2.3
Enumerator:
I18N_UCOLLATOR_DEFAULT 

Accepted by most attributes

I18N_UCOLLATOR_PRIMARY 

Primary collation strength

I18N_UCOLLATOR_SECONDARY 

Secondary collation strength

I18N_UCOLLATOR_TERTIARY 

Tertiary collation strength

I18N_UCOLLATOR_DEFAULT_STRENGTH 

Default collation strength

I18N_UCOLLATOR_QUATERNARY 

Quaternary collation strength

I18N_UCOLLATOR_IDENTICAL 

Identical collation strength

I18N_UCOLLATOR_OFF 

Turn the feature off - works for I18N_UCOLLATOR_FRENCH_COLLATION, I18N_UCOLLATOR_CASE_LEVEL & I18N_UCOLLATOR_DECOMPOSITION_MODE

I18N_UCOLLATOR_ON 

Turn the feature on - works for I18N_UCOLLATOR_FRENCH_COLLATION, I18N_UCOLLATOR_CASE_LEVEL & I18N_UCOLLATOR_DECOMPOSITION_MODE

I18N_UCOLLATOR_SHIFTED 

Valid for I18N_UCOLLATOR_ALTERNATE_HANDLING. Alternate handling will be shifted.

I18N_UCOLLATOR_NON_IGNORABLE 

Valid for I18N_UCOLLATOR_ALTERNATE_HANDLING. Alternate handling will be non ignorable.

I18N_UCOLLATOR_LOWER_FIRST 

Valid for I18N_UCOLLATOR_CASE_FIRST - lower case sorts before upper case.

I18N_UCOLLATOR_UPPER_FIRST 

Upper case sorts before lower case.

Enumeration that is taken by i18n_ucollator_get_bound().

Since :
4.0
Enumerator:
I18N_UCOLLATOR_BOUND_LOWER 

Lower bound.

I18N_UCOLLATOR_BOUND_UPPER 

Upper bound that will match strings of exact size.

I18N_UCOLLATOR_BOUND_UPPER_LONG 

Upper bound that will match all the strings that have the same initial substring as the given string.

I18N_UCOLLATOR_BOUND_VALUE_COUNT 

One more than the highest normal i18n_ucollator_bound_mode_e value.

Enumeration containing the codes for reordering segments of the collation table that are not script codes. These reordering codes are to be used in conjunction with the script codes.

Since :
4.0
Enumerator:
I18N_UCOLLATOR_REORDER_CODE_DEFAULT 

A special reordering code that is used to specify the default reordering codes for a locale.

I18N_UCOLLATOR_REORDER_CODE_NONE 

A special reordering code that is used to specify no reordering codes.

I18N_UCOLLATOR_REORDER_CODE_OTHERS 

A special reordering code that is used to specify all other codes used for reordering except for the codes listed as i18n_ucollator_reorder_code_e values and those listed explicitly in a reordering.

I18N_UCOLLATOR_REORDER_CODE_SPACE 

Characters with the space property. This is equivalent to the rule value "space".

I18N_UCOLLATOR_REORDER_CODE_FIRST 

The first entry in the enumeration of reordering groups. This is intended for use in range checking and enumeration of the reorder codes.

I18N_UCOLLATOR_REORDER_CODE_PUNCTUATION 

Characters with the punctuation property. This is equivalent to the rule value "punct".

I18N_UCOLLATOR_REORDER_CODE_SYMBOL 

Characters with the symbol property. This is equivalent to the rule value "symbol".

I18N_UCOLLATOR_REORDER_CODE_CURRENCY 

Characters with the currency property. This is equivalent to the rule value "currency".

I18N_UCOLLATOR_REORDER_CODE_DIGIT 

Characters with the digit property. This is equivalent to the rule value "digit".

I18N_UCOLLATOR_REORDER_CODE_LIMIT 

The limit of the reorder codes. This is intended for use in range checking and enumeration of the reorder codes.

Enumeration for source and target string comparison result. I18N_UCOLLATOR_LESS is returned if the source string is compared to be less than the target string in the i18n_ucollator_str_collator() method. I18N_UCOLLATOR_EQUAL is returned if the source string is compared to be equal to the target string in the i18n_ucollator_str_collator() method. I18N_UCOLLATOR_GREATER is returned if the source string is compared to be greater than the target string in the i18n_ucollator_str_collator() method.

Since :
2.3
Enumerator:
I18N_UCOLLATOR_EQUAL 

string a == string b

I18N_UCOLLATOR_GREATER 

string a > string b

I18N_UCOLLATOR_LESS 

string a < string b

Options for retrieving the rule string.

Since :
4.0
Enumerator:
I18N_UCOLLATOR_TAILORING_ONLY 

Retrieves the tailoring rules only. Same as calling the version of i18n_ucollator_get_rules() without i18n_ucollator_rule_option_e.

I18N_UCOLLATOR_FULL_RULES 

Retrieves the "UCA rules" concatenated with the tailoring rules. The "UCA rules" are an approximation of the root collator's sort order. See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales.


Function Documentation

int i18n_ucollator_clone_binary ( i18n_ucollator_h  collator,
int32_t  dest_size,
uint8_t *  dest,
int32_t *  image_size 
)

Creates a binary image of a collator.

This binary image can be stored and later used to instantiate a collator using i18n_ucollator_create_binary(). This API supports preflighting.

Since :
4.0
Parameters:
[in]collatorThe collator
[in]dest_sizeCapacity of the dest buffer
[out]destA fill-in buffer to receive the binary image
[out]image_sizeThe size of the image
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_create_binary()
int i18n_ucollator_count_available ( int32_t *  n_available)

Determines how many locales have collation rules available.

This function is most useful for determining the loop ending condition for calls to i18n_ucollator_get_available().

Since :
4.0
Parameters:
[out]n_availableThe number of locales for which collation rules are available
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_create ( const char *  locale,
i18n_ucollator_h *  collator 
)

Creates a i18n_ucollator_h for comparing strings.

For some languages, multiple collation types are available; for example, "de@collation=phonebook". Collation attributes can be specified via locale keywords as well, in the old locale extension syntax ("el@colCaseFirst=upper") or in language tag syntax ("el-u-kf-upper"). See User Guide: Collation API.

The i18n_ucollator_h is used in all the calls to the Collation service.
When no longer needed, the collator must be disposed of by calling i18n_ucollator_destroy().

Since :
2.3
Remarks:
Must release collator using i18n_ucollator_destroy().
Parameters:
[in]localeThe locale containing the required collation rules
Special values for locales can be passed in - if NULL is passed for the locale, the default locale collation rules will be used
If empty string ("") or "root" is passed, UCA rules will be used.
[out]collatori18n_ucollator_h, otherwise 0 if an error occurs
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_destroy()

int i18n_ucollator_create_available_locales ( i18n_uenumeration_h *  locales)

Creates a string enumerator of all locales for which a valid collator may be created.

Since :
4.0
Remarks:
The locales should be released using i18n_uenumeration_destroy().
Parameters:
[out]localesA string enumeration over locale strings. The caller is responsible for releasing the result.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
 #include <stdio.h>
 #include <stdlib.h>
 #include <utils_i18n.h>
 int main() {
    /* Receives the enumeration of locales for which a collator may be created. */
    i18n_uenumeration_h available_locales;
    /* Status of the creation call; anything but I18N_ERROR_NONE is a failure. */
    i18n_error_code_e status = i18n_ucollator_create_available_locales(&available_locales);

    if (status == I18N_ERROR_NONE) {
        /* The enumeration must be released by the caller once it is no longer needed. */
        i18n_uenumeration_destroy(available_locales);
        return EXIT_SUCCESS;
    }

    printf("Error i18n_ucollator_create_available_locales: %d\n", status);
    return EXIT_FAILURE;
 }
int i18n_ucollator_create_binary ( const uint8_t *  bin,
int32_t  length,
i18n_ucollator_h  base,
i18n_ucollator_h *  collator 
)

Creates a collator from a collator binary image created using i18n_ucollator_clone_binary().

Binary image used in instantiation of the collator remains owned by the user and should stay around for the lifetime of the collator. The API also takes a base collator which must be the root collator.

Since :
4.0
Parameters:
[in]binThe binary image owned by the user and required through the lifetime of the collator
[in]lengthThe size of the image. If negative, the API will try to figure out the length of the image.
[in]baseBase collator, for lookup of untailored characters. Must be the root collator, must not be NULL. The base is required to be present through the lifetime of the collator.
[out]collatorThe newly created collator
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_clone_binary()
int i18n_ucollator_create_rules ( const i18n_uchar *  rules,
int32_t  rules_length,
i18n_ucollator_attribute_value_e  normalization_mode,
i18n_ucollator_strength_e  strength,
i18n_uparse_error_s *  parse_error,
i18n_ucollator_h *  collator 
)

Produces an i18n_ucollator_h instance according to the rules supplied.

The rules are used to change the default ordering, defined in the UCA in a process called tailoring. The resulting i18n_ucollator_h can be used in the same way as the one obtained by i18n_ucollator_create().

Since :
4.0
Remarks:
The collator should be released using i18n_ucollator_destroy().
Parameters:
[in]rulesA string describing the collation rules. For the syntax of the rules please see users guide
[in]rules_lengthThe length of rules, or -1 if null-terminated
[in]normalization_modeThe normalization mode, one of:
I18N_UCOLLATOR_OFF (expect the text to not need normalization),
I18N_UCOLLATOR_ON (normalize)
I18N_UCOLLATOR_DEFAULT (set the mode according to the rules)
[in]strengthThe default collation strength: One of
I18N_UCOLLATOR_PRIMARY
, I18N_UCOLLATOR_SECONDARY
, I18N_UCOLLATOR_TERTIARY
, I18N_UCOLLATOR_IDENTICAL
, I18N_UCOLLATOR_DEFAULT_STRENGTH - can be also set in the rules
[out]parse_errorA pointer to i18n_uparse_error_s to receive information about errors occurred during parsing. This argument can currently be set to NULL, but at users own risk. Please provide a real structure.
[out]collatorA pointer to a i18n_ucollator_h. It is not guaranteed that NULL be returned in case of error - please check the returned error code to detect errors.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_create()
i18n_ucollator_safe_clone()
i18n_ucollator_destroy()
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <utils_i18n.h>
 int main() {
    /* Tailoring rules as an ASCII string; copied into an i18n_uchar buffer before use. */
    const char *rules_ascii = "&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E";
    i18n_uchar rules_buf[128];
    /* Receives information about errors that occurred while parsing the rules. */
    i18n_uparse_error_s parse_details;
    i18n_ucollator_h tailored = NULL;
    i18n_error_code_e status;

    i18n_ustring_copy_ua(rules_buf, rules_ascii);

    status = i18n_ucollator_create_rules(rules_buf, strlen(rules_ascii), I18N_UCOLLATOR_ON,
                                         I18N_UCOLLATOR_DEFAULT_STRENGTH, &parse_details, &tailored);
    if (status == I18N_ERROR_NONE) {
        /* The collator should be released once it is no longer needed. */
        i18n_ucollator_destroy(tailored);
        return EXIT_SUCCESS;
    }

    printf("Error i18n_ucollator_create_rules: %d\n", status);
    return EXIT_FAILURE;
 }

int i18n_ucollator_destroy ( i18n_ucollator_h  collator)

Destroys a i18n_ucollator_h.

Once destroyed, the i18n_ucollator_h should not be used. Every created collator should be destroyed.

Since :
2.3
Parameters:
[in]collatorThe i18n_ucollator_h to close
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_create()
int i18n_ucollator_equal ( const i18n_ucollator_h  collator,
const i18n_uchar *  src,
int32_t  src_len,
const i18n_uchar *  target,
int32_t  target_len,
i18n_ubool *  equal 
)

Compares two strings for equality.

This function is equivalent to calling i18n_ucollator_str_collator() and checking whether its result is I18N_UCOLLATOR_EQUAL.

Since :
2.3
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]srcThe source string
[in]src_lenThe length of the source, otherwise -1 if null-terminated
[in]targetThe target string
[in]target_lenThe length of the target, otherwise -1 if null-terminated
[out]equalIf true source is equal to target, otherwise false
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_str_collator()

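int i18n_ucollator_get_attribute ( i18n_ucollator_h  collator,
i18n_ucollator_attribute_e  attr,
i18n_ucollator_attribute_value_e *  val 
)

Gets an attribute's value.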

Since :
4.0
Parameters:
[in]collatorThe collator
[in]attrThe attribute type
[out]valThe attribute value
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_available ( int32_t  locale_index,
const char **  locale 
)

Gets a locale for which collation rules are available.

An i18n_ucollator_h in a locale returned by this function will perform the correct collation for the locale.

Since :
4.0
Remarks:
The locale should not be released.
Parameters:
[in]locale_indexThe index of the desired locale
[out]localeA locale for which collation rules are available, or 0 if none
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
    i18n_error_code_e error_code;
    const char *locale = NULL;
    int32_t locale_index = 0;

    i18n_ucollator_get_available(locale_index, &locale);
    if (error_code != I18N_ERROR_NONE) {
        printf("Error i18n_ucollator_get_available: %d\n", error_code);
        return EXIT_FAILURE;
    }
int i18n_ucollator_get_bound ( const uint8_t *  src,
int32_t  src_length,
i18n_ucollator_bound_mode_e  mode,
uint32_t  n_levels,
uint8_t *  dest,
int32_t  dest_length,
int32_t *  needed_size 
)

Produces a bound for a given sortkey and a number of levels.

The value stored in needed_size is always the number of bytes needed, regardless of whether the result buffer was big enough or even valid.

Resulting bounds can be used to produce a range of strings that are between upper and lower bounds. For example, if bounds are produced for a sortkey of string "smith", strings between upper and lower bounds with one level would include "Smith", "SMITH", "sMiTh".

There are two upper bounds that can be produced. If I18N_UCOLLATOR_BOUND_UPPER is produced, strings matched would be as above. However, if a bound produced using I18N_UCOLLATOR_BOUND_UPPER_LONG is used, the above example will also match "Smithsonian" and similar.

Sort keys may be compared using strcmp.

Since :
4.0
Parameters:
[in]srcThe source sortkey
[in]src_lengthThe length of source, or -1 if null-terminated. (If an unmodified sortkey is passed, it is always null terminated).
[in]modeType of bound required. It can be I18N_UCOLLATOR_BOUND_LOWER, which produces a lower inclusive bound, I18N_UCOLLATOR_BOUND_UPPER, that produces upper bound that matches strings of the same length or I18N_UCOLLATOR_BOUND_UPPER_LONG that matches strings that have the same starting substring as the source string.
[in]n_levelsNumber of levels required in the resulting bound (for most uses, the recommended value is 1). See users guide for explanation on number of levels a sortkey can have.
[out]destA pointer to a buffer to receive the resulting sortkey.
[in]dest_lengthThe maximum size of result
[out]needed_sizeThe size needed to fully store the bound
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_contractions_and_expansions ( const i18n_ucollator_h  collator,
i18n_ubool  add_prefixes,
i18n_uset_h  contractions,
i18n_uset_h  expansions 
)

Gets the sets containing the contractions and expansions defined by the collator.

The sets include both the root collator's contractions and expansions and those defined by the tailoring.

Since :
4.0
Parameters:
[in]collatorThe collator
[in]add_prefixesAdd the prefix contextual elements to contractions
[out]contractionsIf not NULL, the set to hold the contractions
[out]expansionsIf not NULL, the set to hold the expansions
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_display_name ( const char *  obj_locale,
const char *  disp_locale,
int32_t  dest_size,
i18n_uchar *  dest,
int32_t *  display_size 
)

Gets the display name for an i18n_ucollator_h.

The display name is suitable for presentation to a user.

Since :
4.0
Parameters:
[in]obj_localeThe locale of the collator in question
[in]disp_localeThe locale for display
[in]dest_sizeThe maximum size of dest
[out]destA pointer to a buffer to receive the attribute
[out]display_sizeThe total buffer size needed; if greater than dest_size, the output was truncated
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_equivalent_reorder_codes ( int32_t  reorder_code,
int32_t  dest_size,
int32_t *  dest,
int32_t *  n_codes 
)

Retrieves the reorder codes that are grouped with the given reorder code.

Some reorder codes will be grouped and must reorder together. Beginning with ICU 55, scripts only reorder together if they are primary-equal, for example Hiragana and Katakana.

Since :
4.0
Parameters:
[in]reorder_codeThe reorder code to determine equivalence for
[in]dest_sizeThe length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any codes (pre-flighting).
[out]destThe array to fill with the script ordering
[out]n_codesThe number of reordering codes written to the dest array.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_set_reorder_codes()
i18n_ucollator_get_reorder_codes()
i18n_uscript_code_e
i18n_ucollator_reorder_code_e
int i18n_ucollator_get_functional_equivalent ( const char *  keyword,
const char *  locale,
int32_t  dest_size,
char *  dest,
i18n_ubool *  is_available,
int32_t *  buffer_size 
)

Returns the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.

If two different input locale + keyword combinations produce the same result locale, then collators instantiated for these two different input locales will behave equivalently. The converse is not always true; two collators may in fact be equivalent, but return different results, due to internal details. The return result has no other meaning than that stated above, and implies nothing as to the relationship between the two locales. This is intended for use by applications who wish to cache collators, or otherwise reuse collators when possible. The functional equivalent may change over time.

Since :
4.0
Parameters:
[in]keywordA particular keyword as enumerated by i18n_ucollator_get_keywords()
[in]localeThe specified input locale
[in]dest_sizeCapacity of the dest buffer
[out]destThe functionally equivalent result locale
[out]is_availableIf non-NULL, indicates whether the specified input locale was 'available' to the collation service. A locale is defined as 'available' if it physically exists within the collation locale data.
[out]buffer_sizeThe actual buffer size needed for the locale. If greater than dest_size, the returned full name will be truncated and an error code will be returned.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_keyword_values ( const char *  keyword,
i18n_uenumeration_h *  keywords 
)

Given a keyword, create a string enumeration of all values for that keyword that are currently in use.

Since :
4.0
Remarks:
The keywords should be released using i18n_uenumeration_destroy().
Parameters:
[in]keywordA particular keyword as enumerated by i18n_ucollator_get_keywords(). If any other keyword is passed in, returns I18N_ERROR_INVALID_PARAMETER
[out]keywordsA string enumeration over collation keyword values, or NULL upon error. The caller is responsible for releasing the result.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
 #include <stdio.h>
 #include <stdlib.h>
 #include <utils_i18n.h>
 /* Example: obtain the enumeration of values for the "collation" keyword, then release it. */
 int main() {
    i18n_uenumeration_h values;
    i18n_error_code_e status = i18n_ucollator_get_keyword_values("collation", &values);

    if (status == I18N_ERROR_NONE) {
        /* The enumeration handed back must be released by the caller. */
        i18n_uenumeration_destroy(values);
        return EXIT_SUCCESS;
    }

    printf("Error i18n_ucollator_get_keyword_values: %d\n", status);
    return EXIT_FAILURE;
 }
int i18n_ucollator_get_keyword_values_for_locale ( const char *  key,
const char *  locale,
i18n_ubool  commonly_used,
i18n_uenumeration_h *  keywords 
)

Given a key and a locale, returns an array of string values in a preferred order that would make a difference.

These are all and only those values where the creation of the service with the locale formed from the input locale plus input keyword and that value has different behavior than creation with the input locale alone.

Since :
4.0
Remarks:
The keywords should be released using i18n_uenumeration_destroy().
Parameters:
[in]keyOne of the keys supported by this service. For now, only "collation" is supported
[in]localeThe locale
[in]commonly_usedIf set to true it will return only commonly used values with the given locale in preferred order. Otherwise, it will return all the available values for the locale
[out]keywordsA string enumeration over keyword values for the given key and the locale
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
 #include <stdio.h>
 #include <stdlib.h>
 #include <utils_i18n.h>
 /* Example: obtain the "collation" keyword values for the "en_US" locale, then release them. */
 int main() {
    i18n_uenumeration_h values;
    i18n_error_code_e status =
        i18n_ucollator_get_keyword_values_for_locale("collation", "en_US", false, &values);

    if (status == I18N_ERROR_NONE) {
        /* The enumeration handed back must be released by the caller. */
        i18n_uenumeration_destroy(values);
        return EXIT_SUCCESS;
    }

    printf("Error i18n_ucollator_get_keyword_values_for_locale: %d\n", status);
    return EXIT_FAILURE;
 }

int i18n_ucollator_get_keywords ( i18n_uenumeration_h *  keywords )

Creates a string enumerator of all possible keywords that are relevant to collation.

At this point, the only recognized keyword for this service is "collation".

Since :
4.0
Remarks:
The keywords should be released using i18n_uenumeration_destroy().
Parameters:
[out]keywordsA string enumeration over locale strings. The caller is responsible for releasing the result.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
 #include <stdio.h>
 #include <stdlib.h>
 #include <utils_i18n.h>
 /* Example: obtain the enumeration of collation keywords, then release it. */
 int main() {
    i18n_uenumeration_h names;
    i18n_error_code_e status = i18n_ucollator_get_keywords(&names);

    if (status == I18N_ERROR_NONE) {
        /* The enumeration handed back must be released by the caller. */
        i18n_uenumeration_destroy(names);
        return EXIT_SUCCESS;
    }

    printf("Error i18n_ucollator_get_keywords: %d\n", status);
    return EXIT_FAILURE;
 }
int i18n_ucollator_get_locale_by_type ( i18n_ucollator_h  collator,
i18n_ulocale_data_locale_type_e  type,
const char **  locale 
)

Gets the locale name of the collator.

If the collator is instantiated from the rules, then this function returns NULL.

Since :
4.0
Remarks:
The locale should not be released.
Parameters:
[in]collatorThe i18n_ucollator_h for which the locale is needed
[in]typeYou can choose between requested, valid and actual locale. For description see the definition of i18n_ulocale_data_locale_type_e.
[out]localeThe real locale name from which the collation data comes. If the collator was instantiated from rules, returns NULL.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter

Returns the maximum reordering group whose characters are affected by I18N_UCOLLATOR_ALTERNATE_HANDLING.

Since :
4.0
Parameters:
[in]collatorThe collator
[out]groupThe maximum variable reordering group
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_reorder_codes ( const i18n_ucollator_h  collator,
int32_t  dest_size,
int32_t *  dest,
int32_t *  n_codes 
)

Retrieves the reordering codes for this collator.

These reordering codes are a combination of i18n_uscript_code_e codes and i18n_ucollator_reorder_code_e entries.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h to query
[in]dest_sizeThe length of dest. If it is 0, then dest may be NULL and the function will only set the n_codes parameter to the length of the result without writing any codes (pre-flighting).
[out]destThe array to fill with the script ordering
[out]n_codesThe number of reordering codes written to the dest array
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_set_reorder_codes()
i18n_ucollator_get_equivalent_reorder_codes()
i18n_uscript_code_e
i18n_ucollator_reorder_code_e
int i18n_ucollator_get_rules ( const i18n_ucollator_h  collator,
int32_t *  length,
const i18n_uchar **  rules 
)

Gets the collation tailoring rules from a i18n_ucollator_h.

The rules will follow the rule syntax.

Since :
4.0
Remarks:
The rules should not be released.
Parameters:
[in]collatorThe i18n_ucollator_h to query
[out]lengthThe length of the rules
[out]rulesThe collation tailoring rules
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_rules_ex ( i18n_ucollator_h  collator,
i18n_ucollator_rule_option_e  option,
int32_t  dest_size,
i18n_uchar *  dest,
int32_t *  current_rules 
)

Returns the current rules.

The option parameter defines whether full rules are returned or just the tailoring. The number of i18n_uchar needed to store the rules is returned in current_rules. If dest is NULL or dest_size is not enough to store rules, will store up to available space. See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales

Remarks:
i18n_ucollator_get_rules() should normally be used instead.
Since :
4.0
Parameters:
[in]collatorThe collator to get the rules from
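[in]optionSelects whether the full rules or just the tailoring are returned; one of i18n_ucollator_rule_option_e (for example I18N_UCOLLATOR_FULL_RULES)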
[in]dest_sizeThe length of buffer to store rules in. If less than needed you'll get only the part that fits in.
[out]destThe buffer to store the result in. If NULL, you'll get no rules.
[out]current_rulesThe number of i18n_uchar needed to store the current rules.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
I18N_UCOLLATOR_FULL_RULES
int i18n_ucollator_get_sort_key ( const i18n_ucollator_h  collator,
const i18n_uchar *  src,
int32_t  src_length,
int32_t  dest_size,
uint8_t *  dest,
int32_t *  result_length 
)

Gets a sort key for a string from a i18n_ucollator_h.

Sort keys may be compared using strcmp. Note that sort keys are often less efficient than simply doing comparison. For more details, see the ICU User Guide. Like ICU functions that write to an output buffer, the buffer contents is undefined if the buffer capacity (dest_size parameter) is too small. Unlike ICU functions that write a string to an output buffer, the terminating zero byte is counted in the sort key length.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the collation rules
[in]srcThe string to transform
[in]src_lengthThe length of source, or -1 if null-terminated
[in]dest_sizeThe maximum size of dest buffer
[out]destA pointer to a buffer to receive the attribute
[out]result_lengthThe size needed to fully store the sort key. If there was an internal error generating the sort key, a zero value is returned.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter

Gets the collation strength used in an i18n_ucollator_h.

The strength influences how strings are compared.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h to query
[out]strengthThe collation strength; One of I18N_UCOLLATOR_PRIMARY, I18N_UCOLLATOR_SECONDARY, I18N_UCOLLATOR_TERTIARY, I18N_UCOLLATOR_QUATERNARY, I18N_UCOLLATOR_IDENTICAL
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_set_strength()

Gets a Unicode set that contains all the characters and sequences tailored in this collator.

The result must be disposed of by using i18n_uset_destroy().

Since :
4.0
Remarks:
The uset should be released using i18n_uset_destroy().
Parameters:
[in]collatorThe i18n_ucollator_h for which we want to get tailored chars
[out]usetA pointer to the newly created i18n_uset_h. Must be disposed of by using i18n_uset_destroy().
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_create_rules()
i18n_ucollator_destroy()

Gets the UCA version information for a i18n_ucollator_h.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h to query
[out]infoThe version information, the result will be filled in
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_get_variable_top ( i18n_ucollator_h  collator,
uint32_t *  weight 
)

Gets the variable top value of a i18n_ucollator_h.

Since :
4.0
Parameters:
[in]collatorThe collator whose variable top needs to be retrieved
[out]weightThe variable top primary weight
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_get_max_variable()
i18n_ucollator_get_variable_top()

Gets the version information for a i18n_ucollator_h.

Version is currently an opaque 32-bit number which depends, among other things, on major versions of the collator tailoring and UCA.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h to query
[out]infoThe version information, the result will be filled in
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_greater ( const i18n_ucollator_h  collator,
const i18n_uchar *  src,
int32_t  src_len,
const i18n_uchar *  target,
int32_t  target_len,
i18n_ubool *  is_greater 
)

Determines if one string is greater than another.

This function is equivalent to i18n_ucollator_str_collator() == I18N_UCOLLATOR_GREATER

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]srcThe source string
[in]src_lenThe length of the src, otherwise -1 if null-terminated
[in]targetThe target string
[in]target_lenThe length of the target, otherwise -1 if null-terminated
[out]is_greaterTRUE if source is greater than target, FALSE otherwise
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_str_collator()
i18n_ucollator_greater_or_equal()
i18n_ucollator_equal()
int i18n_ucollator_greater_or_equal ( const i18n_ucollator_h  collator,
const i18n_uchar *  src,
int32_t  src_len,
const i18n_uchar *  target,
int32_t  target_len,
i18n_ubool *  result 
)

Determines if one string is greater than or equal to another.

This function is equivalent to i18n_ucollator_str_collator() != I18N_UCOLLATOR_LESS

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]srcThe source string
[in]src_lenThe length of the src, otherwise -1 if null-terminated
[in]targetThe target string
[in]target_lenThe length of the target, otherwise -1 if null-terminated
[out]resultTRUE if source string is greater than or equal to target, FALSE otherwise
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_str_collator()
i18n_ucollator_greater()
i18n_ucollator_equal()
int i18n_ucollator_merge_sort_keys ( const uint8_t *  src1,
int32_t  src1_length,
const uint8_t *  src2,
int32_t  src2_length,
int32_t  dest_size,
uint8_t *  dest,
int32_t *  merged_length 
)

Merges two sort keys.

The levels are merged with their corresponding counterparts (primaries with primaries, secondaries with secondaries etc.). Between the values from the same level a separator is inserted.

This is useful, for example, for combining sort keys from first and last names to sort such pairs. See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys

The recommended way to achieve "merged" sorting is by concatenating strings with U+FFFE between them. The concatenation has the same sort order as the merged sort keys, but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). Using strings with U+FFFE may yield shorter sort keys.

For details about Sort Key Features see http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features

It is possible to merge multiple sort keys by consecutively merging another one with the intermediate result.

The length of the merge result is the sum of the lengths of the input sort keys.

Example (uncompressed):
191B1D 01 050505 01 910505 00
1F2123 01 050505 01 910505 00

will be merged as

191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00

If the destination buffer is not big enough, then its contents are undefined. If any of source lengths are zero or any of the source pointers are NULL / undefined, the result is of size zero.

Since :
4.0
Parameters:
[in]src1The first sort key
[in]src1_lengthThe length of the first sort key, including the zero byte at the end; can be -1 if the function is to find the length.
[in]src2The second sort key
[in]src2_lengthThe length of the second sort key, including the zero byte at the end; can be -1 if the function is to find the length.
[in]dest_sizeThe number of bytes in the dest buffer
[out]destThe buffer where the merged sort key is written, can be NULL if dest_size==0
[out]merged_lengthThe length of the merged sort key, src1_length + src2_length; can be larger than dest_size, or 0 if an error occurs (only for illegal arguments), in which cases the contents of dest are undefined.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_next_sort_key_part ( const i18n_ucollator_h  collator,
i18n_uchar_iter_h *  iter,
uint32_t  state[2],
uint8_t *  dest,
int32_t  count,
int32_t *  result_length 
)

Gets the next count bytes of a sort key.

Caller needs to preserve state array between calls and to provide the same type of i18n_ucollator_h set with the same string. The destination buffer provided must be big enough to store the number of requested bytes. The generated sort key may or may not be compatible with sort keys generated using i18n_ucollator_get_sort_key().

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the collation rules
[in]iteri18n_uchar_iter_h containing the string we need the sort key to be calculated for
[in]stateOpaque state of sortkey iteration
[out]destBuffer to hold the resulting sortkey part
[in]countThe number of sort key bytes required
[out]result_lengthThe actual number of bytes of a sortkey. It can be smaller than count if we have reached the end of the sort key.
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter

int i18n_ucollator_safe_clone ( i18n_ucollator_h  collator,
i18n_ucollator_h *  clone 
)

Clones the given collator, the cloning is thread-safe.

Since :
4.0
Remarks:
The clone should be released using i18n_ucollator_destroy().
Parameters:
[in]collatorThe collator to be cloned
[out]cloneThe pointer to the new clone
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_create()
i18n_ucollator_create_rules()
i18n_ucollator_destroy()
 #include <stdio.h>
 #include <stdlib.h>
 #include <utils_i18n.h>
 /* Example: create a collator for "en_US", clone it, then destroy both handles. */
 int main() {
    i18n_ucollator_h original = NULL;
    i18n_ucollator_h copy = NULL;
    i18n_error_code_e status;

    status = i18n_ucollator_create("en_US", &original);
    if (status != I18N_ERROR_NONE) {
        printf("Error i18n_ucollator_create: %d\n", status);
        return EXIT_FAILURE;
    }

    status = i18n_ucollator_safe_clone(original, &copy);
    if (status == I18N_ERROR_NONE) {
        /* The clone is a separate handle and is destroyed separately. */
        i18n_ucollator_destroy(copy);
    } else {
        printf("Error i18n_ucollator_safe_clone: %d\n", status);
    }

    /* The source collator is destroyed on both the success and failure paths. */
    i18n_ucollator_destroy(original);
    return status == I18N_ERROR_NONE ? EXIT_SUCCESS : EXIT_FAILURE;
 }

Sets an attribute's value.

Since :
2.3
Parameters:
[in]collatorThe i18n_ucollator_h containing attributes to be changed
[in]attrThe attribute type
[in]valThe attribute value
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter

Sets the variable top to the top of the specified reordering group.

The variable top determines the highest-sorting character which is affected by I18N_UCOLLATOR_ALTERNATE_HANDLING. If that attribute is set to I18N_UCOLLATOR_NON_IGNORABLE, then the variable top has no effect.

Since :
4.0
Parameters:
[in]collatorThe collator
[in]groupOne of:
I18N_UCOLLATOR_REORDER_CODE_SPACE
, I18N_UCOLLATOR_REORDER_CODE_PUNCTUATION
, I18N_UCOLLATOR_REORDER_CODE_SYMBOL
, I18N_UCOLLATOR_REORDER_CODE_CURRENCY
, I18N_UCOLLATOR_REORDER_CODE_DEFAULT to restore the default max variable group
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_set_reorder_codes ( i18n_ucollator_h  collator,
const int32_t *  reorder_codes,
int32_t  codes_length 
)

Sets the reordering codes for this collator.

Collation reordering allows scripts and some other groups of characters to be moved relative to each other. This reordering is done on top of the DUCET/CLDR standard collation order. Reordering can specify groups to be placed at the start and/or the end of the collation order. These groups are specified using i18n_uscript_code_e codes and i18n_ucollator_reorder_code_e entries.

By default, reordering codes specified for the start of the order are placed in the order given after several special non-script blocks. These special groups of characters are space, punctuation, symbol, currency, and digit. These special groups are represented with i18n_ucollator_reorder_code_e entries. Script groups can be intermingled with these special non-script groups if those special groups are explicitly specified in the reordering.

The special code OTHERS stands for any script that is not explicitly mentioned in the list of reordering codes given.

The special reorder code DEFAULT will reset the reordering for this collator to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that was specified when this collator was created from resource data or from rules. The DEFAULT code must be the sole code supplied when it is used. If not, then I18N_ERROR_INVALID_PARAMETER will be set.

The special reorder code NONE will remove any reordering for this collator. The result of setting no reordering will be to have the DUCET/CLDR ordering used.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h whose reordering codes are to be set
[in]reorder_codesAn array of script codes in the new order. This can be NULL if the codes_length is also set to 0. An empty array will clear any reordering codes on the collator.
[in]codes_lengthThe length of reorder_codes
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_get_reorder_codes()
i18n_ucollator_get_equivalent_reorder_codes()
i18n_uscript_code_e
i18n_ucollator_reorder_code_e

Sets the collation strength used in a collator.

The strength influences how strings are compared.

Since :
2.3
Parameters:
[in]collatorThe i18n_ucollator_h to set.
[in]strengthThe desired collation strength.
One of i18n_ucollator_strength_e
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
int i18n_ucollator_str_collator ( const i18n_ucollator_h  collator,
const i18n_uchar *  src,
int32_t  src_len,
const i18n_uchar *  target,
int32_t  target_len,
i18n_ucollator_result_e *  result 
)

Compares two strings.

The strings will be compared using the options already specified.

Since :
2.3
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]srcThe source string
[in]src_lenThe length of the source, otherwise -1 if null-terminated
[in]targetThe target string
[in]target_lenThe length of the target, otherwise -1 if null-terminated
[out]resultThe result of comparing the strings
One of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_equal()
int i18n_ucollator_str_collator_iter ( const i18n_ucollator_h  collator,
const i18n_uchar_iter_h *  src_iter,
const i18n_uchar_iter_h *  dest_iter,
i18n_ucollator_result_e *  result 
)

Compares two strings supplied through character iterators.

The strings will be compared using the options already specified.

Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]src_iterThe source string iterator
[in]dest_iterThe dest string iterator
[out]resultThe result of comparing the strings
One of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_equal()
int i18n_ucollator_str_collator_utf8 ( const i18n_ucollator_h  collator,
const char *  src,
int32_t  src_len,
const char *  target,
int32_t  target_len,
i18n_ucollator_result_e *  result 
)

Compares two strings in UTF-8.

The strings will be compared using the options already specified.

Remarks:
Note: When input string contains a malformed UTF-8 byte sequence, this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
Since :
4.0
Parameters:
[in]collatorThe i18n_ucollator_h containing the comparison rules
[in]srcThe source UTF-8 string
[in]src_lenThe length of the src, otherwise -1 if null-terminated
[in]targetThe target UTF-8 string
[in]target_lenThe length of the target, otherwise -1 if null-terminated
[out]resultThe result of comparing the strings
One of I18N_UCOLLATOR_EQUAL, I18N_UCOLLATOR_GREATER, or I18N_UCOLLATOR_LESS
Return values:
I18N_ERROR_NONESuccessful
I18N_ERROR_INVALID_PARAMETERInvalid function parameter
See also:
i18n_ucollator_greater()
i18n_ucollator_greater_or_equal()
i18n_ucollator_equal()