Tizen Native API
5.5
|
Ubidi module provides an implementation of the Unicode Bidirectional Algorithm.
#include <utils_i18n.h>
Ubidi module provides an implementation of the Unicode Bidirectional Algorithm. The algorithm is defined in the Unicode Standard Annex #9. Note: Libraries that perform a bidirectional algorithm and reorder strings accordingly are sometimes called "Storage Layout Engines". ubidi and shaping APIs can be used at the core of such "Storage Layout Engines".
Functions | |
int | i18n_ubidi_destroy (i18n_ubidi_h ubidi) |
This function must be called to free the memory associated with an i18n_ubidi_h handle. | |
int | i18n_ubidi_count_paragraphs (i18n_ubidi_h ubidi, int32_t *count) |
Gets the number of paragraphs. | |
int | i18n_ubidi_count_runs (i18n_ubidi_h ubidi, int32_t *count) |
Gets the number of runs. | |
int | i18n_ubidi_get_base_direction (const i18n_uchar *text, int32_t length, i18n_ubidi_direction_e *direction) |
Gets the base direction of the text provided according to the Unicode Bidirectional Algorithm. | |
int | i18n_ubidi_get_class_cb (i18n_ubidi_h ubidi, i18n_ubidi_class_cb *fn, const void **context) |
Gets the current callback function used for ubidi class determination. | |
int | i18n_ubidi_get_customized_class (i18n_ubidi_h ubidi, i18n_uchar32 c, i18n_uchar_direction_e *direction) |
Retrieves the ubidi class for a given code point. | |
int | i18n_ubidi_get_direction (const i18n_ubidi_h ubidi, i18n_ubidi_direction_e *direction) |
Gets the directionality of the text. | |
int | i18n_ubidi_get_length (const i18n_ubidi_h ubidi, int32_t *length) |
Gets the length of the text. | |
int | i18n_ubidi_get_level_at (const i18n_ubidi_h ubidi, int32_t char_index, i18n_ubidi_level_t *level) |
Gets the level for one character. | |
int | i18n_ubidi_get_levels (i18n_ubidi_h ubidi, const i18n_ubidi_level_t **levels) |
Gets an array of levels for each character. | |
int | i18n_ubidi_get_logical_index (i18n_ubidi_h ubidi, int32_t visual_index, int32_t *logical_index) |
Gets the logical text position from a visual position. | |
int | i18n_ubidi_get_logical_map (i18n_ubidi_h ubidi, int32_t *index_map) |
Gets a logical-to-visual index map (array) for the characters in the i18n_ubidi_h (paragraph or line) object. | |
int | i18n_ubidi_get_logical_run (const i18n_ubidi_h ubidi, int32_t logical_position, int32_t *logical_limit, i18n_ubidi_level_t *level) |
Gets a logical run. | |
int | i18n_ubidi_get_paragraph (const i18n_ubidi_h ubidi, int32_t char_index, int32_t *para_start, int32_t *para_limit, i18n_ubidi_level_t *para_level, int32_t *index) |
Gets a paragraph, given a position within the text. | |
int | i18n_ubidi_get_paragraph_by_index (const i18n_ubidi_h ubidi, int32_t para_index, int32_t *para_start, int32_t *para_limit, i18n_ubidi_level_t *para_level) |
Gets a paragraph, given the index of this paragraph. | |
int | i18n_ubidi_get_para_level (const i18n_ubidi_h ubidi, i18n_ubidi_level_t *level) |
Gets the paragraph level of the text. | |
int | i18n_ubidi_get_processed_length (const i18n_ubidi_h ubidi, int32_t *length) |
Gets the length of the source text processed by the last call to i18n_ubidi_set_para(). | |
int | i18n_ubidi_get_reordering_mode (i18n_ubidi_h ubidi, i18n_ubidi_reordering_mode_e *mode) |
Gets the requested reordering mode for a given i18n_ubidi_h object. | |
int | i18n_ubidi_get_reordering_options (i18n_ubidi_h ubidi, uint32_t *options) |
Gets the reordering options applied to a given i18n_ubidi_h object. | |
int | i18n_ubidi_get_result_length (const i18n_ubidi_h ubidi, int32_t *length) |
Gets the length of the reordered text resulting from the last call to i18n_ubidi_set_para(). | |
int | i18n_ubidi_get_text (const i18n_ubidi_h ubidi, char **text) |
Gets the pointer to the given i18n_ubidi_h object's text. | |
int | i18n_ubidi_get_visual_index (i18n_ubidi_h ubidi, int32_t logical_index, int32_t *visual_index) |
Gets the visual position from a logical text position. | |
int | i18n_ubidi_get_visual_map (i18n_ubidi_h ubidi, int32_t *index_map) |
Gets a visual-to-logical index map (array) for the characters in the i18n_ubidi_h (paragraph or line) object. | |
int | i18n_ubidi_get_visual_run (i18n_ubidi_h ubidi, int32_t run_index, int32_t *logical_index, int32_t *length, i18n_ubidi_direction_e *direction) |
Gets one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL. | |
int | i18n_ubidi_invert_map (const int32_t *src_map, int32_t length, int32_t *dest_map) |
Inverts an index map. | |
int | i18n_ubidi_is_inverse (i18n_ubidi_h ubidi, i18n_ubool *is_inverse) |
Gets whether the given i18n_ubidi_h object is set to perform the inverse ubidi algorithm. | |
int | i18n_ubidi_is_order_paragraphs_ltr (i18n_ubidi_h ubidi, i18n_ubool *is_order) |
Gets whether the given i18n_ubidi_h object is set to allocate level 0 to block separators. | |
int | i18n_ubidi_create (i18n_ubidi_h *ubidi) |
Creates an ubidi object. | |
int | i18n_ubidi_create_sized (int32_t max_length, int32_t max_run_count, i18n_ubidi_h *ubidi) |
Creates an ubidi structure with preallocated memory for internal structures. | |
int | i18n_ubidi_order_paragraphs_ltr (i18n_ubidi_h ubidi, i18n_ubool order_paragraphs_ltr) |
Sets whether block separators must be allocated level zero, so that successive paragraphs will progress from left to right. | |
int | i18n_ubidi_reorder_logical (const i18n_ubidi_level_t *levels, int32_t length, int32_t *index_map) |
Performs logical reordering. | |
int | i18n_ubidi_reorder_visual (const i18n_ubidi_level_t *levels, int32_t length, int32_t *index_map) |
Performs visual reordering. | |
int | i18n_ubidi_set_class_cb (i18n_ubidi_h ubidi, i18n_ubidi_class_cb new_fn, const void *new_context, i18n_ubidi_class_cb *old_fn, const void **old_context) |
Sets the callback function and callback data used by the UBA implementation for ubidi class determination. | |
int | i18n_ubidi_set_context (i18n_ubidi_h ubidi, const i18n_uchar *prologue, int32_t pro_length, const i18n_uchar *epilogue, int32_t epi_length) |
Sets the context before a call to i18n_ubidi_set_para(). | |
int | i18n_ubidi_set_inverse (i18n_ubidi_h ubidi, i18n_ubool is_inverse) |
Modifies the operation of the ubidi algorithm such that it approximates an "inverse ubidi" algorithm. | |
int | i18n_ubidi_set_line (const i18n_ubidi_h para_bidi, int32_t start, int32_t limit, i18n_ubidi_h line_bidi) |
Sets an i18n_ubidi_h object to contain the reordering information, especially the resolved levels, for all the characters in a line of text. | |
int | i18n_ubidi_set_para (i18n_ubidi_h ubidi, const i18n_uchar *text, int32_t length, i18n_ubidi_level_t para_level, i18n_ubidi_level_t *embedding_levels) |
Performs the Unicode bidi algorithm. | |
int | i18n_ubidi_set_reordering_mode (i18n_ubidi_h ubidi, i18n_ubidi_reordering_mode_e reordering_mode) |
Modifies the operation of the ubidi algorithm such that it implements some variant to the basic ubidi algorithm or approximates an "inverse ubidi" algorithm, depending on different values of the "reordering mode". | |
int | i18n_ubidi_set_reordering_options (i18n_ubidi_h ubidi, uint32_t reordering_options) |
Specifies which of the reordering options should be applied during ubidi transformations. | |
int | i18n_ubidi_write_reordered (i18n_ubidi_h ubidi, uint16_t options, int32_t dest_size, i18n_uchar *dest, int32_t *output_length) |
Takes an i18n_ubidi_h object containing the reordering information for a piece of text (one or more paragraphs) set by i18n_ubidi_set_para() or for a line of text set by i18n_ubidi_set_line() and writes a reordered string to the destination buffer. | |
int | i18n_ubidi_write_reverse (const i18n_uchar *src, int32_t src_length, uint16_t options, int32_t dest_size, i18n_uchar *dest, int32_t *output_length) |
Reverses a Right-To-Left run of Unicode text. | |
Typedefs | |
typedef void * | i18n_ubidi_h |
An i18n_ubidi_h handle. | |
typedef i18n_uchar_direction_e(* | i18n_ubidi_class_cb )(const void *context, i18n_uchar32 c) |
Callback type declaration for overriding default ubidi class values with custom ones. | |
typedef uint8_t | i18n_ubidi_level_t |
The type of the level values in this ubidi implementation. | |
Defines | |
#define | I18N_UBIDI_CLASS_DEFAULT I18N_UCHAR_U_CHAR_DIRECTION_COUNT |
Value returned by i18n_ubidi_class_cb() callbacks when there is no need to override the standard ubidi class for a given code point. | |
#define | I18N_UBIDI_DEFAULT_LTR 0xfe |
Paragraph level setting: Constant indicating that the base direction depends on the first strong directional character in the text according to the Unicode Bidirectional Algorithm. If no strong directional character is present, then set the paragraph level to 0 (left-to-right). | |
#define | I18N_UBIDI_DEFAULT_RTL 0xff |
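Paragraph level setting: Constant indicating that the base direction depends on the first strong directional character in the text according to the Unicode Bidirectional Algorithm. If no strong directional character is present, then set the paragraph level to 1 (right-to-left). | |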
#define | I18N_UBIDI_DO_MIRRORING 2 |
Option bit for i18n_ubidi_write_reordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings. | |
#define | I18N_UBIDI_INSERT_LRM_FOR_NUMERIC 4 |
Option bit for i18n_ubidi_write_reordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse ubidi" algorithm. | |
#define | I18N_UBIDI_KEEP_BASE_COMBINING 1 |
Option bit for i18n_ubidi_write_reordered(): keep combining characters after their base characters in RTL runs. | |
#define | I18N_UBIDI_LEVEL_OVERRIDE 0x80 |
Bit flag for level input. | |
#define | I18N_UBIDI_MAP_NOWHERE (-1) |
Special value which can be returned by the mapping functions when a logical index has no corresponding visual index or vice-versa. | |
#define | I18N_UBIDI_MAX_EXPLICIT_LEVEL 125 |
Maximum explicit embedding level. | |
#define | I18N_UBIDI_OUTPUT_REVERSE 16 |
Option bit for i18n_ubidi_write_reordered(): write the output in reverse order. | |
#define | I18N_UBIDI_REMOVE_BIDI_CONTROLS 8 |
Option bit for i18n_ubidi_write_reordered(): remove ubidi control characters (this does not affect I18N_UBIDI_INSERT_LRM_FOR_NUMERIC). |
Value returned by i18n_ubidi_class_cb() callbacks when there is no need to override the standard ubidi class for a given code point.
#define I18N_UBIDI_DEFAULT_LTR 0xfe |
Paragraph level setting: Constant indicating that the base direction depends on the first strong directional character in the text according to the Unicode Bidirectional Algorithm. If no strong directional character is present, then set the paragraph level to 0 (left-to-right).
If this value is used in conjunction with reordering modes I18N_UBIDI_REORDER_INVERSE_LIKE_DIRECT or I18N_UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder is assumed to be visual LTR, and the text after reordering is required to be the corresponding logical string with appropriate contextual direction. The direction of the result string will be RTL if either the rightmost or leftmost strong character of the source text is RTL or Arabic Letter; the direction will be LTR otherwise.
If reordering option I18N_UBIDI_OPTION_INSERT_MARKS is set, an RLM may be added at the beginning of the result string to ensure round trip (that the result string, when reordered back to visual, will produce the original source text).
#define I18N_UBIDI_DEFAULT_RTL 0xff |
Paragraph level setting:
Constant indicating that the base direction depends on the first strong directional character in the text according to the Unicode Bidirectional Algorithm. If no strong directional character is present, then set the paragraph level to 1 (right-to-left).
If this value is used in conjunction with reordering modes I18N_UBIDI_REORDER_INVERSE_LIKE_DIRECT or I18N_UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder is assumed to be visual LTR, and the text after reordering is required to be the corresponding logical string with appropriate contextual direction. The direction of the result string will be RTL if either the rightmost or leftmost strong character of the source text is RTL or Arabic Letter, or if the text contains no strong character; the direction will be LTR otherwise.
If reordering option I18N_UBIDI_OPTION_INSERT_MARKS is set, an RLM may be added at the beginning of the result string to ensure round trip (that the result string, when reordered back to visual, will produce the original source text).
#define I18N_UBIDI_DO_MIRRORING 2 |
Option bit for i18n_ubidi_write_reordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings.
#define I18N_UBIDI_INSERT_LRM_FOR_NUMERIC 4 |
Option bit for i18n_ubidi_write_reordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse ubidi" algorithm.
This option does not imply corresponding adjustment of the index mappings.
#define I18N_UBIDI_KEEP_BASE_COMBINING 1 |
Option bit for i18n_ubidi_write_reordered(): keep combining characters after their base characters in RTL runs.
#define I18N_UBIDI_LEVEL_OVERRIDE 0x80 |
Bit flag for level input.
Overrides directional properties.
#define I18N_UBIDI_MAP_NOWHERE (-1) |
Special value which can be returned by the mapping functions when a logical index has no corresponding visual index or vice-versa.
This may happen for the logical-to-visual mapping of a ubidi control when option I18N_UBIDI_OPTION_REMOVE_CONTROLS is specified. This can also happen for the visual-to-logical mapping of a ubidi mark (LRM or RLM) inserted by option I18N_UBIDI_OPTION_INSERT_MARKS.
#define I18N_UBIDI_MAX_EXPLICIT_LEVEL 125 |
Maximum explicit embedding level.
The maximum resolved level can be up to I18N_UBIDI_MAX_EXPLICIT_LEVEL + 1.
#define I18N_UBIDI_OUTPUT_REVERSE 16 |
Option bit for i18n_ubidi_write_reordered(): write the output in reverse order.
This has the same effect as calling i18n_ubidi_write_reordered() first without this option, and then calling i18n_ubidi_write_reverse() without mirroring. Doing this in the same step is faster and avoids a temporary buffer. An example for using this option is output to a character terminal that is designed for RTL scripts and stores text in reverse order.
#define I18N_UBIDI_REMOVE_BIDI_CONTROLS 8 |
Option bit for i18n_ubidi_write_reordered(): remove ubidi control characters (this does not affect I18N_UBIDI_INSERT_LRM_FOR_NUMERIC).
This option does not imply corresponding adjustment of the index mappings.
typedef i18n_uchar_direction_e(* i18n_ubidi_class_cb)(const void *context, i18n_uchar32 c) |
Callback type declaration for overriding default ubidi class values with custom ones.
Usually, the function pointer will be propagated to an i18n_ubidi_h handle by calling the i18n_ubidi_set_class_cb() function; then the callback will be invoked by the UBA implementation any time the class of a character is to be determined.
[in] | context | A pointer to the callback private data |
[in] | c | The code point to get a ubidi class for |
direction | The directional property / ubidi class for the given code point c if the default class has been overridden, or I18N_UBIDI_CLASS_DEFAULT if the standard ubidi class value for code point c is to be used. |
typedef void* i18n_ubidi_h |
typedef uint8_t i18n_ubidi_level_t |
The type of the level values in this ubidi implementation.
It holds an embedding level and indicates the visual direction by its bit 0 (even/odd value).
It can also hold non-level values for the para_level and embedding_levels arguments of i18n_ubidi_set_para(); there:
- Bit 7 of an embedding_levels[] value indicates whether the using application is specifying the level of a character to override whatever the ubidi implementation would resolve it to.
- para_level can be set to the pseudo-level values I18N_UBIDI_DEFAULT_LTR and I18N_UBIDI_DEFAULT_RTL.
The related constants are not real, valid level values. I18N_UBIDI_DEFAULT_XXX can be used to specify a default for the paragraph level for when the i18n_ubidi_set_para() function shall determine it but there is no strong directional character in the input.
Note that the value for I18N_UBIDI_DEFAULT_LTR is even and the one for I18N_UBIDI_DEFAULT_RTL is odd, just like with normal LTR and RTL level values - these special values are designed that way. Also, the implementation assumes that I18N_UBIDI_MAX_EXPLICIT_LEVEL is odd.
Enumeration for text direction.
I18N_UBIDI_LTR |
Left-to-right text. This is a 0 value.
|
I18N_UBIDI_RTL |
Right-to-left text. This is a 1 value.
|
I18N_UBIDI_MIXED |
Mixed-directional text. As return value for i18n_ubidi_get_direction(), it means that the source string contains both left-to-right and right-to-left characters. |
I18N_UBIDI_NEUTRAL |
No strongly directional text. As return value for i18n_ubidi_get_base_direction(), it means that the source string is missing or empty, or contains neither left-to-right nor right-to-left characters. |
Enumeration for reordering mode.
These values indicate which variant of the ubidi algorithm to use.
I18N_UBIDI_REORDER_DEFAULT |
Regular Logical to Visual ubidi algorithm according to Unicode. This is a 0 value. |
I18N_UBIDI_REORDER_NUMBERS_SPECIAL |
Logical to Visual algorithm which handles numbers in a way which mimics the behavior of Windows XP. |
I18N_UBIDI_REORDER_GROUP_NUMBERS_WITH_R |
Logical to Visual algorithm grouping numbers with adjacent R characters (reversible algorithm). |
I18N_UBIDI_REORDER_RUNS_ONLY |
Reorder runs only to transform a Logical LTR string to the Logical RTL string with the same display, or vice-versa. If this mode is set together with option I18N_UBIDI_OPTION_INSERT_MARKS, some ubidi controls in the source text may be removed and other controls may be added to produce the minimum combination which has the required display. |
I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L |
Visual to Logical algorithm which handles numbers like L (same algorithm as selected by i18n_ubidi_set_inverse(true)). |
I18N_UBIDI_REORDER_INVERSE_LIKE_DIRECT |
Visual to Logical algorithm equivalent to the regular Logical to Visual algorithm. |
I18N_UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL |
Inverse ubidi (Visual to Logical) algorithm for the I18N_UBIDI_REORDER_NUMBERS_SPECIAL ubidi algorithm. |
I18N_UBIDI_REORDER_COUNT |
Number of values for reordering mode. |
Enumeration for reordering options.
These values indicate which options are specified to affect the ubidi algorithm.
I18N_UBIDI_OPTION_DEFAULT |
Option value for i18n_ubidi_set_reordering_options(): disable all the options which can be set with this function. |
I18N_UBIDI_OPTION_INSERT_MARKS |
Option bit for i18n_ubidi_set_reordering_options(): insert ubidi marks (LRM or RLM) when needed to ensure correct result of a reordering to a Logical order. This option must be set or reset before calling i18n_ubidi_set_para(). This option is significant only with reordering modes which generate a result with Logical order, specifically:
- I18N_UBIDI_REORDER_RUNS_ONLY
- I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L
- I18N_UBIDI_REORDER_INVERSE_LIKE_DIRECT
- I18N_UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
If this option is set in conjunction with reordering mode I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L or with calling i18n_ubidi_set_inverse(true), it implies option I18N_UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function i18n_ubidi_write_reordered(). For other reordering modes, a minimum number of LRM or RLM characters will be added to the source text after reordering it so as to ensure round trip, i.e. when applying the inverse reordering mode on the resulting logical text with removal of ubidi marks (option I18N_UBIDI_OPTION_REMOVE_CONTROLS set before calling i18n_ubidi_set_para() or option I18N_UBIDI_REMOVE_BIDI_CONTROLS in i18n_ubidi_write_reordered()), the result will be identical to the source text in the first transformation. This option will be ignored if specified together with option I18N_UBIDI_OPTION_REMOVE_CONTROLS. It inhibits option I18N_UBIDI_REMOVE_BIDI_CONTROLS in calls to function i18n_ubidi_write_reordered() and it implies option I18N_UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function i18n_ubidi_write_reordered() if the reordering mode is I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L. |
I18N_UBIDI_OPTION_REMOVE_CONTROLS |
Option bit for i18n_ubidi_set_reordering_options(): remove ubidi control characters. This option must be set or reset before calling i18n_ubidi_set_para(). This option nullifies option I18N_UBIDI_OPTION_INSERT_MARKS. It inhibits option I18N_UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function i18n_ubidi_write_reordered() and it implies option I18N_UBIDI_REMOVE_BIDI_CONTROLS in calls to that function. |
I18N_UBIDI_OPTION_STREAMING |
Option bit for i18n_ubidi_set_reordering_options(): process the output as part of a stream to be continued. This option must be set or reset before calling i18n_ubidi_set_para(). This option specifies that the caller is interested in processing a large text object in parts. The results of the successive calls are expected to be concatenated by the caller. Only the call for the last part will have this option bit off. When this option bit is on, i18n_ubidi_set_para() may process less than the full source text in order to truncate the text at a meaningful boundary. The caller should call i18n_ubidi_get_processed_length() immediately after calling i18n_ubidi_set_para() in order to determine how much of the source text has been processed. Source text beyond that length should be resubmitted in following calls to i18n_ubidi_set_para(). The processed length may be less than the length of the source text if a character preceding the last character of the source text constitutes a reasonable boundary (like a block separator) for text to be continued. If the last character of the source text constitutes a reasonable boundary, the whole text will be processed at once. If nowhere in the source text there exists such a reasonable boundary, the processed length will be zero. The caller should check for such an occurrence and do one of the following:
- submit a larger amount of text with a better chance to include a reasonable boundary;
- resubmit the same text after turning off option I18N_UBIDI_OPTION_STREAMING.
In all cases, this option should be turned off before processing the last part of the text. When the I18N_UBIDI_OPTION_STREAMING option is used, it is recommended to call i18n_ubidi_order_paragraphs_ltr() with argument order_paragraphs_ltr set to true before calling i18n_ubidi_set_para() so that later paragraphs may be concatenated to previous paragraphs on the right. |
int i18n_ubidi_count_paragraphs | ( | i18n_ubidi_h | ubidi, |
int32_t * | count | ||
) |
Gets the number of paragraphs.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | count | The number of paragraphs |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_count_runs | ( | i18n_ubidi_h | ubidi, |
int32_t * | count | ||
) |
Gets the number of runs.
This function may invoke the actual reordering on the i18n_ubidi_h handle, after i18n_ubidi_set_para() may have resolved only the levels of the text. Therefore, i18n_ubidi_count_runs() may have to allocate memory, and may fail doing so.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | count | The number of runs |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
I18N_ERROR_OUT_OF_MEMORY | Out of memory |
int i18n_ubidi_create | ( | i18n_ubidi_h * | ubidi | ) |
Creates an ubidi object.
Such an object is initially empty. It is assigned the ubidi properties of a piece of text containing one or more paragraphs by i18n_ubidi_set_para() or the ubidi properties of a line within a paragraph by i18n_ubidi_set_line().
This object can be reused for as long as it is not deallocated by calling i18n_ubidi_destroy().
i18n_ubidi_set_para() and i18n_ubidi_set_line() will allocate additional memory for internal structures as necessary.
[out] | ubidi | An empty i18n_ubidi_h object |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_create_sized | ( | int32_t | max_length, |
int32_t | max_run_count, | ||
i18n_ubidi_h * | ubidi | ||
) |
Creates an ubidi structure with preallocated memory for internal structures.
This function provides an ubidi object like i18n_ubidi_create() with no arguments, but it also preallocates memory for internal structures according to the sizings supplied by the caller.
Subsequent functions will not allocate any more memory, and are thus guaranteed not to fail because of lack of memory.
The preallocation can be limited to some of the internal memory by setting some values to 0 here. That means that if, e.g., max_run_count cannot be reasonably predetermined and should not be set to max_length (the only failproof value) to avoid wasting memory, then max_run_count could be set to 0 here and the internal structures that are associated with it will be allocated on demand, just like with i18n_ubidi_create().
[in] | max_length | The maximum text or line length that internal memory will be preallocated for. An attempt to associate this object with a longer text will fail, unless this value is 0, which leaves the allocation up to the implementation. |
[in] | max_run_count | The maximum anticipated number of same-level runs that internal memory will be preallocated for. An attempt to access visual runs on an object that was not preallocated for as many runs as the text was actually resolved to will fail, unless this value is 0, which leaves the allocation up to the implementation. The number of runs depends on the actual text and may be anywhere between 1 and max_length. It is typically small. |
[out] | ubidi | An empty i18n_ubidi_h handle with preallocated memory |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_destroy | ( | i18n_ubidi_h | ubidi | ) |
This function must be called to free the memory associated with an i18n_ubidi_h handle.
Important: A parent i18n_ubidi_h handle must not be destroyed or reused if it still has children. If an i18n_ubidi_h handle has become the child of another one (its parent) by calling i18n_ubidi_set_line(), then the child object must be destroyed or reused (by calling i18n_ubidi_set_para() or i18n_ubidi_set_line()) before the parent object.
[in] | ubidi | i18n_ubidi_h handle to be destroyed |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_base_direction | ( | const i18n_uchar * | text, |
int32_t | length, | ||
i18n_ubidi_direction_e * | direction | ||
) |
Gets the base direction of the text provided according to the Unicode Bidirectional Algorithm.
The base direction is derived from the first character in the string with bidirectional character type L, R, or AL. If the first such character has type L, I18N_UBIDI_LTR is returned. If the first such character has type R or AL, I18N_UBIDI_RTL is returned. If the string does not contain any character of these types, then I18N_UBIDI_NEUTRAL is returned.
This is a lightweight function for use when only the base direction is needed and no further bidi processing of the text is needed.
[in] | text | A pointer to the text whose base direction is needed. Note: the text must be (at least) length long. |
[in] | length | The length of the text; if length == -1 then the text must be zero-terminated |
[out] | direction | Base direction of the text |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_class_cb | ( | i18n_ubidi_h | ubidi, |
i18n_ubidi_class_cb * | fn, | ||
const void ** | context | ||
) |
Gets the current callback function used for ubidi class determination.
[in] | ubidi | The paragraph i18n_ubidi_h object |
[out] | fn | The callback function pointer. This can be NULL . |
[out] | context | The callback's private context. This can be NULL . |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_customized_class | ( | i18n_ubidi_h | ubidi, |
i18n_uchar32 | c, | ||
i18n_uchar_direction_e * | direction | ||
) |
Retrieves the ubidi class for a given code point.
If an i18n_ubidi_class_cb callback is defined and returns a value other than I18N_UBIDI_CLASS_DEFAULT, that value is used; otherwise the default class determination mechanism is invoked.
[in] | ubidi | The paragraph i18n_ubidi_h object |
[in] | c | The code point whose ubidi class must be retrieved |
[out] | direction | The ubidi class for character c based on the given ubidi instance |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_direction | ( | const i18n_ubidi_h | ubidi, |
i18n_ubidi_direction_e * | direction | ||
) |
Gets the directionality of the text.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | direction | A value of I18N_UBIDI_LTR, I18N_UBIDI_RTL or I18N_UBIDI_MIXED that indicates if the entire text represented by this object is unidirectional, and which direction, or if it is mixed-directional. Note - The value I18N_UBIDI_NEUTRAL is never returned from this method. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_length | ( | const i18n_ubidi_h | ubidi, |
int32_t * | length | ||
) |
Gets the length of the text.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | length | The length of the text that the i18n_ubidi_h object was created for |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_level_at | ( | const i18n_ubidi_h | ubidi, |
int32_t | char_index, | ||
i18n_ubidi_level_t * | level | ||
) |
Gets the level for one character.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | char_index | The index of a character. It must be in the range [0..i18n_ubidi_get_processed_length(ubidi)-1] |
[out] | level | The level for the character at char_index (0 if char_index is not in the valid range) |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_levels | ( | i18n_ubidi_h | ubidi, |
const i18n_ubidi_level_t ** | levels | ||
) |
Gets an array of levels for each character.
Note that this function may allocate memory under some circumstances, unlike i18n_ubidi_get_level_at().
[in] | ubidi | The paragraph or line i18n_ubidi_h object, whose text length must be strictly positive |
[out] | levels | The levels array for the text, or NULL if an error occurs |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
I18N_ERROR_OUT_OF_MEMORY | Out of memory |
int i18n_ubidi_get_logical_index | ( | i18n_ubidi_h | ubidi, |
int32_t | visual_index, | ||
int32_t * | logical_index | ||
) |
Gets the logical text position from a visual position.
If such a mapping is used many times on the same i18n_ubidi_h object, then calling i18n_ubidi_get_visual_map() is more efficient.
The value returned may be I18N_UBIDI_MAP_NOWHERE if there is no logical position because the corresponding text character is a ubidi mark inserted in the output by option I18N_UBIDI_OPTION_INSERT_MARKS.
This is the inverse function to i18n_ubidi_get_visual_index().
When the visual output is altered by using options of i18n_ubidi_write_reordered() such as I18N_UBIDI_INSERT_LRM_FOR_NUMERIC, I18N_UBIDI_KEEP_BASE_COMBINING, I18N_UBIDI_OUTPUT_REVERSE, I18N_UBIDI_REMOVE_BIDI_CONTROLS, the logical position returned may not be correct. It is advised to use, when possible, reordering options such as I18N_UBIDI_OPTION_INSERT_MARKS and I18N_UBIDI_OPTION_REMOVE_CONTROLS.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | visual_index | The visual position of a character |
[out] | logical_index | The index of this character in the text |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_logical_map | ( | i18n_ubidi_h | ubidi, |
int32_t * | index_map | ||
) |
Gets a logical-to-visual index map (array) for the characters in the i18n_ubidi_h (paragraph or line) object.
Some values in the map may be I18N_UBIDI_MAP_NOWHERE if the corresponding text characters are ubidi controls removed from the visual output by the option I18N_UBIDI_OPTION_REMOVE_CONTROLS.
When the visual output is altered by using options of i18n_ubidi_write_reordered() such as I18N_UBIDI_INSERT_LRM_FOR_NUMERIC, I18N_UBIDI_KEEP_BASE_COMBINING, I18N_UBIDI_OUTPUT_REVERSE, I18N_UBIDI_REMOVE_BIDI_CONTROLS, the visual positions returned may not be correct. It is advised to use, when possible, reordering options such as I18N_UBIDI_OPTION_INSERT_MARKS and I18N_UBIDI_OPTION_REMOVE_CONTROLS.
Note that in right-to-left runs, this mapping places second surrogates before first ones (which is generally a bad idea) and combining characters before base characters. Use of i18n_ubidi_write_reordered(), optionally with the I18N_UBIDI_KEEP_BASE_COMBINING option can be considered instead of using the mapping, in order to avoid these issues.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | index_map | A pointer to an array of i18n_ubidi_get_processed_length() indexes which will reflect the reordering of the characters. If option I18N_UBIDI_OPTION_INSERT_MARKS is set, the number of elements allocated in the index_map must be no less than i18n_ubidi_get_result_length(). |
The array does not need to be initialized.
The index map will result in index_map[logical_index] == visual_index.
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_logical_run | ( | const i18n_ubidi_h | ubidi, |
int32_t | logical_position, | ||
int32_t * | logical_limit, | ||
i18n_ubidi_level_t * | level | ||
) |
Gets a logical run.
This function returns information about a run and is used to retrieve runs in logical order.
This is especially useful for line-breaking on a paragraph.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | logical_position | A logical position within the source text |
[out] | logical_limit | The limit of the corresponding run. The l-value that you point to here may be the same expression (variable) as the one for logical_position. This pointer can be NULL if this value is not necessary. |
[out] | level | The level of the corresponding run. This pointer can be NULL if this value is not necessary. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_para_level | ( | const i18n_ubidi_h | ubidi, |
i18n_ubidi_level_t * | level | ||
) |
Gets the paragraph level of the text.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | level | The paragraph level. If there are multiple paragraphs, their level may vary if the required para_level is I18N_UBIDI_DEFAULT_LTR or I18N_UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph is returned. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_paragraph | ( | const i18n_ubidi_h | ubidi, |
int32_t | char_index, | ||
int32_t * | para_start, | ||
int32_t * | para_limit, | ||
i18n_ubidi_level_t * | para_level, | ||
int32_t * | index | ||
) |
Gets a paragraph, given a position within the text.
This function returns information about a paragraph.
Note: if the paragraph index is known, it is more efficient to retrieve the paragraph information using i18n_ubidi_get_paragraph_by_index().
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | char_index | The index of a character within the text, in the range [0..i18n_ubidi_get_processed_length(ubidi)-1] |
[out] | para_start | The index of the first character of the paragraph in the text. This pointer can be NULL if this value is not necessary. |
[out] | para_limit | The limit of the paragraph. The l-value that you point to here may be the same expression (variable) as the one for char_index. This pointer can be NULL if this value is not necessary. |
[out] | para_level | The level of the paragraph. This pointer can be NULL if this value is not necessary. |
[out] | index | The index of the paragraph containing the specified position |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_paragraph_by_index | ( | const i18n_ubidi_h | ubidi, |
int32_t | para_index, | ||
int32_t * | para_start, | ||
int32_t * | para_limit, | ||
i18n_ubidi_level_t * | para_level | ||
) |
Gets a paragraph, given the index of this paragraph.
This function returns information about paragraphs.
[in] | ubidi | The paragraph i18n_ubidi_h object |
[in] | para_index | The number of the paragraph, in the range [0..i18n_ubidi_count_paragraphs(ubidi)-1] |
[out] | para_start | The index of the first character of the paragraph in the text. This pointer can be NULL if this value is not necessary. |
[out] | para_limit | The limit of the paragraph. This pointer can be NULL if this value is not necessary. |
[out] | para_level | The level of the paragraph. This pointer can be NULL if this value is not necessary. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_processed_length | ( | const i18n_ubidi_h | ubidi, |
int32_t * | length | ||
) |
Gets the length of the source text processed by the last call to i18n_ubidi_set_para().
This length may be different from the length of the source text if option I18N_UBIDI_OPTION_STREAMING has been set.
Note that whenever the length of the text affects the execution or the result of a function, it is the processed length which must be considered, except for i18n_ubidi_set_para() (which receives unprocessed source text) and i18n_ubidi_get_length() (which returns the original length of the source text).
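In particular, the processed length is the one to consider in the following cases:
- the maximum value of the limit argument of i18n_ubidi_set_line()
- the maximum value of the char_index argument of i18n_ubidi_get_paragraph()
- the maximum value of the char_index argument of i18n_ubidi_get_level_at()
- the number of elements in the array returned by i18n_ubidi_get_levels()
- the maximum value of the logical_position argument of i18n_ubidi_get_logical_run()
- the maximum value of the logical_index argument of i18n_ubidi_get_visual_index()
- the number of elements filled in the index_map argument of i18n_ubidi_get_logical_map()
- the length of the text processed by i18n_ubidi_write_reordered()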
[in] | ubidi | The paragraph i18n_ubidi_h object |
[out] | length | The length of the part of the source text processed by the last call to i18n_ubidi_set_para() |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_reordering_mode | ( | i18n_ubidi_h | ubidi, |
i18n_ubidi_reordering_mode_e * | mode | ||
) |
Gets the requested reordering mode for a given i18n_ubidi_h object.
[in] | ubidi | An i18n_ubidi_h object |
[out] | mode | The current reordering mode of the ubidi object |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_reordering_options | ( | i18n_ubidi_h | ubidi, |
uint32_t * | options | ||
) |
Gets the reordering options applied to a given i18n_ubidi_h object.
[in] | ubidi | An i18n_ubidi_h object |
[out] | options | The current reordering options of the ubidi object; i18n_ubidi_reordering_option_e values combined with bitwise 'or' |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_result_length | ( | const i18n_ubidi_h | ubidi, |
int32_t * | length | ||
) |
Gets the length of the reordered text resulting from the last call to i18n_ubidi_set_para().
This length may be different from the length of the source text if option I18N_UBIDI_OPTION_INSERT_MARKS or option I18N_UBIDI_OPTION_REMOVE_CONTROLS has been set.
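This resulting length is the one to consider in the following cases:
- the maximum value of the visual_index argument of i18n_ubidi_get_logical_index()
- the number of elements of the index_map argument of i18n_ubidi_get_visual_map()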
Note that this length stays identical to the source text length if ubidi marks are inserted or removed using option bits of i18n_ubidi_write_reordered(), or if option I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L has been set.
[in] | ubidi | The paragraph i18n_ubidi_h object |
[out] | length | The length of the reordered text resulting from the last call to i18n_ubidi_set_para() |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_text | ( | const i18n_ubidi_h | ubidi, |
char ** | text | ||
) |
Gets the pointer to the given i18n_ubidi_h object's text.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | text | The pointer to the text that the ubidi object was created for |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
I18N_ERROR_OUT_OF_MEMORY | Out of memory |
int i18n_ubidi_get_visual_index | ( | i18n_ubidi_h | ubidi, |
int32_t | logical_index, | ||
int32_t * | visual_index | ||
) |
Gets the visual position from a logical text position.
If such a mapping is used many times on the same i18n_ubidi_h object, then calling i18n_ubidi_get_logical_map() is more efficient.
The value returned may be I18N_UBIDI_MAP_NOWHERE if there is no visual position because the corresponding text character is a ubidi control removed from the output by the option I18N_UBIDI_OPTION_REMOVE_CONTROLS.
When the visual output is altered by using options of i18n_ubidi_write_reordered() such as I18N_UBIDI_INSERT_LRM_FOR_NUMERIC, I18N_UBIDI_KEEP_BASE_COMBINING, I18N_UBIDI_OUTPUT_REVERSE, I18N_UBIDI_REMOVE_BIDI_CONTROLS, the visual position returned may not be correct. It is advised to use, when possible, reordering options such as I18N_UBIDI_OPTION_INSERT_MARKS and I18N_UBIDI_OPTION_REMOVE_CONTROLS.
Note that in right-to-left runs, this mapping places second surrogates before first ones (which is generally a bad idea) and combining characters before base characters. Use of i18n_ubidi_write_reordered(), optionally with the I18N_UBIDI_KEEP_BASE_COMBINING option can be considered instead of using the mapping, in order to avoid these issues.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | logical_index | The index of a character in the text |
[out] | visual_index | The visual position of this character |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_visual_map | ( | i18n_ubidi_h | ubidi, |
int32_t * | index_map | ||
) |
Gets a visual-to-logical index map (array) for the characters in the i18n_ubidi_h (paragraph or line) object.
Some values in the map may be I18N_UBIDI_MAP_NOWHERE if the corresponding text characters are ubidi marks inserted in the visual output by the option I18N_UBIDI_OPTION_INSERT_MARKS.
When the visual output is altered by using options of i18n_ubidi_write_reordered() such as I18N_UBIDI_INSERT_LRM_FOR_NUMERIC, I18N_UBIDI_KEEP_BASE_COMBINING, I18N_UBIDI_OUTPUT_REVERSE, I18N_UBIDI_REMOVE_BIDI_CONTROLS, the logical positions returned may not be correct. It is advised to use, when possible, reordering options such as I18N_UBIDI_OPTION_INSERT_MARKS and I18N_UBIDI_OPTION_REMOVE_CONTROLS.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[out] | index_map | Pointer to an array of i18n_ubidi_get_result_length() indexes which will reflect the reordering of the characters. If option I18N_UBIDI_OPTION_REMOVE_CONTROLS is set, the number of elements allocated in index_map must be no less than i18n_ubidi_get_processed_length(). The array does not need to be initialized. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_get_visual_run | ( | i18n_ubidi_h | ubidi, |
int32_t | run_index, | ||
int32_t * | logical_index, | ||
int32_t * | length, | ||
i18n_ubidi_direction_e * | direction | ||
) |
Gets one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL.
In an RTL run, the character at the logical start is visually on the right of the displayed run. The length is the number of characters in the run. i18n_ubidi_count_runs() should be called before the runs are retrieved.
Note that in right-to-left runs, code like this places second surrogates before first ones (which is generally a bad idea) and combining characters before base characters.
Use of i18n_ubidi_write_reordered(), optionally with the I18N_UBIDI_KEEP_BASE_COMBINING option, can be considered in order to avoid these issues.
[in] | ubidi | The paragraph or line i18n_ubidi_h object |
[in] | run_index | The number of the run in visual order, in the range [0..i18n_ubidi_count_runs(ubidi)-1] |
[out] | logical_index | The first logical character index in the text. The pointer may be NULL if this index is not needed |
[out] | length | The number of characters (at least one) in the run. The pointer may be NULL if this is not needed. |
[out] | direction | The directionality of the run, I18N_UBIDI_LTR == 0 or I18N_UBIDI_RTL == 1, never I18N_UBIDI_MIXED, never I18N_UBIDI_NEUTRAL. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_invert_map | ( | const int32_t * | src_map, |
int32_t | length, | ||
int32_t * | dest_map | ||
) |
Inverts an index map.
The index mapping of the first map is inverted and written to the second one.
[in] | src_map | An array with length elements which defines the original mapping from a source array containing length elements to a destination array. Some elements of the source array may have no mapping in the destination array. In that case, their value will be the special value I18N_UBIDI_MAP_NOWHERE. All elements must be >=0 or equal to I18N_UBIDI_MAP_NOWHERE. Some elements may have a value >= length, if the destination array has more elements than the source array. There must be no duplicate indexes (two or more elements with the same value except I18N_UBIDI_MAP_NOWHERE). |
[in] | length | The length of each array |
[out] | dest_map | An array with a number of elements equal to 1 + the highest value in src_map. dest_map will be filled with the inverse mapping. If element with index i in src_map has a value k different from I18N_UBIDI_MAP_NOWHERE, this means that element i of the source array maps to element k in the destination array. The inverse map will have value i in its k-th element. For all elements of the destination array which do not map to an element in the source array, the corresponding element in the inverse map will have a value equal to I18N_UBIDI_MAP_NOWHERE. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_is_inverse | ( | i18n_ubidi_h | ubidi, |
i18n_ubool * | is_inverse | ||
) |
Gets whether the given i18n_ubidi_h object is set to perform the inverse ubidi algorithm.
Note: calling this function after setting the reordering mode with i18n_ubidi_set_reordering_mode() will return true if the reordering mode was set to I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L, and false for all other values.
[in] | ubidi | An i18n_ubidi_h object |
[out] | is_inverse | true if the ubidi object is set to perform the inverse ubidi algorithm by handling numbers as L |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_is_order_paragraphs_ltr | ( | i18n_ubidi_h | ubidi, |
i18n_ubool * | is_order | ||
) |
Gets whether the given i18n_ubidi_h object is set to allocate level 0 to block separators.
This function reports whether the given i18n_ubidi_h object is set to allocate level 0 to block separators, so that successive paragraphs progress from left to right.
[in] | ubidi | An i18n_ubidi_h object |
[out] | is_order | true if the ubidi object is set to allocate level 0 to block separators |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_order_paragraphs_ltr | ( | i18n_ubidi_h | ubidi, |
i18n_ubool | order_paragraphs_ltr | ||
) |
Sets whether block separators must be allocated level zero, so that successive paragraphs will progress from left to right.
This function must be called before i18n_ubidi_set_para(). Paragraph separators (B) may appear in the text. Setting them to level zero means that all paragraph separators (including one possibly appearing in the last text position) are kept in the reordered text after the text that they follow in the source text. When this feature is not enabled, a paragraph separator at the last position of the text before reordering will go to the first position of the reordered text when the paragraph level is odd.
[in] | ubidi | An i18n_ubidi_h object |
[in] | order_paragraphs_ltr | Specifies whether paragraph separators (B) must receive level 0, so that successive paragraphs progress from left to right |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_reorder_logical | ( | const i18n_ubidi_level_t * | levels, |
int32_t | length, | ||
int32_t * | index_map | ||
) |
Performs logical reordering.
This is a convenience function that does not use an i18n_ubidi_h object. It is intended for use when an application has already determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using i18n_ubidi_get_logical_map() on an i18n_ubidi_h object.
The index map will result in index_map[logical_index] == visual_index.
[in] | levels | An array with length levels that have been determined by the application |
[in] | length | The number of levels in the array, or, semantically, the number of objects to be reordered. length must be > 0. |
[out] | index_map | Pointer to an array of length indexes which will reflect the reordering of the characters. The array does not need to be initialized. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_reorder_visual | ( | const i18n_ubidi_level_t * | levels, |
int32_t | length, | ||
int32_t * | index_map | ||
) |
Performs visual reordering.
This is a convenience function that does not use an i18n_ubidi_h object. It is intended for use when an application has already determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using i18n_ubidi_get_visual_map() on an i18n_ubidi_h handle. The index map will result in index_map[visual_index] == logical_index.
[in] | levels | An array with length levels that have been determined by the application |
[in] | length | The number of levels in the array, or, semantically, the number of objects to be reordered. length must be > 0. |
[out] | index_map | Pointer to an array of length indexes which will reflect the reordering of the characters. The array does not need to be initialized. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_class_cb | ( | i18n_ubidi_h | ubidi, |
i18n_ubidi_class_cb | new_fn, | ||
const void * | new_context, | ||
i18n_ubidi_class_cb * | old_fn, | ||
const void ** | old_context | ||
) |
Sets the callback function and callback data used by the UBA implementation for ubidi class determination.
This may be useful for assigning ubidi classes to PUA characters, or for special application needs. For instance, an application may want to handle all spaces like L or R characters (according to the base direction) when creating the visual ordering of logical lines which are part of a report organized in columns: there should not be interaction between adjacent cells.
[in] | ubidi | The paragraph i18n_ubidi_h object |
[in] | new_fn | The new callback function pointer |
[in] | new_context | The new callback context pointer. This can be NULL. |
[out] | old_fn | The old callback function pointer. This can be NULL. |
[out] | old_context | The old callback's context. This can be NULL. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_context | ( | i18n_ubidi_h | ubidi, |
const i18n_uchar * | prologue, | ||
int32_t | pro_length, | ||
const i18n_uchar * | epilogue, | ||
int32_t | epi_length | ||
) |
Sets the context before a call to i18n_ubidi_set_para().
i18n_ubidi_set_para() computes the left-right directionality for a given piece of text which is supplied as one of its arguments. Sometimes this piece of text (the "main text") should be considered in context, because text appearing before ("prologue") and/or after ("epilogue") the main text may affect the result of this computation.
This function specifies the prologue and/or the epilogue for the next call to i18n_ubidi_set_para(). The characters specified as prologue and epilogue should not be modified by the calling program until the call to i18n_ubidi_set_para() has returned. If successive calls to i18n_ubidi_set_para() all need specification of a context, i18n_ubidi_set_context() must be called before each call to i18n_ubidi_set_para(). In other words, a context is not "remembered" after the following successful call to i18n_ubidi_set_para().
If a call to i18n_ubidi_set_para() specifies I18N_UBIDI_DEFAULT_LTR or I18N_UBIDI_DEFAULT_RTL as para_level and is preceded by a call to i18n_ubidi_set_context() which specifies a prologue, the paragraph level will be computed taking into consideration the text in the prologue.
When i18n_ubidi_set_para() is called without a previous call to i18n_ubidi_set_context(), the main text is handled as if preceded and followed by strong directional characters at the current paragraph level. Calling i18n_ubidi_set_context() with specification of a prologue will change this behavior by handling the main text as if preceded by the last strong character appearing in the prologue, if any. Calling i18n_ubidi_set_context() with specification of an epilogue will change the behavior of i18n_ubidi_set_para() by handling the main text as if followed by the first strong character or digit appearing in the epilogue, if any.
Note 1: if i18n_ubidi_set_context() is called repeatedly without calling i18n_ubidi_set_para(), the earlier calls have no effect, only the last call will be remembered for the next call to i18n_ubidi_set_para().
Note 2: calling i18n_ubidi_set_context(ubidi, NULL, 0, NULL, 0) cancels any previous setting of non-empty prologue or epilogue. The next call to i18n_ubidi_set_para() will process no prologue or epilogue.
Note 3: users must be aware that even after setting the context before a call to i18n_ubidi_set_para() to perform e.g. a logical to visual transformation, the resulting string may not be identical to what it would have been if all the text, including prologue and epilogue, had been processed together. Example (upper case letters represent RTL characters):
prologue = "abc DE"
epilogue = none
main text = "FGH xyz"
para_level = I18N_UBIDI_LTR
display without prologue = "HGF xyz"
("HGF" is adjacent to "xyz")
display with prologue = "abc HGFED xyz"
("HGF" is not adjacent to "xyz")
[in] | ubidi | A paragraph i18n_ubidi_h object |
[in] | prologue | Pointer to the text which precedes the text that will be specified in a coming call to i18n_ubidi_set_para(). If there is no prologue to consider, then pro_length must be zero and this pointer can be NULL. |
[in] | pro_length | The length of the prologue; if pro_length == -1 then the prologue must be zero-terminated. Otherwise pro_length must be >= 0. If pro_length == 0, it means that there is no prologue to consider. |
[in] | epilogue | A pointer to the text which follows the text that will be specified in a coming call to i18n_ubidi_set_para(). If there is no epilogue to consider, then epi_length must be zero and this pointer can be NULL. |
[in] | epi_length | The length of the epilogue; if epi_length == -1 then the epilogue must be zero-terminated. Otherwise epi_length must be >= 0. If epi_length == 0, it means that there is no epilogue to consider. |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_inverse | ( | i18n_ubidi_h | ubidi, |
i18n_ubool | is_inverse | ||
) |
Modifies the operation of the ubidi algorithm such that it approximates an "inverse ubidi" algorithm.
This function must be called before i18n_ubidi_set_para().
The normal operation of the ubidi algorithm as described in the Unicode Technical Report is to take text stored in logical (keyboard, typing) order and to determine the reordering of it for visual rendering. Some legacy systems store text in visual order, and for operations with standard, Unicode-based algorithms, the text needs to be transformed to logical order. This is effectively the inverse algorithm of the described ubidi algorithm. Note that there is no standard algorithm for this "inverse ubidi" and that the current implementation provides only an approximation of "inverse ubidi".
With is_inverse set to true, this function changes the behavior of some of the subsequent functions in a way that they can be used for the inverse ubidi algorithm. Specifically, runs of text with numeric characters will be treated in a special way and may need to be surrounded with LRM characters when they are written in reordered sequence.
Output runs should be retrieved using i18n_ubidi_get_visual_run(). Since the actual input for "inverse ubidi" is visually ordered text and i18n_ubidi_get_visual_run() gets the reordered runs, these are actually the runs of the logically ordered output. Calling this function with argument is_inverse set to true is equivalent to calling i18n_ubidi_set_reordering_mode() with argument reordering_mode set to I18N_UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
Calling this function with argument is_inverse set to false is equivalent to calling i18n_ubidi_set_reordering_mode() with argument reordering_mode set to I18N_UBIDI_REORDER_DEFAULT.
[in] | ubidi | An i18n_ubidi_h object |
[in] | is_inverse | Specifies "forward" or "inverse" ubidi operation |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_line | ( | const i18n_ubidi_h | para_bidi, |
int32_t | start, | ||
int32_t | limit, | ||
i18n_ubidi_h | line_bidi | ||
) |
Sets an i18n_ubidi_h object to contain the reordering information, especially the resolved levels, for all the characters in a line of text.
This line of text is specified by referring to an i18n_ubidi_h object representing this information for a piece of text containing one or more paragraphs, and by specifying a range of indexes in this text.
In the new line object, the indexes will range from 0 to limit - start - 1.
This is used after calling i18n_ubidi_set_para() for a piece of text, and after line-breaking on that text. It is not necessary if each paragraph is treated as a single line.
After line-breaking, rules (L1) and (L2) for the treatment of trailing WS and for reordering are performed on an i18n_ubidi_h object that represents a line.
Important: line_bidi shares data with para_bidi. You must destroy or reuse line_bidi before para_bidi. In other words, you must destroy or reuse the i18n_ubidi_h object for a line before the object for its parent paragraph.
The text pointer that was stored in para_bidi is also copied, and start is added to it so that it points to the beginning of the line for this object.
[in] | para_bidi | The parent paragraph object. It must have been set by a successful call to i18n_ubidi_set_para() |
[in] | start | The line's first index into the text |
[in] | limit | The index just after the line's last index into the text (its last index + 1) |
[in] | line_bidi | The object that will now represent a line of the text |
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_para | ( | i18n_ubidi_h | ubidi, |
const i18n_uchar * | text, | ||
int32_t | length, | ||
i18n_ubidi_level_t | para_level, | ||
i18n_ubidi_level_t * | embedding_levels | ||
) |
Performs the Unicode bidi algorithm.
It is defined in the Unicode Standard Annex #9, version 13, also described in The Unicode Standard, Version 4.0.
This function takes a piece of plain text containing one or more paragraphs, with or without externally specified embedding levels from styled text and computes the left-right-directionality of each character.
If the entire text is all of the same directionality, then the function may not perform all the steps described by the algorithm, i.e., some levels may not be the same as if all steps were performed. This is not relevant for unidirectional text. For example, in pure LTR text with numbers the numbers would get a resolved level of 2 higher than the surrounding text according to the algorithm. This implementation may set all resolved levels to the same value in such a case.
The text can be composed of multiple paragraphs. Occurrence of a block separator in the text terminates a paragraph, and whatever comes next starts a new paragraph. The exception to this rule is when a Carriage Return (CR) is followed by a Line Feed (LF). Both CR and LF are block separators, but in that case, the pair of characters is considered as terminating the preceding paragraph, and a new paragraph will be started by a character coming after the LF.
[in] | ubidi | An i18n_ubidi_h object allocated with i18n_ubidi_create() which will be set to contain the reordering information, especially the resolved levels for all the characters in the text |
[in] | text | A pointer to the text that the ubidi algorithm will be performed on. This pointer is stored in the i18n_ubidi_h handle and can be retrieved with i18n_ubidi_get_text(). Note: The text must be (at least) length long. |
[in] | length | The length of the text; if length == -1 then the text must be zero-terminated |
[in] | para_level | Specifies the default level for the text; it is typically 0 (LTR) or 1 (RTL). If the function shall determine the paragraph level from the text, then para_level can be set to either I18N_UBIDI_DEFAULT_LTR or I18N_UBIDI_DEFAULT_RTL; if the text contains multiple paragraphs, the paragraph level shall be determined separately for each paragraph; if a paragraph does not include any strongly typed character, then the desired default is used (0 for LTR or 1 for RTL). Any other value between 0 and I18N_UBIDI_MAX_EXPLICIT_LEVEL is also valid, with odd levels indicating RTL. |
[in] | embedding_levels | May be used to preset the embedding and override levels, ignoring characters like LRE and PDF in the text. A level overrides the directional property of its corresponding (same index) character if the level has the I18N_UBIDI_LEVEL_OVERRIDE bit set. Caution: A copy of this pointer, not of the levels, will be stored in the i18n_ubidi_h object; the embedding_levels array must not be deallocated before the i18n_ubidi_h structure is destroyed or reused, and the embedding_levels should not be modified to avoid unexpected results on subsequent ubidi operations. However, the i18n_ubidi_set_para() and i18n_ubidi_set_line() functions may modify some or all of the levels. |
After the i18n_ubidi_h object is reused or destroyed, the caller must take care of the deallocation of the embedding_levels array.
0
on success, otherwise a negative error value I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_reordering_mode | ( | i18n_ubidi_h | ubidi, |
i18n_ubidi_reordering_mode_e | reordering_mode | ||
) |
Modifies the operation of the ubidi algorithm such that it implements some variant of the basic ubidi algorithm or approximates an "inverse ubidi" algorithm, depending on different values of the "reordering mode".
This function must be called before i18n_ubidi_set_para(), and stays in effect until called again with a different argument.
The normal operation of the ubidi algorithm as described in the Unicode Standard Annex #9 is to take text stored in logical (keyboard, typing) order and to determine how to reorder it for visual rendering.
With the reordering_mode set to a value other than I18N_UBIDI_REORDER_DEFAULT, this function changes the behavior of some of the subsequent functions in a way such that they implement an inverse ubidi algorithm or some other algorithm variants.
Some legacy systems store text in visual order, and for operations with standard, Unicode-based algorithms, the text needs to be transformed into logical order. This is effectively the inverse algorithm of the described ubidi algorithm. Note that there is no standard algorithm for this "inverse ubidi", so a number of variants are implemented here.
In other cases, it may be desirable to emulate some variant of the Logical to Visual algorithm (e.g. one used in MS Windows), or perform a Logical to Logical transformation.
The source text should not contain ubidi control characters other than LRM or RLM.
true.
In all the reordering modes specifying an "inverse ubidi" algorithm (i.e. those with a name starting with I18N_UBIDI_REORDER_INVERSE), output runs should be retrieved using i18n_ubidi_get_visual_run(), and the output text with i18n_ubidi_write_reordered(). The caller should keep in mind that in "inverse ubidi" modes the input is actually visually ordered text, and the reordered output returned by i18n_ubidi_get_visual_run() or i18n_ubidi_write_reordered() is actually runs or a character string of logically ordered output. For all the "inverse ubidi" modes, the source text should not contain ubidi control characters other than LRM or RLM.
Note that option I18N_UBIDI_OUTPUT_REVERSE of i18n_ubidi_write_reordered() has no useful meaning and should not be used in conjunction with any value of the reordering_mode specifying "inverse ubidi" or with value I18N_UBIDI_REORDER_RUNS_ONLY.
[in] | ubidi | An i18n_ubidi_h object |
[in] | reordering_mode | Specifies the required variant of the ubidi algorithm |
0 on success, otherwise a negative error value
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_set_reordering_options | ( | i18n_ubidi_h | ubidi, |
uint32_t | reordering_options | ||
) |
Specifies which of the reordering options should be applied during ubidi transformations.
[in] | ubidi | An i18n_ubidi_h object |
[in] | reordering_options | A bitwise 'or' combination of zero or more of: I18N_UBIDI_OPTION_DEFAULT, I18N_UBIDI_OPTION_INSERT_MARKS, I18N_UBIDI_OPTION_REMOVE_CONTROLS, I18N_UBIDI_OPTION_STREAMING. |
0 on success, otherwise a negative error value
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_write_reordered | ( | i18n_ubidi_h | ubidi, |
uint16_t | options, | ||
int32_t | dest_size, | ||
i18n_uchar * | dest, | ||
int32_t * | output_length | ||
) |
Takes an i18n_ubidi_h object containing the reordering information for a piece of text (one or more paragraphs) set by i18n_ubidi_set_para() or for a line of text set by i18n_ubidi_set_line() and writes a reordered string to the destination buffer.
This function preserves the integrity of characters with multiple code units and (optionally) combining characters. Characters in RTL runs can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove ubidi control characters; see the description of the dest_size and options parameters and of the option bit flags.
[in] | ubidi | An i18n_ubidi_h object that is set by i18n_ubidi_set_para() or i18n_ubidi_set_line() and contains the reordering information for the text that it was defined for, as well as a pointer to that text. |
[in] | options | A bit set of options for the reordering that control how the reordered text is written. The options include mirroring the characters on a code point basis and inserting LRM characters, which is used especially for transforming visually stored text to logically stored text (although this is still an imperfect implementation of an "inverse ubidi" algorithm because it uses the "forward ubidi" algorithm at its core). The available options are: I18N_UBIDI_DO_MIRRORING, I18N_UBIDI_INSERT_LRM_FOR_NUMERIC, I18N_UBIDI_KEEP_BASE_COMBINING, I18N_UBIDI_OUTPUT_REVERSE, I18N_UBIDI_REMOVE_BIDI_CONTROLS. |
[in] | dest_size | The size of the dest buffer, in number of i18n_uchars. If the I18N_UBIDI_INSERT_LRM_FOR_NUMERIC option is set, then the destination length could be as large as i18n_ubidi_get_length(ubidi)+2*i18n_ubidi_count_runs(ubidi). If the I18N_UBIDI_REMOVE_BIDI_CONTROLS option is set, then the destination length may be less than i18n_ubidi_get_length(ubidi). If none of these options is set, then the destination length will be exactly i18n_ubidi_get_processed_length(ubidi). |
[out] | dest | A pointer to where the reordered text is to be copied. The source text and the destination buffer dest must not overlap. |
[out] | output_length | The length of the output string |
0 on success, otherwise a negative error value
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |
int i18n_ubidi_write_reverse | ( | const i18n_uchar * | src, |
int32_t | src_length, | ||
uint16_t | options, | ||
int32_t | dest_size, | ||
i18n_uchar * | dest, | ||
int32_t * | output_length | ||
) |
Reverses a Right-To-Left run of Unicode text.
This function preserves the integrity of characters with multiple code units and (optionally) combining characters. Characters can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove ubidi control characters.
This function is the implementation for reversing RTL runs as part of i18n_ubidi_write_reordered(). For detailed descriptions of the parameters, see there. Since no ubidi controls are inserted here, the output string length will never exceed src_length.
[in] | src | A pointer to the RTL run text |
[in] | src_length | The length of the RTL run |
[in] | options | A bit set of options for the reordering that control how the reordered text is written. See the options parameter in i18n_ubidi_write_reordered(). |
[in] | dest_size | The size of the dest buffer, in number of i18n_uchars. If the I18N_UBIDI_REMOVE_BIDI_CONTROLS option is set, then the destination length may be less than src_length. If this option is not set, then the destination length will be exactly src_length. |
[out] | dest | A pointer to where the reordered text is to be copied. src and dest arrays (of length src_length and dest_size, respectively) must not overlap. |
[out] | output_length | The length of the output string |
0 on success, otherwise a negative error value
I18N_ERROR_NONE | Successful |
I18N_ERROR_INVALID_PARAMETER | Invalid function parameter |