ICU 72.1
|
C API: Transliterator. More...
#include "unicode/utypes.h"
#include "unicode/urep.h"
#include "unicode/parseerr.h"
#include "unicode/uenum.h"
#include "unicode/uset.h"
#include "unicode/localpointer.h"
Go to the source code of this file.
Data Structures | |
struct | UTransPosition |
Position structure for utrans_transIncremental() incremental transliteration. More... | |
Namespaces | |
namespace | icu |
File coll.h. | |
Typedefs | |
typedef void * | UTransliterator |
An opaque transliterator for use in C. More... | |
typedef enum UTransDirection | UTransDirection |
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator. More... | |
typedef struct UTransPosition | UTransPosition |
Position structure for utrans_transIncremental() incremental transliteration. More... | |
Enumerations | |
enum | UTransDirection { UTRANS_FORWARD , UTRANS_REVERSE } |
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator. More... | |
Functions | |
U_CAPI UTransliterator * | utrans_openU (const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode) |
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID. More... | |
U_CAPI UTransliterator * | utrans_openInverse (const UTransliterator *trans, UErrorCode *status) |
Open an inverse of an existing transliterator. More... | |
U_CAPI UTransliterator * | utrans_clone (const UTransliterator *trans, UErrorCode *status) |
Create a copy of a transliterator. More... | |
U_CAPI void | utrans_close (UTransliterator *trans) |
Close a transliterator. More... | |
U_CAPI const UChar * | utrans_getUnicodeID (const UTransliterator *trans, int32_t *resultLength) |
Return the programmatic identifier for this transliterator. More... | |
U_CAPI void | utrans_register (UTransliterator *adoptedTrans, UErrorCode *status) |
Register an open transliterator with the system. More... | |
U_CAPI void | utrans_unregisterID (const UChar *id, int32_t idLength) |
Unregister a transliterator from the system. More... | |
U_CAPI void | utrans_setFilter (UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status) |
Set the filter used by a transliterator. More... | |
U_CAPI int32_t | utrans_countAvailableIDs (void) |
Return the number of system transliterators. More... | |
U_CAPI UEnumeration * | utrans_openIDs (UErrorCode *pErrorCode) |
Return a UEnumeration for the available transliterators. More... | |
U_CAPI void | utrans_trans (const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status) |
Transliterate a segment of a UReplaceable string. More... | |
U_CAPI void | utrans_transIncremental (const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status) |
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously. More... | |
U_CAPI void | utrans_transUChars (const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status) |
Transliterate a segment of a UChar* string. More... | |
U_CAPI void | utrans_transIncrementalUChars (const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status) |
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously. More... | |
U_CAPI int32_t | utrans_toRules (const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status) |
Create a rule string that can be passed to utrans_openU to recreate this transliterator. More... | |
U_CAPI USet * | utrans_getSourceSet (const UTransliterator *trans, UBool ignoreFilter, USet *fillIn, UErrorCode *status) |
Returns the set of all characters that may be modified in the input text by this UTransliterator, optionally ignoring the transliterator's current filter. More... | |
UTransliterator * | utrans_open (const char *id, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *status) |
Deprecated, use utrans_openU() instead. More... | |
int32_t | utrans_getID (const UTransliterator *trans, char *buf, int32_t bufCapacity) |
Deprecated, use utrans_getUnicodeID() instead. More... | |
void | utrans_unregister (const char *id) |
Deprecated, use utrans_unregisterID() instead. More... | |
int32_t | utrans_getAvailableID (int32_t index, char *buf, int32_t bufCapacity) |
Deprecated, use utrans_openIDs() instead. More... | |
C API: Transliterator.
The data structures and functions described in this header provide transliteration services. Transliteration services are implemented as C++ classes. The comments and documentation in this header assume the reader is familiar with the C++ headers translit.h and associated documentation.
A significant but incomplete subset of the C++ transliteration services are available to C code through this header. In order to access more complex transliteration services, refer to the C++ headers and documentation.
There are two sets of functions for working with transliterator IDs:
An old, deprecated set uses char * IDs, which works for true and pure identifiers that these APIs were designed for, for example "Cyrillic-Latin". It does not work when the ID contains filters ("[:Script=Cyrl:]") or even a complete set of rules because then the ID string contains more than just "invariant" characters (see utypes.h).
A new set of functions replaces the old ones and uses UChar * IDs, paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
Definition in file utrans.h.
typedef enum UTransDirection UTransDirection |
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.
typedef void* UTransliterator |
An opaque transliterator for use in C.
Open with utrans_openXxx() and close with utrans_close() when done. Equivalent to the C++ class Transliterator and its subclasses.
typedef struct UTransPosition UTransPosition |
Position structure for utrans_transIncremental() incremental transliteration.
This structure defines two substrings of the text being transliterated. The first region, [contextStart, contextLimit), defines what characters the transliterator will read as context. The second region, [start, limit), defines what characters will actually be transliterated. The second region should be a subset of the first.
After a transliteration operation, some of the indices in this structure will be modified. See the field descriptions for details.
contextStart <= start <= limit <= contextLimit
Note: All index values in this structure must be at code point boundaries. That is, none of them may occur between two code units of a surrogate pair. If any index does split a surrogate pair, results are unspecified.
enum UTransDirection |
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.
U_CAPI UTransliterator * utrans_clone | ( | const UTransliterator * | trans, |
UErrorCode * | status | ||
) |
Create a copy of a transliterator.
Any non-NULL result from this function should later be closed with utrans_close().
trans | the transliterator to be copied. |
status | a pointer to the UErrorCode |
U_CAPI void utrans_close | ( | UTransliterator * | trans | ) |
Close a transliterator.
Any non-NULL pointer returned by utrans_openXxx() or utrans_clone() should eventually be closed.
trans | the transliterator to be closed. |
U_CAPI int32_t utrans_countAvailableIDs | ( | void | ) |
Return the number of system transliterators.
It is recommended to use utrans_openIDs() instead.
int32_t utrans_getAvailableID | ( | int32_t | index, |
char * | buf, | ||
int32_t | bufCapacity | ||
) |
Deprecated, use utrans_openIDs() instead.
Return the ID of the index-th system transliterator. The result is placed in the given buffer. If the given buffer is too small, the initial substring is copied to buf. The result in buf is always zero-terminated.
index | the number of the transliterator to return. Must satisfy 0 <= index < utrans_countAvailableIDs(). If index is out of range then it is treated as if it were 0. |
buf | the buffer in which to receive the ID. This may be NULL, in which case no characters are copied. |
bufCapacity | the capacity of the buffer. Ignored if buf is NULL. |
int32_t utrans_getID | ( | const UTransliterator * | trans, |
char * | buf, | ||
int32_t | bufCapacity | ||
) |
Deprecated, use utrans_getUnicodeID() instead.
Return the programmatic identifier for this transliterator. If this identifier is passed to utrans_open(), it will open a transliterator equivalent to this one, if the ID has been registered.
trans | the transliterator to return the ID of. |
buf | the buffer in which to receive the ID. This may be NULL, in which case no characters are copied. |
bufCapacity | the capacity of the buffer. Ignored if buf is NULL. |
U_CAPI USet * utrans_getSourceSet | ( | const UTransliterator * | trans, |
UBool | ignoreFilter, | ||
USet * | fillIn, | ||
UErrorCode * | status | ||
) |
Returns the set of all characters that may be modified in the input text by this UTransliterator, optionally ignoring the transliterator's current filter.
trans | The transliterator. |
ignoreFilter | If false, the returned set incorporates the UTransliterator's current filter; if the filter is changed, the return value of this function will change. If true, the returned set ignores the effect of the UTransliterator's current filter. |
fillIn | Pointer to a USet object to receive the modifiable characters set. Previous contents of fillIn are lost. If fillIn is NULL, then a new USet is created and returned. The caller owns the result and must dispose of it by calling uset_close. |
status | A pointer to the UErrorCode. |
U_CAPI const UChar * utrans_getUnicodeID | ( | const UTransliterator * | trans, |
int32_t * | resultLength | ||
) |
Return the programmatic identifier for this transliterator.
If this identifier is passed to utrans_openU(), it will open a transliterator equivalent to this one, if the ID has been registered.
trans | the transliterator to return the ID of. |
resultLength | pointer to an output variable receiving the length of the ID string; can be NULL |
UTransliterator * utrans_open | ( | const char * | id, |
UTransDirection | dir, | ||
const UChar * | rules, | ||
int32_t | rulesLength, | ||
UParseError * | parseError, | ||
UErrorCode * | status | ||
) |
Deprecated, use utrans_openU() instead.
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
Any non-NULL result from this function should later be closed with utrans_close().
id | a valid ID, as returned by utrans_getAvailableID() |
dir | the desired direction |
rules | the transliterator rules. See the C++ header rbt.h for rules syntax. If NULL then a system transliterator matching the ID is returned. |
rulesLength | the length of the rules, or -1 if the rules are zero-terminated. |
parseError | a pointer to a UParseError struct to receive the details of any parsing errors. This parameter may be NULL if no parsing error details are desired. |
status | a pointer to the UErrorCode |
U_CAPI UEnumeration * utrans_openIDs | ( | UErrorCode * | pErrorCode | ) |
Return a UEnumeration for the available transliterators.
pErrorCode | Pointer to the UErrorCode in/out parameter. |
U_CAPI UTransliterator * utrans_openInverse | ( | const UTransliterator * | trans, |
UErrorCode * | status | ||
) |
Open an inverse of an existing transliterator.
For this to work, the inverse must be registered with the system. For example, if the Transliterator "A-B" is opened, and then its inverse is opened, the result is the Transliterator "B-A", if such a transliterator is registered with the system. Otherwise the result is NULL and a failing UErrorCode is set. Any non-NULL result from this function should later be closed with utrans_close().
trans | the transliterator to open the inverse of. |
status | a pointer to the UErrorCode |
U_CAPI UTransliterator * utrans_openU | ( | const UChar * | id, |
int32_t | idLength, | ||
UTransDirection | dir, | ||
const UChar * | rules, | ||
int32_t | rulesLength, | ||
UParseError * | parseError, | ||
UErrorCode * | pErrorCode | ||
) |
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
Any non-NULL result from this function should later be closed with utrans_close().
id | a valid transliterator ID |
idLength | the length of the ID string, or -1 if NUL-terminated |
dir | the desired direction |
rules | the transliterator rules. See the C++ header rbt.h for rules syntax. If NULL then a system transliterator matching the ID is returned. |
rulesLength | the length of the rules, or -1 if the rules are NUL-terminated. |
parseError | a pointer to a UParseError struct to receive the details of any parsing errors. This parameter may be NULL if no parsing error details are desired. |
pErrorCode | a pointer to the UErrorCode |
U_CAPI void utrans_register | ( | UTransliterator * | adoptedTrans, |
UErrorCode * | status | ||
) |
Register an open transliterator with the system.
When utrans_open() is called with an ID string that is equal to that returned by utrans_getID(adoptedTrans,...), then utrans_clone(adoptedTrans,...) is returned.
NOTE: After this call the system owns the adoptedTrans and will close it. The user must not call utrans_close() on adoptedTrans.
adoptedTrans | a transliterator, typically the result of utrans_openU(), to be registered with the system. |
status | a pointer to the UErrorCode |
U_CAPI void utrans_setFilter | ( | UTransliterator * | trans, |
const UChar * | filterPattern, | ||
int32_t | filterPatternLen, | ||
UErrorCode * | status | ||
) |
Set the filter used by a transliterator.
A filter can be used to make the transliterator pass certain characters through untouched. The filter is expressed using a UnicodeSet pattern. If the filterPattern is NULL or the empty string, then the transliterator will be reset to use no filter.
trans | the transliterator |
filterPattern | a pattern string, in the form accepted by UnicodeSet, specifying which characters to apply the transliteration to. May be NULL or the empty string to indicate no filter. |
filterPatternLen | the length of filterPattern, or -1 if filterPattern is zero-terminated |
status | a pointer to the UErrorCode |
U_CAPI int32_t utrans_toRules | ( | const UTransliterator * | trans, |
UBool | escapeUnprintable, | ||
UChar * | result, | ||
int32_t | resultLength, | ||
UErrorCode * | status | ||
) |
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
trans | The transliterator |
escapeUnprintable | if true then convert unprintable characters to their hex escape representations, \uxxxx or \Uxxxxxxxx. Unprintable characters are those other than U+000A, U+0020..U+007E. |
result | A pointer to a buffer to receive the rules. |
resultLength | The maximum size of result. |
status | A pointer to the UErrorCode. In case of error status, the contents of result are undefined. |
U_CAPI void utrans_trans | ( | const UTransliterator * | trans, |
UReplaceable * | rep, | ||
const UReplaceableCallbacks * | repFunc, | ||
int32_t | start, | ||
int32_t * | limit, | ||
UErrorCode * | status | ||
) |
Transliterate a segment of a UReplaceable string.
The string is passed in as a UReplaceable pointer rep and a UReplaceableCallbacks function pointer struct repFunc. Functions in the repFunc struct will be called in order to modify the rep string.
trans | the transliterator |
rep | a pointer to the string. This will be passed to the repFunc functions. |
repFunc | a set of function pointers that will be used to modify the string pointed to by rep. |
start | the beginning index, inclusive; 0 <= start <= limit . |
limit | pointer to the ending index, exclusive; start <= limit <= repFunc->length(rep). Upon return, *limit will contain the new limit index. The text previously occupying [start, limit) has been transliterated, possibly to a string of a different length, at [start, new-limit), where new-limit is the value stored in *limit upon return. |
status | a pointer to the UErrorCode |
U_CAPI void utrans_transIncremental | ( | const UTransliterator * | trans, |
UReplaceable * | rep, | ||
const UReplaceableCallbacks * | repFunc, | ||
UTransPosition * | pos, | ||
UErrorCode * | status | ||
) |
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
This method is typically called after new text has been inserted, e.g. as a result of a keyboard event. The transliterator will try to transliterate characters of rep between index.cursor and index.limit. Characters before index.cursor will not be changed.
Upon return, values in index will be updated. index.start will be advanced to the first character that future calls to this method will read. index.cursor and index.limit will be adjusted to delimit the range of text that future calls to this method may change.
Typical usage of this method begins with an initial call with index.start and index.limit set to indicate the portion of text to be transliterated, and index.cursor == index.start. Thereafter, index can be used without modification in future calls, provided that all changes to text are made via this method.
This method assumes that future calls may be made that will insert new text into the buffer. As a result, it only performs unambiguous transliterations. After the last call to this method, there may be untransliterated text that is waiting for more input to resolve an ambiguity. In order to perform these pending transliterations, clients should call utrans_trans() with a start of index.start and a limit of index.limit after the last call to this method has been made.
trans | the transliterator |
rep | a pointer to the string. This will be passed to the repFunc functions. |
repFunc | a set of function pointers that will be used to modify the string pointed to by rep. |
pos | a struct containing the start and limit indices of the text to be read and the text to be transliterated |
status | a pointer to the UErrorCode |
U_CAPI void utrans_transIncrementalUChars | ( | const UTransliterator * | trans, |
UChar * | text, | ||
int32_t * | textLength, | ||
int32_t | textCapacity, | ||
UTransPosition * | pos, | ||
UErrorCode * | status | ||
) |
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
See utrans_transIncremental(). The string is passed in as a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator. See utrans_transIncremental() for usage details.
trans | the transliterator |
text | a pointer to a buffer containing the text to be transliterated on input and the result text on output. |
textLength | a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated. |
textCapacity | the length of the text buffer |
pos | a struct containing the start and limit indices of the text to be read and the text to be transliterated |
status | a pointer to the UErrorCode |
U_CAPI void utrans_transUChars | ( | const UTransliterator * | trans, |
UChar * | text, | ||
int32_t * | textLength, | ||
int32_t | textCapacity, | ||
int32_t | start, | ||
int32_t * | limit, | ||
UErrorCode * | status | ||
) |
Transliterate a segment of a UChar* string.
The string is passed in as a UChar* buffer. The string is modified in place. If the result is longer than textCapacity, it is truncated. The actual length of the result is returned in *textLength, if textLength is non-NULL. *textLength may be greater than textCapacity, but only textCapacity UChars will be written to *text, including the zero terminator.
trans | the transliterator |
text | a pointer to a buffer containing the text to be transliterated on input and the result text on output. |
textLength | a pointer to the length of the string in text. If the length is -1 then the string is assumed to be zero-terminated. Upon return, the new length is stored in *textLength. If textLength is NULL then the string is assumed to be zero-terminated. |
textCapacity | the length of the text buffer |
start | the beginning index, inclusive; 0 <= start <= limit . |
limit | pointer to the ending index, exclusive; start <= limit <= the length of the string in text. Upon return, *limit will contain the new limit index. The text previously occupying [start, limit) has been transliterated, possibly to a string of a different length, at [start, new-limit), where new-limit is the value stored in *limit upon return. |
status | a pointer to the UErrorCode |
void utrans_unregister | ( | const char * | id | ) |
Deprecated, use utrans_unregisterID() instead.
Unregister a transliterator from the system. After this call the system will no longer recognize the given ID when passed to utrans_open(). If the id is invalid then nothing is done.
id | a zero-terminated ID |
U_CAPI void utrans_unregisterID | ( | const UChar * | id, |
int32_t | idLength | ||
) |
Unregister a transliterator from the system.
After this call the system will no longer recognize the given ID when passed to utrans_open(). If the ID is invalid then nothing is done.
id | an ID to unregister |
idLength | the length of id, or -1 if id is zero-terminated |