ICU 72.1
ucsdet.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 **********************************************************************
5 * Copyright (C) 2005-2013, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * file name: ucsdet.h
9 * encoding: UTF-8
10 * indentation:4
11 *
12 * created on: 2005Aug04
13 * created by: Andy Heninger
14 *
15 * ICU Character Set Detection, API for C
16 *
17 * Draft version 18 Oct 2005
18 *
19 */
20
21#ifndef __UCSDET_H
22#define __UCSDET_H
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_CONVERSION
27
28#include "unicode/uenum.h"
29
30#if U_SHOW_CPLUSPLUS_API
32#endif // U_SHOW_CPLUSPLUS_API
33
58struct UCharsetDetector;
64
65struct UCharsetMatch;
72
81U_CAPI UCharsetDetector * U_EXPORT2
83
93U_CAPI void U_EXPORT2
95
96#if U_SHOW_CPLUSPLUS_API
97
98U_NAMESPACE_BEGIN
99
110
111U_NAMESPACE_END
112
113#endif
114
130U_CAPI void U_EXPORT2
131ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
132
133
152U_CAPI void U_EXPORT2
153ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
154
155
181U_CAPI const UCharsetMatch * U_EXPORT2
183
184
215U_CAPI const UCharsetMatch ** U_EXPORT2
216ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
217
218
219
235U_CAPI const char * U_EXPORT2
237
261U_CAPI int32_t U_EXPORT2
263
293U_CAPI const char * U_EXPORT2
295
296
319U_CAPI int32_t U_EXPORT2
321 UChar *buf, int32_t cap, UErrorCode *status);
322
323
324
353U_CAPI UEnumeration * U_EXPORT2
355
367U_CAPI UBool U_EXPORT2
369
370
382U_CAPI UBool U_EXPORT2
384
385#ifndef U_HIDE_INTERNAL_API
399U_CAPI UEnumeration * U_EXPORT2
401
415U_CAPI void U_EXPORT2
416ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
417#endif /* U_HIDE_INTERNAL_API */
418
419#endif
420#endif /* __UCSDET_H */
421
422
"Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
U_CAPI const char * ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
Get the RFC 3066 code for the language of the input data.
U_CAPI UCharsetDetector * ucsdet_open(UErrorCode *status)
Open a charset detector.
U_CAPI UBool ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
Enable filtering of input text.
U_CAPI int32_t ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
Get a confidence number for the quality of the match of the byte data with the charset.
struct UCharsetMatch UCharsetMatch
Opaque structure representing a match that was identified from a charset detection operation.
Definition: ucsdet.h:71
U_CAPI void ucsdet_close(UCharsetDetector *ucsd)
Close a charset detector.
U_CAPI UBool ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
Test whether input filtering is enabled for this charset detector.
U_CAPI int32_t ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status)
Get the entire input text as a UChar string, placing it into a caller-supplied buffer.
struct UCharsetDetector UCharsetDetector
Structure representing a charset detector.
Definition: ucsdet.h:63
U_CAPI void ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
Enable or disable an individual charset encoding.
U_CAPI const char * ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
Get the name of the charset represented by a UCharsetMatch.
U_CAPI void ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
Set the declared encoding for charset detection.
U_CAPI const UCharsetMatch ** ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status)
Find all charset matches that appear to be consistent with the input, returning an array of results.
U_CAPI UEnumeration * ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of detectable charsets - over the charsets that are enabled by the specified charset detector.
U_CAPI const UCharsetMatch * ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
Return the charset that best matches the supplied input data.
U_CAPI UEnumeration * ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of all detectable charsets - over the charsets that are known to the charset detection service.
U_CAPI void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
Set the input byte data whose charset is to be detected.
C API: String Enumeration.
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition: uenum.h:44
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:412
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415