ICU 72.1
ubrk.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5* Copyright (C) 1996-2015, International Business Machines Corporation and others.
6* All Rights Reserved.
7******************************************************************************
8*/
9
10#ifndef UBRK_H
11#define UBRK_H
12
13#include "unicode/utypes.h"
14#include "unicode/uloc.h"
15#include "unicode/utext.h"
16
17#if U_SHOW_CPLUSPLUS_API
19#endif // U_SHOW_CPLUSPLUS_API
20
25#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
26# define UBRK_TYPEDEF_UBREAK_ITERATOR
32#endif
33
34#if !UCONFIG_NO_BREAK_ITERATION
35
36#include "unicode/parseerr.h"
37
102typedef enum UBreakIteratorType {
111
112#ifndef U_HIDE_DEPRECATED_API
126 UBRK_COUNT = 5
127#endif // U_HIDE_DEPRECATED_API
129
133#define UBRK_DONE ((int32_t) -1)
134
135
147typedef enum UWordBreak {
171
183typedef enum ULineBreakTag {
194
195
196
208typedef enum USentenceBreakTag {
225
226
244U_CAPI UBreakIterator* U_EXPORT2
246 const char *locale,
247 const UChar *text,
248 int32_t textLength,
249 UErrorCode *status);
250
266U_CAPI UBreakIterator* U_EXPORT2
268 int32_t rulesLength,
269 const UChar *text,
270 int32_t textLength,
271 UParseError *parseErr,
272 UErrorCode *status);
273
294U_CAPI UBreakIterator* U_EXPORT2
295ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
296 const UChar * text, int32_t textLength,
297 UErrorCode * status);
298
299#ifndef U_HIDE_DEPRECATED_API
300
322 const UBreakIterator *bi,
323 void *stackBuffer,
324 int32_t *pBufferSize,
325 UErrorCode *status);
326
327#endif /* U_HIDE_DEPRECATED_API */
328
336U_CAPI UBreakIterator * U_EXPORT2
338 UErrorCode *status);
339
340#ifndef U_HIDE_DEPRECATED_API
341
346#define U_BRK_SAFECLONE_BUFFERSIZE 1
347
348#endif /* U_HIDE_DEPRECATED_API */
349
356U_CAPI void U_EXPORT2
358
359#if U_SHOW_CPLUSPLUS_API
360
361U_NAMESPACE_BEGIN
362
373
374U_NAMESPACE_END
375
376#endif
377
390U_CAPI void U_EXPORT2
392 const UChar* text,
393 int32_t textLength,
394 UErrorCode* status);
395
396
414U_CAPI void U_EXPORT2
416 UText* text,
417 UErrorCode* status);
418
419
420
429U_CAPI int32_t U_EXPORT2
431
441U_CAPI int32_t U_EXPORT2
443
453U_CAPI int32_t U_EXPORT2
455
463U_CAPI int32_t U_EXPORT2
465
475U_CAPI int32_t U_EXPORT2
477
487U_CAPI int32_t U_EXPORT2
489 int32_t offset);
490
500U_CAPI int32_t U_EXPORT2
502 int32_t offset);
503
513U_CAPI const char* U_EXPORT2
514ubrk_getAvailable(int32_t index);
515
524U_CAPI int32_t U_EXPORT2
526
527
537U_CAPI UBool U_EXPORT2
538ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
539
549U_CAPI int32_t U_EXPORT2
551
569U_CAPI int32_t U_EXPORT2
570ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
571
581U_CAPI const char* U_EXPORT2
583
609U_CAPI void U_EXPORT2
611 UText *text,
612 UErrorCode *status);
613
614
640U_CAPI int32_t U_EXPORT2
642 uint8_t * binaryRules, int32_t rulesCapacity,
643 UErrorCode * status);
644
645#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
646
647#endif
"Smart pointer" class, closes a UBreakIterator via ubrk_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1328
U_CAPI int32_t ubrk_preceding(UBreakIterator *bi, int32_t offset)
Set the iterator position to the first boundary preceding the specified offset.
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:102
@ UBRK_WORD
Word breaks.
Definition: ubrk.h:106
@ UBRK_TITLE
Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only.
Definition: ubrk.h:121
@ UBRK_CHARACTER
Character breaks.
Definition: ubrk.h:104
@ UBRK_COUNT
One more than the highest normal UBreakIteratorType value.
Definition: ubrk.h:126
@ UBRK_LINE
Line breaks.
Definition: ubrk.h:108
@ UBRK_SENTENCE
Sentence breaks.
Definition: ubrk.h:110
U_CAPI int32_t ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
Get the statuses from the break rules that determined the most recently returned break position.
U_CAPI int32_t ubrk_previous(UBreakIterator *bi)
Set the iterator position to the boundary preceding the current boundary.
U_CAPI const char * ubrk_getAvailable(int32_t index)
Get a locale for which text breaking information is available.
U_CAPI int32_t ubrk_following(UBreakIterator *bi, int32_t offset)
Advance the iterator to the first boundary following the specified offset.
U_CAPI int32_t ubrk_getBinaryRules(UBreakIterator *bi, uint8_t *binaryRules, int32_t rulesCapacity, UErrorCode *status)
Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
U_CAPI int32_t ubrk_first(UBreakIterator *bi)
Set the iterator position to zero, the start of the text being scanned.
U_CAPI int32_t ubrk_current(const UBreakIterator *bi)
Determine the most recently-returned text boundary.
U_CAPI int32_t ubrk_getRuleStatus(UBreakIterator *bi)
Return the status from the break rule that determined the most recently returned break position.
U_CAPI void ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
U_CAPI UBreakIterator * ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
Thread safe cloning operation.
U_CAPI void ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Sets an existing iterator to point to a new piece of text.
U_CAPI UBool ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
Returns true if the specified position is a boundary position.
U_CAPI void ubrk_close(UBreakIterator *bi)
Close a UBreakIterator.
ULineBreakTag
Enum constants for the line break tags returned by getRuleStatus().
Definition: ubrk.h:183
@ UBRK_LINE_SOFT
Tag value for soft line breaks, positions at which a line break is acceptable but not required.
Definition: ubrk.h:186
@ UBRK_LINE_HARD
Tag value for a hard, or mandatory, line break.
Definition: ubrk.h:190
@ UBRK_LINE_HARD_LIMIT
Upper bound for hard line breaks.
Definition: ubrk.h:192
@ UBRK_LINE_SOFT_LIMIT
Upper bound for soft line breaks.
Definition: ubrk.h:188
U_CAPI int32_t ubrk_next(UBreakIterator *bi)
Advance the iterator to the boundary following the current boundary.
U_CAPI void ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status)
Set the subject text string upon which the break iterator is operating without changing any other aspect of the state.
U_CAPI int32_t ubrk_last(UBreakIterator *bi)
Set the iterator position to the index immediately beyond the last character in the text being scanned.
U_CAPI UBreakIterator * ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
U_CAPI UBreakIterator * ubrk_openRules(const UChar *rules, int32_t rulesLength, const UChar *text, int32_t textLength, UParseError *parseErr, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using specified breaking rules.
USentenceBreakTag
Enum constants for the sentence break tags returned by getRuleStatus().
Definition: ubrk.h:208
@ UBRK_SENTENCE_TERM_LIMIT
Upper bound for tags for sentences ended by sentence terminators.
Definition: ubrk.h:215
@ UBRK_SENTENCE_SEP
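Tag value for sentences that do not contain an ending sentence terminator ('.', '?', '!', etc.) character, but are ended only by a hard separator (CR, LF, PS, etc.) or end of input.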
Definition: ubrk.h:220
@ UBRK_SENTENCE_TERM
Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.).
Definition: ubrk.h:213
@ UBRK_SENTENCE_SEP_LIMIT
Upper bound for tags for sentences ended by a separator.
Definition: ubrk.h:222
U_CAPI const char * ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode *status)
Return the locale of the break iterator.
UBreakIterator * ubrk_safeClone(const UBreakIterator *bi, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
Thread safe cloning operation.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:31
U_CAPI UBreakIterator * ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
U_CAPI int32_t ubrk_countAvailable(void)
Determine how many locales have text breaking information available.
UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
Definition: ubrk.h:147
@ UBRK_WORD_IDEO
Tag value for words containing ideographic characters, lower limit.
Definition: ubrk.h:167
@ UBRK_WORD_NUMBER
Tag value for words that appear to be numbers, lower limit.
Definition: ubrk.h:154
@ UBRK_WORD_NONE_LIMIT
Upper bound for tags for uncategorized words.
Definition: ubrk.h:152
@ UBRK_WORD_LETTER_LIMIT
Tag value for words containing letters, upper limit.
Definition: ubrk.h:161
@ UBRK_WORD_KANA
Tag value for words containing kana characters, lower limit.
Definition: ubrk.h:163
@ UBRK_WORD_KANA_LIMIT
Tag value for words containing kana characters, upper limit.
Definition: ubrk.h:165
@ UBRK_WORD_IDEO_LIMIT
Tag value for words containing ideographic characters, upper limit.
Definition: ubrk.h:169
@ UBRK_WORD_NONE
Tag value for "words" that do not fit into any of other categories.
Definition: ubrk.h:150
@ UBRK_WORD_NUMBER_LIMIT
Tag value for words that appear to be numbers, upper limit.
Definition: ubrk.h:156
@ UBRK_WORD_LETTER
Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters, lower limit.
Definition: ubrk.h:159
C API: Locale ID functionality similar to C++ class Locale.
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested, valid or actual locale.
Definition: uloc.h:338
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API.
Definition: umachine.h:116
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:412
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415