ICU 72.1 72.1
ustring.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1998-2014, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ustring.h
10*
11* Modification History:
12*
13* Date Name Description
14* 12/07/98 bertrand Creation.
15******************************************************************************
16*/
17
18#ifndef USTRING_H
19#define USTRING_H
20
21#include "unicode/utypes.h"
22#include "unicode/putil.h"
23#include "unicode/uiter.h"
24
30#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
31# define UBRK_TYPEDEF_UBREAK_ITERATOR
33 typedef struct UBreakIterator UBreakIterator;
34#endif
35
92U_CAPI int32_t U_EXPORT2
93u_strlen(const UChar *s);
109U_CAPI int32_t U_EXPORT2
110u_countChar32(const UChar *s, int32_t length);
111
130U_CAPI UBool U_EXPORT2
131u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
132
143U_CAPI UChar* U_EXPORT2
144u_strcat(UChar *dst,
145 const UChar *src);
146
161U_CAPI UChar* U_EXPORT2
162u_strncat(UChar *dst,
163 const UChar *src,
164 int32_t n);
165
186U_CAPI UChar * U_EXPORT2
187u_strstr(const UChar *s, const UChar *substring);
188
210U_CAPI UChar * U_EXPORT2
211u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
212
230U_CAPI UChar * U_EXPORT2
231u_strchr(const UChar *s, UChar c);
232
250U_CAPI UChar * U_EXPORT2
251u_strchr32(const UChar *s, UChar32 c);
252
273U_CAPI UChar * U_EXPORT2
274u_strrstr(const UChar *s, const UChar *substring);
275
297U_CAPI UChar * U_EXPORT2
298u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
299
317U_CAPI UChar * U_EXPORT2
318u_strrchr(const UChar *s, UChar c);
319
337U_CAPI UChar * U_EXPORT2
338u_strrchr32(const UChar *s, UChar32 c);
339
352U_CAPI UChar * U_EXPORT2
353u_strpbrk(const UChar *string, const UChar *matchSet);
354
368U_CAPI int32_t U_EXPORT2
369u_strcspn(const UChar *string, const UChar *matchSet);
370
384U_CAPI int32_t U_EXPORT2
385u_strspn(const UChar *string, const UChar *matchSet);
386
412U_CAPI UChar * U_EXPORT2
413u_strtok_r(UChar *src,
414 const UChar *delim,
415 UChar **saveState);
416
427U_CAPI int32_t U_EXPORT2
428u_strcmp(const UChar *s1,
429 const UChar *s2);
430
442U_CAPI int32_t U_EXPORT2
443u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
444
472U_CAPI int32_t U_EXPORT2
473u_strCompare(const UChar *s1, int32_t length1,
474 const UChar *s2, int32_t length2,
475 UBool codePointOrder);
476
497U_CAPI int32_t U_EXPORT2
498u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
499
540U_CAPI int32_t U_EXPORT2
541u_strCaseCompare(const UChar *s1, int32_t length1,
542 const UChar *s2, int32_t length2,
543 uint32_t options,
544 UErrorCode *pErrorCode);
545
558U_CAPI int32_t U_EXPORT2
559u_strncmp(const UChar *ucs1,
560 const UChar *ucs2,
561 int32_t n);
562
576U_CAPI int32_t U_EXPORT2
577u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
578
598U_CAPI int32_t U_EXPORT2
599u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
600
622U_CAPI int32_t U_EXPORT2
623u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
624
646U_CAPI int32_t U_EXPORT2
647u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
648
657U_CAPI UChar* U_EXPORT2
658u_strcpy(UChar *dst,
659 const UChar *src);
660
672U_CAPI UChar* U_EXPORT2
673u_strncpy(UChar *dst,
674 const UChar *src,
675 int32_t n);
676
677#if !UCONFIG_NO_CONVERSION
678
689U_CAPI UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
690 const char *src );
691
704U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
705 const char *src,
706 int32_t n);
707
718U_CAPI char* U_EXPORT2 u_austrcpy(char *dst,
719 const UChar *src );
720
733U_CAPI char* U_EXPORT2 u_austrncpy(char *dst,
734 const UChar *src,
735 int32_t n );
736
737#endif
738
747U_CAPI UChar* U_EXPORT2
748u_memcpy(UChar *dest, const UChar *src, int32_t count);
749
758U_CAPI UChar* U_EXPORT2
759u_memmove(UChar *dest, const UChar *src, int32_t count);
760
770U_CAPI UChar* U_EXPORT2
771u_memset(UChar *dest, UChar c, int32_t count);
772
784U_CAPI int32_t U_EXPORT2
785u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
786
800U_CAPI int32_t U_EXPORT2
801u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
802
820U_CAPI UChar* U_EXPORT2
821u_memchr(const UChar *s, UChar c, int32_t count);
822
840U_CAPI UChar* U_EXPORT2
841u_memchr32(const UChar *s, UChar32 c, int32_t count);
842
860U_CAPI UChar* U_EXPORT2
861u_memrchr(const UChar *s, UChar c, int32_t count);
862
880U_CAPI UChar* U_EXPORT2
881u_memrchr32(const UChar *s, UChar32 c, int32_t count);
882
930#if defined(U_DECLARE_UTF16)
931# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
933# define U_STRING_INIT(var, cs, length)
934#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
935# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
937# define U_STRING_INIT(var, cs, length)
938#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
939# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
941# define U_STRING_INIT(var, cs, length)
942#else
943# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
945# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
946#endif
947
995U_CAPI int32_t U_EXPORT2
996u_unescape(const char *src,
997 UChar *dest, int32_t destCapacity);
998
1011U_CDECL_BEGIN
1012typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
1013U_CDECL_END
1014
1043U_CAPI UChar32 U_EXPORT2
1044u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1045 int32_t *offset,
1046 int32_t length,
1047 void *context);
1048
1069U_CAPI int32_t U_EXPORT2
1070u_strToUpper(UChar *dest, int32_t destCapacity,
1071 const UChar *src, int32_t srcLength,
1072 const char *locale,
1073 UErrorCode *pErrorCode);
1074
1095U_CAPI int32_t U_EXPORT2
1096u_strToLower(UChar *dest, int32_t destCapacity,
1097 const UChar *src, int32_t srcLength,
1098 const char *locale,
1099 UErrorCode *pErrorCode);
1100
1101#if !UCONFIG_NO_BREAK_ITERATION
1102
1141U_CAPI int32_t U_EXPORT2
1142u_strToTitle(UChar *dest, int32_t destCapacity,
1143 const UChar *src, int32_t srcLength,
1144 UBreakIterator *titleIter,
1145 const char *locale,
1146 UErrorCode *pErrorCode);
1147
1148#endif
1149
1174U_CAPI int32_t U_EXPORT2
1175u_strFoldCase(UChar *dest, int32_t destCapacity,
1176 const UChar *src, int32_t srcLength,
1177 uint32_t options,
1178 UErrorCode *pErrorCode);
1179
1180#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
1203U_CAPI wchar_t* U_EXPORT2
1204u_strToWCS(wchar_t *dest,
1205 int32_t destCapacity,
1206 int32_t *pDestLength,
1207 const UChar *src,
1208 int32_t srcLength,
1209 UErrorCode *pErrorCode);
1232U_CAPI UChar* U_EXPORT2
1233u_strFromWCS(UChar *dest,
1234 int32_t destCapacity,
1235 int32_t *pDestLength,
1236 const wchar_t *src,
1237 int32_t srcLength,
1238 UErrorCode *pErrorCode);
1239#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
1240
1263U_CAPI char* U_EXPORT2
1264u_strToUTF8(char *dest,
1265 int32_t destCapacity,
1266 int32_t *pDestLength,
1267 const UChar *src,
1268 int32_t srcLength,
1269 UErrorCode *pErrorCode);
1270
1293U_CAPI UChar* U_EXPORT2
1294u_strFromUTF8(UChar *dest,
1295 int32_t destCapacity,
1296 int32_t *pDestLength,
1297 const char *src,
1298 int32_t srcLength,
1299 UErrorCode *pErrorCode);
1300
1336U_CAPI char* U_EXPORT2
1337u_strToUTF8WithSub(char *dest,
1338 int32_t destCapacity,
1339 int32_t *pDestLength,
1340 const UChar *src,
1341 int32_t srcLength,
1342 UChar32 subchar, int32_t *pNumSubstitutions,
1343 UErrorCode *pErrorCode);
1344
1381U_CAPI UChar* U_EXPORT2
1382u_strFromUTF8WithSub(UChar *dest,
1383 int32_t destCapacity,
1384 int32_t *pDestLength,
1385 const char *src,
1386 int32_t srcLength,
1387 UChar32 subchar, int32_t *pNumSubstitutions,
1388 UErrorCode *pErrorCode);
1389
1441U_CAPI UChar * U_EXPORT2
1442u_strFromUTF8Lenient(UChar *dest,
1443 int32_t destCapacity,
1444 int32_t *pDestLength,
1445 const char *src,
1446 int32_t srcLength,
1447 UErrorCode *pErrorCode);
1448
1471U_CAPI UChar32* U_EXPORT2
1472u_strToUTF32(UChar32 *dest,
1473 int32_t destCapacity,
1474 int32_t *pDestLength,
1475 const UChar *src,
1476 int32_t srcLength,
1477 UErrorCode *pErrorCode);
1478
1501U_CAPI UChar* U_EXPORT2
1502u_strFromUTF32(UChar *dest,
1503 int32_t destCapacity,
1504 int32_t *pDestLength,
1505 const UChar32 *src,
1506 int32_t srcLength,
1507 UErrorCode *pErrorCode);
1508
1544U_CAPI UChar32* U_EXPORT2
1545u_strToUTF32WithSub(UChar32 *dest,
1546 int32_t destCapacity,
1547 int32_t *pDestLength,
1548 const UChar *src,
1549 int32_t srcLength,
1550 UChar32 subchar, int32_t *pNumSubstitutions,
1551 UErrorCode *pErrorCode);
1552
1588U_CAPI UChar* U_EXPORT2
1589u_strFromUTF32WithSub(UChar *dest,
1590 int32_t destCapacity,
1591 int32_t *pDestLength,
1592 const UChar32 *src,
1593 int32_t srcLength,
1594 UChar32 subchar, int32_t *pNumSubstitutions,
1595 UErrorCode *pErrorCode);
1596
1629U_CAPI char* U_EXPORT2
1630u_strToJavaModifiedUTF8(
1631 char *dest,
1632 int32_t destCapacity,
1633 int32_t *pDestLength,
1634 const UChar *src,
1635 int32_t srcLength,
1636 UErrorCode *pErrorCode);
1637
1679U_CAPI UChar* U_EXPORT2
1680u_strFromJavaModifiedUTF8WithSub(
1681 UChar *dest,
1682 int32_t destCapacity,
1683 int32_t *pDestLength,
1684 const char *src,
1685 int32_t srcLength,
1686 UChar32 subchar, int32_t *pNumSubstitutions,
1687 UErrorCode *pErrorCode);
1688
1689#endif
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to make sure that the calling convention is compatible.
Definition: platform.h:885
C API: Platform Utilities.
C API for code unit iteration.
Definition: uiter.h:341
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:31
C API: Unicode Character Iteration.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:461
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:412
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
U_CAPI UChar * u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the last occurrence of a substring in a string.
U_CAPI int32_t u_strcmp(const UChar *s1, const UChar *s2)
Compare two Unicode strings for bitwise equality (code unit order).
U_CAPI UChar32 u_unescapeAt(UNESCAPE_CHAR_AT charAt, int32_t *offset, int32_t length, void *context)
Unescape a single sequence.
U_CAPI UChar * u_strpbrk(const UChar *string, const UChar *matchSet)
Locates the first occurrence in the string string of any of the characters in the string matchSet.
U_CAPI wchar_t * u_strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to a wchar_t string.
U_CAPI int32_t u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Lowercase the characters in a string.
U_CAPI char * u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
U_CAPI int32_t u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
Compare the first count UChars of each buffer.
U_CAPI UChar * u_memchr(const UChar *s, UChar c, int32_t count)
Find the first occurrence of a BMP code point in a string.
U_CAPI UChar * u_strFromWCS(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a wchar_t string to UTF-16.
U_CAPI char * u_austrcpy(char *dst, const UChar *src)
Copy ustring to a byte string encoded in the default codepage.
U_CAPI UChar * u_strrchr32(const UChar *s, UChar32 c)
Find the last occurrence of a code point in a string.
U_CAPI int32_t u_strspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.
U_CAPI UChar * u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
U_CAPI UChar * u_strncat(UChar *dst, const UChar *src, int32_t n)
Concatenate two ustrings.
U_CAPI int32_t u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
Compare two strings case-insensitively using full case folding.
U_CAPI int32_t u_strlen(const UChar *s)
Determine the length of an array of UChar.
U_CAPI int32_t u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
Titlecase a string.
U_CAPI int32_t u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
Compare two ustrings for bitwise equality.
U_CAPI UChar * u_memrchr(const UChar *s, UChar c, int32_t count)
Find the last occurrence of a BMP code point in a string.
U_CAPI int32_t u_unescape(const char *src, UChar *dest, int32_t destCapacity)
Unescape a string of characters and write the resulting Unicode characters to the destination buffer.
UChar(* UNESCAPE_CHAR_AT)(int32_t offset, void *context)
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
Definition: ustring.h:1012
U_CAPI char * u_strToJavaModifiedUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a 16-bit Unicode string to Java Modified UTF-8.
U_CAPI char * u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
U_CAPI int32_t u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
Compare two Unicode strings in code point order.
U_CAPI UChar * u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
U_CAPI UChar32 * u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
U_CAPI UChar * u_uastrncpy(UChar *dst, const char *src, int32_t n)
Copy a byte string encoded in the default codepage to a ustring.
U_CAPI UChar * u_memset(UChar *dest, UChar c, int32_t count)
Initialize count characters of dest to c.
U_CAPI UChar * u_strchr(const UChar *s, UChar c)
Find the first occurrence of a BMP code point in a string.
U_CAPI UChar * u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
U_CAPI int32_t u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
Compare two strings case-insensitively using full case folding.
U_CAPI UChar * u_memmove(UChar *dest, const UChar *src, int32_t count)
Synonym for memmove(), but with UChars only.
U_CAPI int32_t u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder)
Compare two Unicode strings (binary order) as presented by UCharIterator objects.
U_CAPI UChar * u_strcat(UChar *dst, const UChar *src)
Concatenate two ustrings.
U_CAPI int32_t u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
Compare two Unicode strings in code point order.
U_CAPI UChar * u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
U_CAPI UChar * u_uastrcpy(UChar *dst, const char *src)
Copy a byte string encoded in the default codepage to a ustring.
U_CAPI UChar * u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the first occurrence of a substring in a string.
U_CAPI int32_t u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Uppercase the characters in a string.
U_CAPI char * u_austrncpy(char *dst, const UChar *src, int32_t n)
Copy ustring to a byte string encoded in the default codepage.
U_CAPI UChar * u_memrchr32(const UChar *s, UChar32 c, int32_t count)
Find the last occurrence of a code point in a string.
U_CAPI int32_t u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
Compare two strings case-insensitively using full case folding.
U_CAPI UChar * u_strrstr(const UChar *s, const UChar *substring)
Find the last occurrence of a substring in a string.
U_CAPI UBool u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
Check if the string contains more Unicode code points than a certain number.
U_CAPI UChar * u_strncpy(UChar *dst, const UChar *src, int32_t n)
Copy a ustring.
U_CAPI UChar * u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
The string tokenizer API allows an application to break a string into tokens.
U_CAPI int32_t u_strcspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.
U_CAPI UChar * u_memchr32(const UChar *s, UChar32 c, int32_t count)
Find the first occurrence of a code point in a string.
U_CAPI int32_t u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
Compare two Unicode strings (binary order).
U_CAPI UChar32 * u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
U_CAPI UChar * u_memcpy(UChar *dest, const UChar *src, int32_t count)
Synonym for memcpy(), but with UChars only.
U_CAPI int32_t u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
Compare two Unicode strings in code point order.
U_CAPI UChar * u_strFromJavaModifiedUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
U_CAPI int32_t u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
Case-folds the characters in a string.
U_CAPI UChar * u_strstr(const UChar *s, const UChar *substring)
Find the first occurrence of a substring in a string.
U_CAPI int32_t u_countChar32(const UChar *s, int32_t length)
Count Unicode code points in the length UChar code units of the string.
U_CAPI UChar * u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
U_CAPI UChar * u_strchr32(const UChar *s, UChar32 c)
Find the first occurrence of a code point in a string.
U_CAPI int32_t u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings case-insensitively using full case folding.
U_CAPI UChar * u_strcpy(UChar *dst, const UChar *src)
Copy a ustring.
U_CAPI UChar * u_strrchr(const UChar *s, UChar c)
Find the last occurrence of a BMP code point in a string.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415