Subversion
svn_utf.h
Go to the documentation of this file.
1/**
2 * @copyright
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 * @endcopyright
22 *
23 * @file svn_utf.h
24 * @brief UTF-8 conversion routines
25 *
26 * Whenever a conversion routine cannot convert to or from UTF-8, the
27 * error returned has code @c APR_EINVAL.
28 */
29
30
31
32#ifndef SVN_UTF_H
33#define SVN_UTF_H
34
35#include <apr_pools.h>
36#include <apr_xlate.h> /* for APR_*_CHARSET */
37
38#include "svn_types.h"
39#include "svn_string.h"
40
41#ifdef __cplusplus
42extern "C" {
43#endif /* __cplusplus */
44
45#define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
46#define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
47
48/**
49 * Initialize the UTF-8 encoding/decoding routines.
50 * Allocate cached translation handles in a subpool of @a pool.
51 *
52 * If @a assume_native_utf8 is TRUE, the native character set is
53 * assumed to be UTF-8, i.e. conversion is a no-op. This is useful
54 * in contexts where the native character set is ASCII but UTF-8
55 * should be used regardless (e.g. for mod_dav_svn which runs within
56 * httpd and always uses the "C" locale).
57 *
58 * @note It is optional to call this function, but if it is used, no other
59 * svn function may be in use in other threads during the call of this
60 * function or when @a pool is cleared or destroyed.
61 * Initializing the UTF-8 routines will improve performance.
62 *
63 * @since New in 1.8.
64 */
65void
66svn_utf_initialize2(svn_boolean_t assume_native_utf8,
67 apr_pool_t *pool);
68
69/**
70 * Like svn_utf_initialize2() but without the ability to force the
71 * native encoding to UTF-8.
72 *
73 * @deprecated Provided for backward compatibility with the 1.7 API.
74 */
75SVN_DEPRECATED
76void
77svn_utf_initialize(apr_pool_t *pool);
78
79/** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
80 * allocate @a *dest in @a pool.
81 */
82svn_error_t *
83svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
84 const svn_stringbuf_t *src,
85 apr_pool_t *pool);
86
87
88/** Set @a *dest to a utf8-encoded string from native string @a src; allocate
89 * @a *dest in @a pool.
90 */
91svn_error_t *
92svn_utf_string_to_utf8(const svn_string_t **dest,
93 const svn_string_t *src,
94 apr_pool_t *pool);
95
96
97/** Set @a *dest to a utf8-encoded C string from native C string @a src;
98 * allocate @a *dest in @a pool.
99 */
100svn_error_t *
101svn_utf_cstring_to_utf8(const char **dest,
102 const char *src,
103 apr_pool_t *pool);
104
105
106/** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
107 * string @a src; allocate @a *dest in @a pool.
108 *
109 * @since New in 1.4.
110 */
111svn_error_t *
112svn_utf_cstring_to_utf8_ex2(const char **dest,
113 const char *src,
114 const char *frompage,
115 apr_pool_t *pool);
116
117
118/** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is
119 * ignored.
120 *
121 * @deprecated Provided for backward compatibility with the 1.3 API.
122 */
123SVN_DEPRECATED
124svn_error_t *
125svn_utf_cstring_to_utf8_ex(const char **dest,
126 const char *src,
127 const char *frompage,
128 const char *convset_key,
129 apr_pool_t *pool);
130
131
132/** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
133 * allocate @a *dest in @a pool.
134 */
135svn_error_t *
136svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
137 const svn_stringbuf_t *src,
138 apr_pool_t *pool);
139
140
141/** Set @a *dest to a natively-encoded string from utf8 string @a src;
142 * allocate @a *dest in @a pool.
143 */
144svn_error_t *
145svn_utf_string_from_utf8(const svn_string_t **dest,
146 const svn_string_t *src,
147 apr_pool_t *pool);
148
149
150/** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
151 * allocate @a *dest in @a pool.
152 */
153svn_error_t *
154svn_utf_cstring_from_utf8(const char **dest,
155 const char *src,
156 apr_pool_t *pool);
157
158
159/** Set @a *dest to a @a topage encoded C string from utf8 encoded C string
160 * @a src; allocate @a *dest in @a pool.
161 *
162 * @since New in 1.4.
163 */
164svn_error_t *
165svn_utf_cstring_from_utf8_ex2(const char **dest,
166 const char *src,
167 const char *topage,
168 apr_pool_t *pool);
169
170
171/** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is
172 * ignored.
173 *
174 * @deprecated Provided for backward compatibility with the 1.3 API.
175 */
176SVN_DEPRECATED
177svn_error_t *
178svn_utf_cstring_from_utf8_ex(const char **dest,
179 const char *src,
180 const char *topage,
181 const char *convset_key,
182 apr_pool_t *pool);
183
184
185/** Return a fuzzily native-encoded C string from utf8 C string @a src,
186 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii
187 * characters the same, and substitutes "?\\XXX" for others, where XXX
188 * is the unsigned decimal code for that character.
189 *
190 * This function cannot error; it is guaranteed to return something.
191 * First it will recode as described above and then attempt to convert
192 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it
193 * will return the raw fuzzily recoded string, which may or may not be
194 * meaningful in the client's locale, but is (presumably) better than
195 * nothing.
196 *
197 * ### Notes:
198 *
199 * Improvement is possible, even imminent. The original problem was
200 * that if you converted a UTF-8 string (say, a log message) into a
201 * locale that couldn't represent all the characters, you'd just get a
202 * static placeholder saying "[unconvertible log message]". Then
203 * Justin Erenkrantz pointed out how on platforms that didn't support
204 * conversion at all, "svn log" would still fail completely when it
205 * encountered unconvertible data.
206 *
207 * Now for both cases, the caller can at least fall back on this
208 * function, which converts the message as best it can, substituting
209 * "?\\XXX" escape codes for the non-ascii characters.
210 *
211 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
212 * so when we can detect that at configure time, things will change.
213 * Also, this should (?) be moved to apr/apu eventually.
214 *
215 * See https://issues.apache.org/jira/browse/SVN-807 for
216 * details.
217 */
218const char *
219svn_utf_cstring_from_utf8_fuzzy(const char *src,
220 apr_pool_t *pool);
221
222
223/** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
224 * allocate @a *dest in @a pool.
225 */
226svn_error_t *
227svn_utf_cstring_from_utf8_stringbuf(const char **dest,
228 const svn_stringbuf_t *src,
229 apr_pool_t *pool);
230
231
232/** Set @a *dest to a natively-encoded C string from utf8 string @a src;
233 * allocate @a *dest in @a pool.
234 */
235svn_error_t *
236svn_utf_cstring_from_utf8_string(const char **dest,
237 const svn_string_t *src,
238 apr_pool_t *pool);
239
240/** Return the display width of UTF-8-encoded C string @a cstr.
241 * If the string is not printable or invalid UTF-8, return -1.
242 *
243 * @since New in 1.8.
244 */
245int
246svn_utf_cstring_utf8_width(const char *cstr);
247
248#ifdef __cplusplus
249}
250#endif /* __cplusplus */
251
252#endif /* SVN_UTF_H */
Subversion error object.
Definition: svn_types.h:181
A simple counted string.
Definition: svn_string.h:97
A buffered string, capable of appending without an allocation and copy for each append.
Definition: svn_string.h:105
Counted-length strings for Subversion, plus some C string goodies.
Subversion's data types.
int svn_boolean_t
YABT: Yet Another Boolean Type.
Definition: svn_types.h:141
#define SVN_DEPRECATED
Macro used to mark deprecated functions.
Definition: svn_types.h:62
svn_error_t * svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded stringbuf from utf8 stringbuf src; allocate *dest in pool.
svn_error_t * svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded stringbuf from native stringbuf src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_from_utf8_stringbuf(const char **dest, const svn_stringbuf_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 stringbuf src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_from_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 C string src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_from_utf8_ex(const char **dest, const char *src, const char *topage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_from_utf8_ex2() but with convset_key which is ignored.
int svn_utf_cstring_utf8_width(const char *cstr)
Return the display width of UTF-8-encoded C string cstr.
svn_error_t * svn_utf_string_from_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded string from utf8 string src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_from_utf8_string(const char **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a natively-encoded C string from utf8 string src; allocate *dest in pool.
svn_error_t * svn_utf_string_to_utf8(const svn_string_t **dest, const svn_string_t *src, apr_pool_t *pool)
Set *dest to a utf8-encoded string from native string src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_to_utf8(const char **dest, const char *src, apr_pool_t *pool)
Set *dest to a utf8-encoded C string from native C string src; allocate *dest in pool.
svn_error_t * svn_utf_cstring_to_utf8_ex(const char **dest, const char *src, const char *frompage, const char *convset_key, apr_pool_t *pool)
Like svn_utf_cstring_to_utf8_ex2() but with convset_key which is ignored.
const char * svn_utf_cstring_from_utf8_fuzzy(const char *src, apr_pool_t *pool)
Return a fuzzily native-encoded C string from utf8 C string src, allocated in pool.
void svn_utf_initialize2(svn_boolean_t assume_native_utf8, apr_pool_t *pool)
Initialize the UTF-8 encoding/decoding routines.
svn_error_t * svn_utf_cstring_from_utf8_ex2(const char **dest, const char *src, const char *topage, apr_pool_t *pool)
Set *dest to a topage encoded C string from utf8 encoded C string src; allocate *dest in pool.
void svn_utf_initialize(apr_pool_t *pool)
Like svn_utf_initialize2() but without the ability to force the native encoding to UTF-8.
svn_error_t * svn_utf_cstring_to_utf8_ex2(const char **dest, const char *src, const char *frompage, apr_pool_t *pool)
Set *dest to a utf8 encoded C string from frompage encoded C string src; allocate *dest in pool.