blob: 5e7b043c4c934c995715852f3dab70ec14f2dd10 [file] [log] [blame]
Victor Chang73229502020-09-17 13:39:19 +01001// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2003-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: udataswp.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2003jun05
16* created by: Markus W. Scherer
17*
18* Definitions for ICU data transformations for different platforms,
19* changing between big- and little-endian data and/or between
20* charset families (ASCII<->EBCDIC).
21*/
22
23#ifndef __UDATASWP_H__
24#define __UDATASWP_H__
25
26#include <stdarg.h>
27#include "unicode/utypes.h"
28
29/* forward declaration */
30
31U_CDECL_BEGIN
32
/*
 * Declared ahead of its definition because the function-pointer typedefs
 * below (UDataSwapFn, UDataCompareInvChars) take a const UDataSwapper *
 * before the struct body appears later in this header.
 */
33struct UDataSwapper;
34typedef struct UDataSwapper UDataSwapper;
35
36/**
37 * Function type for data transformation.
38 * Transforms data, or just returns the length of the data if
39 * the input length is -1.
40 * Swap functions assume that their data pointers are aligned properly.
41 *
42 * Quick implementation outline:
43 * (best to copy and adapt an existing swapper implementation)
44 * check that the data looks like the expected format
45 * if(length<0) {
46 * preflight:
47 * never dereference outData
48 * read inData and determine the data size
49 * assume that inData is long enough for this
50 * } else {
51 * outData can be NULL if length==0
52 * inData==outData (in-place swapping) possible but not required!
53 * verify that length>=(actual size)
54 * if there is a chance that not every byte up to size is reached
55 * due to padding etc.:
56 * if(inData!=outData) {
57 * memcpy(outData, inData, actual size);
58 * }
59 * swap contents
60 * }
61 * return actual size
62 *
63 * Further implementation notes:
64 * - read integers from inData before swapping them
65 * because in-place swapping can make them unreadable
66 * - compareInvChars compares a local Unicode string with already-swapped
67 * output charset strings
68 *
69 * @param ds Pointer to UDataSwapper containing global data about the
70 * transformation and function pointers for handling primitive
71 * types.
72 * @param inData Pointer to the input data to be transformed or examined.
73 * @param length Length of the data, counting bytes. May be -1 for preflighting.
74 * If length>=0, then transform the data.
75 * If length==-1, then only determine the length of the data.
76 * The length cannot be determined from the data itself for all
77 * types of data (e.g., not for simple arrays of integers).
78 * @param outData Pointer to the output data buffer.
79 * If length>=0 (transformation), then the output buffer must
80 * have a capacity of at least length.
81 * If length==-1, then outData will not be used and can be NULL.
82 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
83 * fulfill U_SUCCESS on input.
84 * @return The actual length of the data.
85 *
86 * @see UDataSwapper
87 * @internal ICU 2.8
88 */
89typedef int32_t U_CALLCONV
90UDataSwapFn(const UDataSwapper *ds,
91 const void *inData, int32_t length, void *outData,
92 UErrorCode *pErrorCode);
93
94/**
95 * Convert one uint16_t from input to platform endianness.
 *
 * @param x The 16-bit value as found in the input data.
 * @return The same value expressed in platform endianness.
96 * @internal ICU 2.8
97 */
98typedef uint16_t U_CALLCONV
99UDataReadUInt16(uint16_t x);
100
101/**
102 * Convert one uint32_t from input to platform endianness.
 *
 * @param x The 32-bit value as found in the input data.
 * @return The same value expressed in platform endianness.
103 * @internal ICU 2.8
104 */
105typedef uint32_t U_CALLCONV
106UDataReadUInt32(uint32_t x);
107
108/**
109 * Convert one uint16_t from platform to input endianness.
 *
 * NOTE(review): the UDataSwapper struct groups this function type under
 * "basic functions for writing data values"; "input endianness" here may
 * be meant as "output endianness" - confirm against the implementation.
 *
 * @param p Presumably the location that receives the converted value -
 *          confirm against the implementation.
 * @param x The 16-bit value in platform endianness.
110 * @internal ICU 2.8
111 */
112typedef void U_CALLCONV
113UDataWriteUInt16(uint16_t *p, uint16_t x);
114
115/**
116 * Convert one uint32_t from platform to input endianness.
 *
 * NOTE(review): the UDataSwapper struct groups this function type under
 * "basic functions for writing data values"; "input endianness" here may
 * be meant as "output endianness" - confirm against the implementation.
 *
 * @param p Presumably the location that receives the converted value -
 *          confirm against the implementation.
 * @param x The 32-bit value in platform endianness.
117 * @internal ICU 2.8
118 */
119typedef void U_CALLCONV
120UDataWriteUInt32(uint32_t *p, uint32_t x);
121
122/**
123 * Compare invariant-character strings, one in the output data and the
124 * other one caller-provided in Unicode.
125 * An output data string is compared because strings are usually swapped
126 * before the rest of the data, to allow for sorting of string tables
127 * according to the output charset.
128 * You can use -1 for the length parameters of NUL-terminated strings as usual.
129 * Returns Unicode code point order for invariant characters.
 *
 * @param ds          The swapper describing the transformation.
 * @param outString   The string taken from the (already swapped) output data.
 * @param outLength   Length of outString, or -1 if it is NUL-terminated.
 * @param localString The caller-provided Unicode string.
 * @param localLength Length of localString, or -1 if it is NUL-terminated.
 * @return The comparison result in Unicode code point order; presumably
 *         negative/zero/positive in the usual compare-function sense -
 *         confirm against the implementation.
130 * @internal ICU 2.8
131 */
132typedef int32_t U_CALLCONV
133UDataCompareInvChars(const UDataSwapper *ds,
134 const char *outString, int32_t outLength,
135 const UChar *localString, int32_t localLength);
136
137/**
138 * Function for message output when an error occurs during data swapping.
139 * A format string and variable number of arguments are passed
140 * like for vprintf().
 *
 * A pointer to a function of this type is stored in the printError field
 * of UDataSwapper, where it may be NULL; the printErrorContext field is
 * documented there as the context pointer for it.
141 *
142 * @param context A function-specific context pointer.
143 * @param fmt The format string.
144 * @param args The arguments for format string inserts.
145 *
146 * @internal ICU 2.8
147 */
148typedef void U_CALLCONV
149UDataPrintError(void *context, const char *fmt, va_list args);
150
/**
 * State shared by all swap functions of one transformation: the
 * endianness and charset family of the input and of the output, function
 * pointers for reading, writing, comparing and swapping primitive values,
 * and an optional error-message callback with its context pointer.
 *
 * NOTE(review): the two write* members are described as converting "from
 * platform to input endianness" although they sit under "functions for
 * writing data values"; "output endianness" may be intended - confirm
 * against the implementation.
 */
151struct UDataSwapper {
152 /** Input endianness. @internal ICU 2.8 */
153 UBool inIsBigEndian;
154 /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
155 uint8_t inCharset;
156 /** Output endianness. @internal ICU 2.8 */
157 UBool outIsBigEndian;
158 /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
159 uint8_t outCharset;
160
161 /* basic functions for reading data values */
162
163 /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
164 UDataReadUInt16 *readUInt16;
165 /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
166 UDataReadUInt32 *readUInt32;
167 /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
168 UDataCompareInvChars *compareInvChars;
169
170 /* basic functions for writing data values */
171
172 /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
173 UDataWriteUInt16 *writeUInt16;
174 /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
175 UDataWriteUInt32 *writeUInt32;
176
177 /* basic functions for data transformations */
178
179 /** Transform an array of 16-bit integers. @internal ICU 2.8 */
180 UDataSwapFn *swapArray16;
181 /** Transform an array of 32-bit integers. @internal ICU 2.8 */
182 UDataSwapFn *swapArray32;
183 /** Transform an array of 64-bit integers. @internal ICU 53 */
184 UDataSwapFn *swapArray64;
185 /** Transform an invariant-character string. @internal ICU 2.8 */
186 UDataSwapFn *swapInvChars;
187
188 /**
189 * Function for message output when an error occurs during data swapping.
190 * Can be NULL.
191 * @internal ICU 2.8
192 */
193 UDataPrintError *printError;
194 /** Context pointer for printError. @internal ICU 2.8 */
195 void *printErrorContext;
196};
197
198U_CDECL_END
199
/**
 * Open a UDataSwapper for explicitly specified input and output
 * characteristics. The first four parameters carry the same names as the
 * first four fields of struct UDataSwapper.
 *
 * @param inIsBigEndian  Endianness of the input data.
 * @param inCharset      Charset family of the input data (see U_CHARSET_FAMILY).
 * @param outIsBigEndian Endianness of the output data.
 * @param outCharset     Charset family of the output data (see U_CHARSET_FAMILY).
 * @param pErrorCode     ICU UErrorCode parameter.
 * @return The new swapper. Presumably NULL on failure, and to be released
 *         with udata_closeSwapper() - confirm against the implementation.
 */
200U_CAPI UDataSwapper * U_EXPORT2
201udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
202 UBool outIsBigEndian, uint8_t outCharset,
203 UErrorCode *pErrorCode);
204
205/**
206 * Open a UDataSwapper for the given input data and the specified output
207 * characteristics.
208 * Values of -1 for any of the characteristics mean the local platform's
209 * characteristics.
 *
 * @param data           The input data; presumably its header is inspected to
 *                       determine the input endianness and charset family -
 *                       confirm against the implementation.
 * @param length         Length of data; presumably counted in bytes - confirm.
 * @param outIsBigEndian Endianness of the output data.
 * @param outCharset     Charset family of the output data.
 * @param pErrorCode     ICU UErrorCode parameter.
 * @return The new swapper.
210 *
211 * @see udata_swap
212 * @internal ICU 2.8
213 */
214U_CAPI UDataSwapper * U_EXPORT2
215udata_openSwapperForInputData(const void *data, int32_t length,
216 UBool outIsBigEndian, uint8_t outCharset,
217 UErrorCode *pErrorCode);
218
/**
 * Close a UDataSwapper; the counterpart of udata_openSwapper() and
 * udata_openSwapperForInputData().
 *
 * @param ds The swapper to close. Presumably it must not be used
 *           afterwards - confirm against the implementation.
 */
219U_CAPI void U_EXPORT2
220udata_closeSwapper(UDataSwapper *ds);
221
222/**
223 * Read the beginning of an ICU data piece, recognize magic bytes,
224 * swap the structure.
225 * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there for length, outData and pErrorCode are
 * assumed to apply here as well - confirm against the implementation.
226 *
227 * @return The size of the data header, in bytes.
228 *
229 * @internal ICU 2.8
230 */
231U_CAPI int32_t U_EXPORT2
232udata_swapDataHeader(const UDataSwapper *ds,
233 const void *inData, int32_t length, void *outData,
234 UErrorCode *pErrorCode);
235
236/**
237 * Convert one int16_t from input to platform endianness.
 *
 * @param ds The swapper describing the input data.
 * @param x  The signed 16-bit value as found in the input data.
 * @return The same value expressed in platform endianness.
238 * @internal ICU 2.8
239 */
240U_CAPI int16_t U_EXPORT2
241udata_readInt16(const UDataSwapper *ds, int16_t x);
242
243/**
244 * Convert one int32_t from input to platform endianness.
 *
 * @param ds The swapper describing the input data.
 * @param x  The signed 32-bit value as found in the input data.
 * @return The same value expressed in platform endianness.
245 * @internal ICU 2.8
246 */
247U_CAPI int32_t U_EXPORT2
248udata_readInt32(const UDataSwapper *ds, int32_t x);
249
250/**
251 * Swap a block of invariant, NUL-terminated strings, but not padding
252 * bytes after the last string.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there are assumed to apply here as well -
 * confirm against the implementation.
253 * @internal
254 */
255U_CAPI int32_t U_EXPORT2
256udata_swapInvStringBlock(const UDataSwapper *ds,
257 const void *inData, int32_t length, void *outData,
258 UErrorCode *pErrorCode);
259
/**
 * Output an error message for a swapper, taking a printf-style format
 * string followed by a variable number of arguments.
 *
 * Presumably forwards to ds->printError with ds->printErrorContext, and
 * does nothing when printError is NULL (the struct documents that field
 * as "Can be NULL") - confirm against the implementation.
 *
 * @param ds  The swapper whose error callback is to be used.
 * @param fmt The format string.
 * @param ... The arguments for format string inserts.
 */
260U_CAPI void U_EXPORT2
261udata_printError(const UDataSwapper *ds,
262 const char *fmt,
263 ...);
264
265/* internal exports from putil.c -------------------------------------------- */
266
267/* declared here to keep them out of the public putil.h */
268
269/**
270 * Swap invariant char * strings ASCII->EBCDIC.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * presumably suitable for the swapInvChars member of UDataSwapper -
 * confirm against the implementation.
271 * @internal
272 */
273U_CAPI int32_t U_EXPORT2
274uprv_ebcdicFromAscii(const UDataSwapper *ds,
275 const void *inData, int32_t length, void *outData,
276 UErrorCode *pErrorCode);
277
278/**
279 * Copy invariant ASCII char * strings and verify they are invariant.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * presumably suitable for the swapInvChars member of UDataSwapper -
 * confirm against the implementation.
280 * @internal
281 */
282U_CFUNC int32_t
283uprv_copyAscii(const UDataSwapper *ds,
284 const void *inData, int32_t length, void *outData,
285 UErrorCode *pErrorCode);
286
287/**
288 * Swap invariant char * strings EBCDIC->ASCII.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * presumably suitable for the swapInvChars member of UDataSwapper -
 * confirm against the implementation.
289 * @internal
290 */
291U_CFUNC int32_t
292uprv_asciiFromEbcdic(const UDataSwapper *ds,
293 const void *inData, int32_t length, void *outData,
294 UErrorCode *pErrorCode);
295
296/**
297 * Copy invariant EBCDIC char * strings and verify they are invariant.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * presumably suitable for the swapInvChars member of UDataSwapper -
 * confirm against the implementation.
298 * @internal
299 */
300U_CFUNC int32_t
301uprv_copyEbcdic(const UDataSwapper *ds,
302 const void *inData, int32_t length, void *outData,
303 UErrorCode *pErrorCode);
304
305/**
306 * Compare ASCII invariant char * with Unicode invariant UChar *
 *
 * The parameter list and return type are the same as those of
 * UDataCompareInvChars; see that typedef for the meaning of the
 * parameters and the use of -1 for NUL-terminated strings.
307 * @internal
308 */
309U_CFUNC int32_t
310uprv_compareInvAscii(const UDataSwapper *ds,
311 const char *outString, int32_t outLength,
312 const UChar *localString, int32_t localLength);
313
314/**
315 * Compare EBCDIC invariant char * with Unicode invariant UChar *
 *
 * The parameter list and return type are the same as those of
 * UDataCompareInvChars; see that typedef for the meaning of the
 * parameters and the use of -1 for NUL-terminated strings.
316 * @internal
317 */
318U_CFUNC int32_t
319uprv_compareInvEbcdic(const UDataSwapper *ds,
320 const char *outString, int32_t outLength,
321 const UChar *localString, int32_t localLength);
322
323/**
324 * \def uprv_compareInvWithUChar
325 * Compare an invariant-character string with a UChar string.
 *
 * Resolved at compile time from U_CHARSET_FAMILY: it names
 * uprv_compareInvAscii on ASCII-family platforms and
 * uprv_compareInvEbcdic on EBCDIC-family platforms. Any other charset
 * family stops the build with an #error.
326 * @internal
327 */
328#if U_CHARSET_FAMILY==U_ASCII_FAMILY
329# define uprv_compareInvWithUChar uprv_compareInvAscii
330#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
331# define uprv_compareInvWithUChar uprv_compareInvEbcdic
332#else
333# error Unknown charset family!
334#endif
335
336// utrie_swap.cpp -----------------------------------------------------------***
337
338/**
339 * Swaps a serialized UTrie.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there are assumed to apply here as well -
 * confirm against the implementation.
340 * @internal
341 */
342U_CAPI int32_t U_EXPORT2
343utrie_swap(const UDataSwapper *ds,
344 const void *inData, int32_t length, void *outData,
345 UErrorCode *pErrorCode);
346
347/**
348 * Swaps a serialized UTrie2.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there are assumed to apply here as well -
 * confirm against the implementation.
349 * @internal
350 */
351U_CAPI int32_t U_EXPORT2
352utrie2_swap(const UDataSwapper *ds,
353 const void *inData, int32_t length, void *outData,
354 UErrorCode *pErrorCode);
355
356/**
357 * Swaps a serialized UCPTrie.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there are assumed to apply here as well -
 * confirm against the implementation.
358 * @internal
359 */
360U_CAPI int32_t U_EXPORT2
361ucptrie_swap(const UDataSwapper *ds,
362 const void *inData, int32_t length, void *outData,
363 UErrorCode *pErrorCode);
364
365/**
366 * Swaps a serialized UTrie, UTrie2, or UCPTrie.
 *
 * The parameter list and return type are the same as those of UDataSwapFn;
 * the conventions documented there are assumed to apply here as well.
 * Presumably the trie version is detected from the data itself - confirm
 * against the implementation.
367 * @internal
368 */
369U_CAPI int32_t U_EXPORT2
370utrie_swapAnyVersion(const UDataSwapper *ds,
371 const void *inData, int32_t length, void *outData,
372 UErrorCode *pErrorCode);
373
374/* material... -------------------------------------------------------------- */
375
/*
 * NOTE(review): everything between this #if 0 and its matching #endif is
 * excluded from compilation. It sketches a udata_openSwap() declaration
 * labelled as belonging in udata.h; the types UDataMemory and
 * UDataMemoryIsAcceptable it uses are not declared in this header.
 * Whether such a function exists anywhere is not visible from here.
 */
376#if 0
377
378/* udata.h */
379
380/**
381 * Public API function in udata.c
382 *
383 * Same as udata_openChoice() but automatically swaps the data.
384 * isAcceptable, if not NULL, may accept data with endianness and charset family
385 * different from the current platform's properties.
386 * If the data is acceptable and the platform properties do not match, then
387 * the swap function is called to swap an allocated version of the data.
388 * Preflighting may or may not be performed depending on whether the size of
389 * the loaded data item is known.
390 *
391 * @param isAcceptable Same as for udata_openChoice(). May be NULL.
392 *
393 * @internal ICU 2.8
394 */
395U_CAPI UDataMemory * U_EXPORT2
396udata_openSwap(const char *path, const char *type, const char *name,
397 UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
398 UDataSwapFn *swap,
399 UDataPrintError *printError, void *printErrorContext,
400 UErrorCode *pErrorCode);
401
402#endif
403
404#endif