// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uinvchar.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004sep14
*   created by: Markus W. Scherer
*
*   Definitions for handling invariant characters, moved here from putil.c
*   for better modularization.
*/
21
22#ifndef __UINVCHAR_H__
23#define __UINVCHAR_H__
24
25#include "unicode/utypes.h"
26#ifdef __cplusplus
27#include "unicode/unistr.h"
28#endif
29
30/**
31 * Check if a char string only contains invariant characters.
32 * See utypes.h for details.
33 *
34 * @param s Input string pointer.
35 * @param length Length of the string, can be -1 if NUL-terminated.
Victor Changce4bf3c2021-01-19 16:34:24 +000036 * @return true if s contains only invariant characters.
Victor Chang73229502020-09-17 13:39:19 +010037 *
38 * @internal (ICU 2.8)
39 */
Victor Changce4bf3c2021-01-19 16:34:24 +000040U_CAPI UBool U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +010041uprv_isInvariantString(const char *s, int32_t length);
42
43/**
44 * Check if a Unicode string only contains invariant characters.
45 * See utypes.h for details.
46 *
47 * @param s Input string pointer.
48 * @param length Length of the string, can be -1 if NUL-terminated.
Victor Changce4bf3c2021-01-19 16:34:24 +000049 * @return true if s contains only invariant characters.
Victor Chang73229502020-09-17 13:39:19 +010050 *
51 * @internal (ICU 2.8)
52 */
Victor Changce4bf3c2021-01-19 16:34:24 +000053U_CAPI UBool U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +010054uprv_isInvariantUString(const UChar *s, int32_t length);
55
/**
 * \def U_UPPER_ORDINAL
 * Get the ordinal number of an uppercase invariant character.
 *
 * No validation is performed: the result is only meaningful when x is an
 * uppercase letter 'A'..'Z' in the platform charset.
 * In the EBCDIC form the argument x is evaluated more than once, so it
 * must not have side effects.
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define U_UPPER_ORDINAL(x) ((x)-'A')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC letters are split into three ranges: A-I, J-R, S-Z. */
#   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
                              (((x) < 'S') ? ((x)-'J'+9) : \
                               ((x)-'S'+18)))
#else
#   error Unknown charset family!
#endif
70
71#ifdef __cplusplus
72
73U_NAMESPACE_BEGIN
74
/**
 * Like U_UPPER_ORDINAL(x) but with validation.
 * Returns 0..25 for A..Z else a value outside 0..25.
 *
 * Callers are expected to range-check the result: for a non-letter input the
 * ASCII branch returns the raw difference c - 'A', and the EBCDIC branch
 * returns -1 for code points that fall in the gaps between the letter ranges.
 *
 * @param c Character code in the platform charset.
 * @return 0..25 for 'A'..'Z', otherwise a value outside 0..25.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'I') { return c - 'A'; }  // A-I --> 0-8
    if (c < 'J') { return -1; }        // gap between I and J
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
    if (c < 'S') { return -1; }        // gap between R and S
    return c - 'S' + 18;  // S-Z --> 18..25
#else
#   error Unknown charset family!
#endif
}
94
/**
 * Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
 * Returns 0..25 for a..z else a value outside 0..25.
 *
 * Callers are expected to range-check the result: for a non-letter input the
 * ASCII branch returns the raw difference c - 'a', and the EBCDIC branch
 * returns -1 for code points that fall in the gaps between the letter ranges.
 *
 * @param c Character code in the platform charset.
 * @return 0..25 for 'a'..'z', otherwise a value outside 0..25.
 */
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'i') { return c - 'a'; }  // a-i --> 0-8
    if (c < 'j') { return -1; }        // gap between i and j
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    if (c < 's') { return -1; }        // gap between r and s
    return c - 's' + 18;  // s-z --> 18..25
#else
#   error Unknown charset family!
#endif
}
112
113U_NAMESPACE_END
114
115#endif
116
117/**
118 * Returns true if c == '@' is possible.
119 * The @ sign is variant, and the @ sign used on one
120 * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
121 * @internal
122 */
123U_CFUNC UBool
124uprv_isEbcdicAtSign(char c);
125
/**
 * \def uprv_isAtSign
 * Returns true if c == '@' is possible.
 * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_isAtSign(c) ((c)=='@')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
#else
#   error Unknown charset family!
#endif
139
140/**
141 * Compare two EBCDIC invariant-character strings in ASCII order.
142 * @internal
143 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000144U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100145uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
146
/**
 * \def uprv_compareInvCharsAsAscii
 * Compare two invariant-character strings in ASCII order.
 * For ASCII, expands to uprv_strcmp(); for EBCDIC, calls
 * uprv_compareInvEbcdicAsAscii().
 *
 * NOTE(review): uprv_strcmp is not declared in this header; the including
 * file must bring it into scope (presumably via cstring.h -- confirm).
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
#else
#   error Unknown charset family!
#endif
159
160/**
161 * Converts an EBCDIC invariant character to ASCII.
162 * @internal
163 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000164U_CAPI char U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100165uprv_ebcdicToAscii(char c);
166
/**
 * \def uprv_invCharToAscii
 * Converts an invariant character to ASCII.
 * For ASCII, expands to the argument unchanged; for EBCDIC, calls
 * uprv_ebcdicToAscii().
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToAscii(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
#else
#   error Unknown charset family!
#endif
179
180/**
181 * Converts an EBCDIC invariant character to lowercase ASCII.
182 * @internal
183 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000184U_CAPI char U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100185uprv_ebcdicToLowercaseAscii(char c);
186
/**
 * \def uprv_invCharToLowercaseAscii
 * Converts an invariant character to lowercase ASCII.
 * For ASCII, this is an alias for uprv_asciitolower; for EBCDIC, an alias
 * for uprv_ebcdicToLowercaseAscii.
 *
 * NOTE(review): uprv_asciitolower is not declared in this header; the
 * including file must bring it into scope (presumably via cstring.h -- confirm).
 * @internal
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
#else
#   error Unknown charset family!
#endif
199
200/**
201 * Copy EBCDIC to ASCII
202 * @internal
203 * @see uprv_strncpy
204 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000205U_CAPI uint8_t* U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100206uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
207
208
209/**
210 * Copy ASCII to EBCDIC
211 * @internal
212 * @see uprv_strncpy
213 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000214U_CAPI uint8_t* U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100215uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
216
217
218
219#endif