// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
*/
13
14#ifndef __NORM2ALLMODES_H__
15#define __NORM2ALLMODES_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/edits.h"
22#include "unicode/normalizer2.h"
23#include "unicode/stringoptions.h"
24#include "unicode/unistr.h"
25#include "cpputils.h"
26#include "normalizer2impl.h"
27
28U_NAMESPACE_BEGIN
29
30// Intermediate class:
31// Has Normalizer2Impl and does boilerplate argument checking and setup.
32class Normalizer2WithImpl : public Normalizer2 {
33public:
34 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
35 virtual ~Normalizer2WithImpl();
36
37 // normalize
38 virtual UnicodeString &
39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_FAILURE(errorCode)) {
43 dest.setToBogus();
44 return dest;
45 }
46 const UChar *sArray=src.getBuffer();
47 if(&dest==&src || sArray==NULL) {
48 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
49 dest.setToBogus();
50 return dest;
51 }
52 dest.remove();
53 ReorderingBuffer buffer(impl, dest);
54 if(buffer.init(src.length(), errorCode)) {
55 normalize(sArray, sArray+src.length(), buffer, errorCode);
56 }
57 return dest;
58 }
59 virtual void
60 normalize(const UChar *src, const UChar *limit,
61 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
62
63 // normalize and append
64 virtual UnicodeString &
65 normalizeSecondAndAppend(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
Victor Changce4bf3c2021-01-19 16:34:24 +000068 return normalizeSecondAndAppend(first, second, true, errorCode);
Victor Chang73229502020-09-17 13:39:19 +010069 }
70 virtual UnicodeString &
71 append(UnicodeString &first,
72 const UnicodeString &second,
73 UErrorCode &errorCode) const {
Victor Changce4bf3c2021-01-19 16:34:24 +000074 return normalizeSecondAndAppend(first, second, false, errorCode);
Victor Chang73229502020-09-17 13:39:19 +010075 }
76 UnicodeString &
77 normalizeSecondAndAppend(UnicodeString &first,
78 const UnicodeString &second,
79 UBool doNormalize,
80 UErrorCode &errorCode) const {
81 uprv_checkCanGetBuffer(first, errorCode);
82 if(U_FAILURE(errorCode)) {
83 return first;
84 }
85 const UChar *secondArray=second.getBuffer();
86 if(&first==&second || secondArray==NULL) {
87 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
88 return first;
89 }
90 int32_t firstLength=first.length();
91 UnicodeString safeMiddle;
92 {
93 ReorderingBuffer buffer(impl, first);
94 if(buffer.init(firstLength+second.length(), errorCode)) {
95 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
96 safeMiddle, buffer, errorCode);
97 }
98 } // The ReorderingBuffer destructor finalizes the first string.
99 if(U_FAILURE(errorCode)) {
100 // Restore the modified suffix of the first string.
101 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
102 }
103 return first;
104 }
105 virtual void
106 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
107 UnicodeString &safeMiddle,
108 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
109 virtual UBool
110 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
111 UChar buffer[4];
112 int32_t length;
113 const UChar *d=impl.getDecomposition(c, buffer, length);
114 if(d==NULL) {
Victor Changce4bf3c2021-01-19 16:34:24 +0000115 return false;
Victor Chang73229502020-09-17 13:39:19 +0100116 }
117 if(d==buffer) {
118 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
119 } else {
Victor Changce4bf3c2021-01-19 16:34:24 +0000120 decomposition.setTo(false, d, length); // read-only alias
Victor Chang73229502020-09-17 13:39:19 +0100121 }
Victor Changce4bf3c2021-01-19 16:34:24 +0000122 return true;
Victor Chang73229502020-09-17 13:39:19 +0100123 }
124 virtual UBool
125 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
126 UChar buffer[30];
127 int32_t length;
128 const UChar *d=impl.getRawDecomposition(c, buffer, length);
129 if(d==NULL) {
Victor Changce4bf3c2021-01-19 16:34:24 +0000130 return false;
Victor Chang73229502020-09-17 13:39:19 +0100131 }
132 if(d==buffer) {
133 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
134 } else {
Victor Changce4bf3c2021-01-19 16:34:24 +0000135 decomposition.setTo(false, d, length); // read-only alias
Victor Chang73229502020-09-17 13:39:19 +0100136 }
Victor Changce4bf3c2021-01-19 16:34:24 +0000137 return true;
Victor Chang73229502020-09-17 13:39:19 +0100138 }
139 virtual UChar32
140 composePair(UChar32 a, UChar32 b) const {
141 return impl.composePair(a, b);
142 }
143
144 virtual uint8_t
145 getCombiningClass(UChar32 c) const {
146 return impl.getCC(impl.getNorm16(c));
147 }
148
149 // quick checks
150 virtual UBool
151 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
152 if(U_FAILURE(errorCode)) {
Victor Changce4bf3c2021-01-19 16:34:24 +0000153 return false;
Victor Chang73229502020-09-17 13:39:19 +0100154 }
155 const UChar *sArray=s.getBuffer();
156 if(sArray==NULL) {
157 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
Victor Changce4bf3c2021-01-19 16:34:24 +0000158 return false;
Victor Chang73229502020-09-17 13:39:19 +0100159 }
160 const UChar *sLimit=sArray+s.length();
161 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
162 }
163 virtual UNormalizationCheckResult
164 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
165 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
166 }
167 virtual int32_t
168 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
169 if(U_FAILURE(errorCode)) {
170 return 0;
171 }
172 const UChar *sArray=s.getBuffer();
173 if(sArray==NULL) {
174 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
175 return 0;
176 }
177 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
178 }
179 virtual const UChar *
180 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
181
182 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
183 return UNORM_YES;
184 }
185
186 const Normalizer2Impl &impl;
187};
188
189class DecomposeNormalizer2 : public Normalizer2WithImpl {
190public:
191 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
192 virtual ~DecomposeNormalizer2();
193
194private:
195 virtual void
196 normalize(const UChar *src, const UChar *limit,
197 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
198 impl.decompose(src, limit, &buffer, errorCode);
199 }
200 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
201 virtual void
202 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
203 UnicodeString &safeMiddle,
204 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
205 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
206 }
207 virtual const UChar *
208 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
209 return impl.decompose(src, limit, NULL, errorCode);
210 }
211 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
212 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
213 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
214 }
215 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
216 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
217 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
218};
219
220class ComposeNormalizer2 : public Normalizer2WithImpl {
221public:
222 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
223 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
224 virtual ~ComposeNormalizer2();
225
226private:
227 virtual void
228 normalize(const UChar *src, const UChar *limit,
229 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
Victor Changce4bf3c2021-01-19 16:34:24 +0000230 impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
Victor Chang73229502020-09-17 13:39:19 +0100231 }
232 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
233
234 void
235 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
236 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
237 if (U_FAILURE(errorCode)) {
238 return;
239 }
240 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
241 edits->reset();
242 }
243 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
244 impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
245 &sink, edits, errorCode);
246 sink.Flush();
247 }
248
249 virtual void
250 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
251 UnicodeString &safeMiddle,
252 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
253 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
254 }
255
256 virtual UBool
257 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
258 if(U_FAILURE(errorCode)) {
Victor Changce4bf3c2021-01-19 16:34:24 +0000259 return false;
Victor Chang73229502020-09-17 13:39:19 +0100260 }
261 const UChar *sArray=s.getBuffer();
262 if(sArray==NULL) {
263 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
Victor Changce4bf3c2021-01-19 16:34:24 +0000264 return false;
Victor Chang73229502020-09-17 13:39:19 +0100265 }
266 UnicodeString temp;
267 ReorderingBuffer buffer(impl, temp);
268 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
Victor Changce4bf3c2021-01-19 16:34:24 +0000269 return false;
Victor Chang73229502020-09-17 13:39:19 +0100270 }
Victor Changce4bf3c2021-01-19 16:34:24 +0000271 return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
Victor Chang73229502020-09-17 13:39:19 +0100272 }
273 virtual UBool
274 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
275 if(U_FAILURE(errorCode)) {
Victor Changce4bf3c2021-01-19 16:34:24 +0000276 return false;
Victor Chang73229502020-09-17 13:39:19 +0100277 }
278 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
279 return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
280 }
281 virtual UNormalizationCheckResult
282 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
283 if(U_FAILURE(errorCode)) {
284 return UNORM_MAYBE;
285 }
286 const UChar *sArray=s.getBuffer();
287 if(sArray==NULL) {
288 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
289 return UNORM_MAYBE;
290 }
291 UNormalizationCheckResult qcResult=UNORM_YES;
292 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
293 return qcResult;
294 }
295 virtual const UChar *
296 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
297 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
298 }
299 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
300 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
301 return impl.getCompQuickCheck(impl.getNorm16(c));
302 }
303 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
304 return impl.hasCompBoundaryBefore(c);
305 }
306 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
307 return impl.hasCompBoundaryAfter(c, onlyContiguous);
308 }
309 virtual UBool isInert(UChar32 c) const U_OVERRIDE {
310 return impl.isCompInert(c, onlyContiguous);
311 }
312
313 const UBool onlyContiguous;
314};
315
316class FCDNormalizer2 : public Normalizer2WithImpl {
317public:
318 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
319 virtual ~FCDNormalizer2();
320
321private:
322 virtual void
323 normalize(const UChar *src, const UChar *limit,
324 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
325 impl.makeFCD(src, limit, &buffer, errorCode);
326 }
327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
328 virtual void
329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
330 UnicodeString &safeMiddle,
331 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
332 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
333 }
334 virtual const UChar *
335 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
336 return impl.makeFCD(src, limit, NULL, errorCode);
337 }
338 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
339 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
340 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
341 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
342};
343
344struct Norm2AllModes : public UMemory {
345 Norm2AllModes(Normalizer2Impl *i)
Victor Changce4bf3c2021-01-19 16:34:24 +0000346 : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
Victor Chang73229502020-09-17 13:39:19 +0100347 ~Norm2AllModes();
348
349 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
350 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
351 static Norm2AllModes *createInstance(const char *packageName,
352 const char *name,
353 UErrorCode &errorCode);
354
355 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
356 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
357 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
358
359 Normalizer2Impl *impl;
360 ComposeNormalizer2 comp;
361 DecomposeNormalizer2 decomp;
362 FCDNormalizer2 fcd;
363 ComposeNormalizer2 fcc;
364};
365
366U_NAMESPACE_END
367
368#endif // !UCONFIG_NO_NORMALIZATION
369#endif // __NORM2ALLMODES_H__