blob: 23344a47efdbc799597b0978c50d83428230acc1 [file] [log] [blame]
Hal Canaryf107a2f2018-07-25 16:52:48 -04001// Copyright 2018 Google LLC.
2// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3#ifndef SkUTF_DEFINED
4#define SkUTF_DEFINED
5
Brian Osman417d2992019-11-18 17:19:42 -05006#include "include/core/SkTypes.h"
Hal Canaryf107a2f2018-07-25 16:52:48 -04007#include <cstddef>
8#include <cstdint>
Julia Lavrovab6b7fff2020-09-11 13:59:49 +00009#include <memory>
Hal Canaryf107a2f2018-07-25 16:52:48 -040010
11typedef int32_t SkUnichar;
12
13namespace SkUTF {
14
15/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
16 If the sequence is invalid UTF-8, return -1.
17*/
Brian Osman417d2992019-11-18 17:19:42 -050018SK_SPI int CountUTF8(const char* utf8, size_t byteLength);
Hal Canaryf107a2f2018-07-25 16:52:48 -040019
20/** Given a sequence of aligned UTF-16 characters in machine-endian form,
21 return the number of unicode codepoints. If the sequence is invalid
22 UTF-16, return -1.
23*/
Brian Osman417d2992019-11-18 17:19:42 -050024SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength);
Hal Canaryf107a2f2018-07-25 16:52:48 -040025
26/** Given a sequence of aligned UTF-32 characters in machine-endian form,
27 return the number of unicode codepoints. If the sequence is invalid
28 UTF-32, return -1.
29*/
Brian Osman417d2992019-11-18 17:19:42 -050030SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength);
Hal Canaryf107a2f2018-07-25 16:52:48 -040031
32/** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
33 The pointer will be incremented to point at the next codepoint's start. If
34 invalid UTF-8 is encountered, set *ptr to end and return -1.
35*/
Brian Osman417d2992019-11-18 17:19:42 -050036SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end);
Hal Canaryf107a2f2018-07-25 16:52:48 -040037
38/** Given a sequence of aligned UTF-16 characters in machine-endian form,
39 return the first unicode codepoint. The pointer will be incremented to
40 point at the next codepoint's start. If invalid UTF-16 is encountered,
41 set *ptr to end and return -1.
42*/
Brian Osman417d2992019-11-18 17:19:42 -050043SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);
Hal Canaryf107a2f2018-07-25 16:52:48 -040044
45/** Given a sequence of aligned UTF-32 characters in machine-endian form,
46 return the first unicode codepoint. The pointer will be incremented to
47 point at the next codepoint's start. If invalid UTF-32 is encountered,
48 set *ptr to end and return -1.
49*/
Brian Osman417d2992019-11-18 17:19:42 -050050SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);
Hal Canaryf107a2f2018-07-25 16:52:48 -040051
52constexpr unsigned kMaxBytesInUTF8Sequence = 4;
53
54/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the
55 result in that array. Return the number of bytes in the result. If `utf8`
56 is null, simply return the number of bytes that would be used. For invalid
57 unicode codepoints, return 0.
58*/
Brian Osman417d2992019-11-18 17:19:42 -050059SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);
Hal Canaryf107a2f2018-07-25 16:52:48 -040060
61/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place
62 the result in that array. Return the number of UTF-16 code units in the
63 result (1 or 2). If `utf16` is null, simply return the number of code
64 units that would be used. For invalid unicode codepoints, return 0.
65*/
Brian Osman417d2992019-11-18 17:19:42 -050066SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
Hal Canaryf107a2f2018-07-25 16:52:48 -040067
Julia Lavrova90787fe2020-07-20 17:32:03 +000068/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
69 * If dst is not null, it is filled with the corresponding values up to its capacity.
70 * If there is an error, -1 is returned and the dst[] buffer is undefined.
71 */
72SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
73
Julia Lavrovab6b7fff2020-09-11 13:59:49 +000074/** Returns the number of resulting UTF8 values needed to convert the src utf16 sequence.
75 * If dst is not null, it is filled with the corresponding values up to its capacity.
76 * If there is an error, -1 is returned and the dst[] buffer is undefined.
77 */
78SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength);
79
Hal Canaryf107a2f2018-07-25 16:52:48 -040080} // namespace SkUTF
81
82#endif // SkUTF_DEFINED