Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 1 | // Copyright 2018 Google LLC. |
| 2 | // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. |
| 3 | #ifndef SkUTF_DEFINED |
| 4 | #define SkUTF_DEFINED |
| 5 | |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 6 | #include "include/core/SkTypes.h" |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 7 | #include <cstddef> |
| 8 | #include <cstdint> |
Julia Lavrova | b6b7fff | 2020-09-11 13:59:49 +0000 | [diff] [blame] | 9 | #include <memory> |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 10 | |
| 11 | typedef int32_t SkUnichar; /* Holds one Unicode codepoint; the SkUTF::Next*() decoders also return -1 in this type for invalid input. */ |
| 12 | |
| 13 | namespace SkUTF { /* Utilities for counting and decoding codepoints in UTF-8/UTF-16/UTF-32 text, encoding a codepoint as UTF-8/UTF-16, and converting between UTF-8 and UTF-16. */ |
| 14 | |
| 15 | /** Given a sequence of `byteLength` UTF-8 bytes starting at `utf8`, return the number of Unicode codepoints. |
| 16 | If the sequence is invalid UTF-8, return -1. |
| 17 | */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 18 | SK_SPI int CountUTF8(const char* utf8, size_t byteLength); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 19 | |
| 20 | /** Given a sequence of aligned UTF-16 code units in machine-endian form, |
| 21 | return the number of Unicode codepoints. If the sequence is invalid |
| 22 | UTF-16, return -1. |
| 23 | NOTE(review): the length parameter is named `byteLength`, so it is presumably a byte count rather than a count of uint16_t units - confirm. */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 24 | SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 25 | |
| 26 | /** Given a sequence of aligned UTF-32 code units in machine-endian form, |
| 27 | return the number of Unicode codepoints. If the sequence is invalid |
| 28 | UTF-32, return -1. |
| 29 | NOTE(review): the length parameter is named `byteLength`, so it is presumably a byte count rather than a count of int32_t units - confirm. */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 30 | SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 31 | |
| 32 | /** Given a sequence of UTF-8 bytes beginning at `*ptr`, return the first Unicode codepoint. |
| 33 | `*ptr` will be advanced to point at the next codepoint's start. If |
| 34 | invalid UTF-8 is encountered, set `*ptr` to `end` and return -1. |
| 35 | `end` presumably points one past the last byte of the sequence (TODO confirm). */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 36 | SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 37 | |
| 38 | /** Given a sequence of aligned UTF-16 code units in machine-endian form beginning at `*ptr`, |
| 39 | return the first Unicode codepoint. `*ptr` will be advanced to |
| 40 | point at the next codepoint's start. If invalid UTF-16 is encountered, |
| 41 | set `*ptr` to `end` and return -1. |
| 42 | */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 43 | SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 44 | |
| 45 | /** Given a sequence of aligned UTF-32 code units in machine-endian form beginning at `*ptr`, |
| 46 | return the first Unicode codepoint. `*ptr` will be advanced to |
| 47 | point at the next codepoint's start. If invalid UTF-32 is encountered, |
| 48 | set `*ptr` to `end` and return -1. |
| 49 | */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 50 | SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 51 | |
| 52 | constexpr unsigned kMaxBytesInUTF8Sequence = 4; /* Maximum number of bytes in one codepoint's UTF-8 encoding; sizes the `utf8` array taken by ToUTF8(). */ |
| 53 | |
| 54 | /** Convert the Unicode codepoint `uni` into UTF-8. If `utf8` is non-null, place the |
| 55 | result in that array, which is declared with room for kMaxBytesInUTF8Sequence bytes. Return the number of bytes in the result. If `utf8` |
| 56 | is null, simply return the number of bytes that would be used. For invalid |
| 57 | Unicode codepoints, return 0. |
| 58 | */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 59 | SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 60 | |
| 61 | /** Convert the Unicode codepoint `uni` into UTF-16. If `utf16` is non-null, place |
| 62 | the result in that array, which is declared with room for 2 code units. Return the number of UTF-16 code units in the |
| 63 | result (1 or 2). If `utf16` is null, simply return the number of code |
| 64 | units that would be used. For invalid Unicode codepoints, return 0. |
| 65 | */ |
Brian Osman | 417d299 | 2019-11-18 17:19:42 -0500 | [diff] [blame] | 66 | SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr); |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 67 | |
Julia Lavrova | 90787fe | 2020-07-20 17:32:03 +0000 | [diff] [blame] | 68 | /** Returns the number of UTF-16 code units needed to hold the conversion of the UTF-8 sequence `src` (`srcByteLength` bytes long). |
| 69 | * If dst is not null, it is filled with the corresponding values up to its capacity (`dstCapacity`). |
| 70 | * If there is an error, -1 is returned and the contents of the dst[] buffer are undefined. |
| 71 | */ |
| 72 | SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength); |
| 73 | |
Julia Lavrova | b6b7fff | 2020-09-11 13:59:49 +0000 | [diff] [blame] | 74 | /** Returns the number of UTF-8 bytes needed to hold the conversion of the UTF-16 sequence `src`. |
| 75 | * If dst is not null, it is filled with the corresponding values up to its capacity (`dstCapacity`). |
| 76 | * If there is an error, -1 is returned and the contents of the dst[] buffer are undefined. |
| 77 | * NOTE(review): unlike UTF8ToUTF16's `srcByteLength`, this length is named `srcLength`; presumably it counts uint16_t code units rather than bytes - confirm. */ |
| 78 | SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength); |
| 79 | |
Hal Canary | f107a2f | 2018-07-25 16:52:48 -0400 | [diff] [blame] | 80 | } // namespace SkUTF |
| 81 | |
| 82 | #endif // SkUTF_DEFINED |