blob: 385102aadb0ddbd665f8363ab1a1f1cf3aaee94a [file] [log] [blame]
// Copyright 2018 Google LLC.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
#ifndef SkUTF_DEFINED
#define SkUTF_DEFINED

#include <cstddef>
#include <cstdint>

/** A single unicode codepoint, stored as a signed 32-bit integer. The
    functions below use the value -1 to report invalid input.
*/
typedef int32_t SkUnichar;

namespace SkUTF {

/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
    If the sequence is invalid UTF-8, return -1.

    @param utf8        start of the UTF-8 byte sequence
    @param byteLength  length of the sequence in bytes
    @return            number of codepoints, or -1 on invalid input
*/
int CountUTF8(const char* utf8, size_t byteLength);

/** Given a sequence of aligned UTF-16 characters in machine-endian form,
    return the number of unicode codepoints. If the sequence is invalid
    UTF-16, return -1.

    NOTE(review): the length parameter is named `byteLength`, which suggests
    it counts bytes rather than 16-bit code units -- confirm against the
    implementation.

    @param utf16       start of the UTF-16 sequence
    @param byteLength  length of the sequence (see note above)
    @return            number of codepoints, or -1 on invalid input
*/
int CountUTF16(const uint16_t* utf16, size_t byteLength);

/** Given a sequence of aligned UTF-32 characters in machine-endian form,
    return the number of unicode codepoints. If the sequence is invalid
    UTF-32, return -1.

    NOTE(review): the length parameter is named `byteLength`, which suggests
    it counts bytes rather than 32-bit elements -- confirm against the
    implementation.

    @param utf32       start of the UTF-32 sequence
    @param byteLength  length of the sequence (see note above)
    @return            number of codepoints, or -1 on invalid input
*/
int CountUTF32(const int32_t* utf32, size_t byteLength);

/** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
    The pointer will be incremented to point at the next codepoint's start. If
    invalid UTF-8 is encountered, set *ptr to end and return -1.

    @param ptr  in/out: address of the current read position; advanced on
                success, set to `end` on failure
    @param end  end of the sequence (presumably one past the last byte --
                confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF8(const char** ptr, const char* end);

/** Given a sequence of aligned UTF-16 characters in machine-endian form,
    return the first unicode codepoint. The pointer will be incremented to
    point at the next codepoint's start. If invalid UTF-16 is encountered,
    set *ptr to end and return -1.

    @param ptr  in/out: address of the current read position; advanced on
                success, set to `end` on failure
    @param end  end of the sequence (presumably one past the last code unit
                -- confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);

/** Given a sequence of aligned UTF-32 characters in machine-endian form,
    return the first unicode codepoint. The pointer will be incremented to
    point at the next codepoint's start. If invalid UTF-32 is encountered,
    set *ptr to end and return -1.

    @param ptr  in/out: address of the current read position; advanced on
                success, set to `end` on failure
    @param end  end of the sequence (presumably one past the last element
                -- confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);

/** Size of the output array taken by ToUTF8(), i.e. the largest number of
    bytes a single UTF-8 encoded codepoint is expected to occupy.
*/
constexpr unsigned kMaxBytesInUTF8Sequence = 4;

/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the
    result in that array. Return the number of bytes in the result. If `utf8`
    is null, simply return the number of bytes that would be used. For invalid
    unicode codepoints, return 0.

    @param uni   codepoint to encode
    @param utf8  optional output array declared with room for
                 kMaxBytesInUTF8Sequence bytes; may be null (the default)
    @return      number of bytes in the encoding, or 0 for an invalid codepoint
*/
size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);

/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place
    the result in that array. Return the number of UTF-16 code units in the
    result (1 or 2). If `utf16` is null, simply return the number of code
    units that would be used. For invalid unicode codepoints, return 0.

    @param uni    codepoint to encode
    @param utf16  optional output array declared with room for 2 code units;
                  may be null (the default)
    @return       number of code units in the encoding (1 or 2), or 0 for an
                  invalid codepoint
*/
size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);

}  // namespace SkUTF

#endif  // SkUTF_DEFINED