blob: b23e6419f2816328908f6b385d32d5f58e9bc762 [file] [log] [blame]
Sergey Mashkov0b12ffc2017-07-26 12:59:16 +03001package kotlinx.coroutines.experimental.io.internal
2
Sergey Mashkov6d059932017-10-06 18:46:23 +03003import kotlinx.io.core.*
4import java.nio.*
5import java.nio.charset.*
Sergey Mashkov0b12ffc2017-07-26 12:59:16 +03006
/**
 * Reads the remaining bytes of this buffer one by one, treating each as a 7-bit ASCII character
 * and handing it to [consumer].
 *
 * Processing stops at the first byte that has its high bit set or that [consumer] refuses
 * (by returning `false`). In both cases the offending byte is pushed back, so the buffer position
 * points at it when the function returns.
 *
 * @return `true` if every remaining byte was consumed, `false` if processing stopped early
 */
internal inline fun ByteBuffer.decodeASCII(consumer: (Char) -> Boolean): Boolean {
    while (hasRemaining()) {
        val code = get().toInt() and 0xff

        // the consumer is only asked about bytes that fit into 7 bits
        val accepted = code < 0x80 && consumer(code.toChar())
        if (!accepted) {
            // un-read the byte we have just taken
            position(position() - 1)
            return false
        }
    }

    return true
}
23
/**
 * Decodes the remaining bytes of this buffer as UTF-8 and passes every decoded UTF-16 char to [consumer]
 * until the buffer is exhausted, the [consumer] returns `false` or an incomplete character is found.
 *
 * Code points outside of the BMP are delivered as a high surrogate followed by a low surrogate.
 *
 * If the [consumer] returns `false` then the buffer position is moved back to the first byte of the
 * character being delivered, so all of its bytes remain unread, and `-1` is returned. Note that when the
 * rejected char is a low surrogate, the corresponding high surrogate has already been passed to the [consumer].
 *
 * If the buffer ends in the middle of a multi-byte character then the position is left at the first byte
 * of that character.
 *
 * Overlong encodings and encoded surrogate code points are not rejected.
 *
 * @return `0` if all bytes were processed, `-1` if the [consumer] rejected a character, otherwise the total
 * number of bytes (including the first one) of the incomplete character at the end of the buffer
 * @throws MalformedInputException if a byte that can't start a character is found at a character boundary,
 * or if a byte inside of a multi-byte character is not a continuation byte (`10xxxxxx`)
 * @throws IllegalArgumentException if the decoded value is rejected by `isValidCodePoint`
 */
internal inline fun ByteBuffer.decodeUTF8(consumer: (Char) -> Boolean): Int {
    var byteCount = 0 // trailing bytes still expected for the character being decoded
    var value = 0 // code point bits accumulated so far
    var lastByteCount = 0 // total encoded length of the character being decoded

    while (hasRemaining()) {
        val v = get().toInt() and 0xff
        when {
            v and 0x80 == 0 -> {
                // a 7-bit byte is never valid in the middle of a multi-byte character
                if (byteCount != 0) throw MalformedInputException(0)
                if (!consumer(v.toChar())) {
                    position(position() - 1)
                    return -1
                }
            }
            byteCount == 0 -> {
                // first unicode byte: the number of leading one bits is the total length of the character,
                // the bits that follow them are the most significant bits of the code point

                var mask = 0x80
                value = v

                while (byteCount < 6 && (value and mask) != 0) { // TODO do we support 6 bytes unicode?
                    value = value and mask.inv()
                    mask = mask shr 1
                    byteCount++
                }

                // a single leading one bit means a continuation byte with no character to continue,
                // a bit still set under the mask means more than 6 leading one bits (0xfe, 0xff)
                if (byteCount < 2 || (value and mask) != 0) throw MalformedInputException(0)

                lastByteCount = byteCount
                byteCount--

                if (byteCount > remaining()) {
                    position(position() - 1) // return one byte back
                    return lastByteCount
                }
            }
            else -> {
                // trailing unicode byte: has to be 10xxxxxx and carries 6 bits of payload
                if ((v and 0xc0) != 0x80) throw MalformedInputException(0)

                value = (value shl 6) or (v and 0x3f)
                byteCount--

                if (byteCount == 0) {
                    if (isBmpCodePoint(value)) {
                        if (!consumer(value.toChar())) {
                            position(position() - lastByteCount)
                            return -1
                        }
                    } else if (!isValidCodePoint(value)) {
                        throw IllegalArgumentException("Malformed code-point ${Integer.toHexString(value)} found")
                    } else {
                        if (!consumer(highSurrogate(value).toChar()) ||
                            !consumer(lowSurrogate(value).toChar())) {
                            // push back the whole character, not only the rejected surrogate
                            position(position() - lastByteCount)
                            return -1
                        }
                    }

                    value = 0
                }
            }
        }
    }

    return 0
}
99
/** The greatest value that is a Unicode code point. */
private const val MaxCodePoint: Int = 0x10FFFF

/** The first code unit of the low (trailing) surrogate range. */
private const val MinLowSurrogate: Int = 0xDC00

/** The first code unit of the high (leading) surrogate range. */
private const val MinHighSurrogate: Int = 0xD800

/** The first code point that doesn't fit into a single UTF-16 code unit. */
private const val MinSupplementary: Int = 0x10000

/**
 * The value to add to `cp ushr 10` to get the high surrogate of a supplementary code point `cp`:
 * it maps [MinSupplementary] to [MinHighSurrogate].
 */
private const val HighSurrogateMagic: Int = MinHighSurrogate - (MinSupplementary ushr 10)
105
/**
 * Checks whether [cp] fits into 16 bits, that is no bit above the lowest 16 is set.
 */
private fun isBmpCodePoint(cp: Int): Boolean {
    val bitsAboveBmp = cp and 0xFFFF.inv()
    return bitsAboveBmp == 0
}
/**
 * Checks whether [codePoint] is in the range `0..MaxCodePoint`.
 * Negative values (for example, produced by an integer overflow while decoding) are not valid code points.
 */
private fun isValidCodePoint(codePoint: Int) = codePoint in 0..MaxCodePoint
/**
 * Computes the low (trailing) surrogate for [cp]: the 10 least significant bits of [cp]
 * added to [MinLowSurrogate]. The range of [cp] is not checked.
 */
private fun lowSurrogate(cp: Int): Int {
    val lowTenBits = cp and 0x3ff
    return MinLowSurrogate + lowTenBits
}
/**
 * Computes the high (leading) surrogate for [cp]: the bits of [cp] above the 10 least significant ones
 * added to [HighSurrogateMagic]. The range of [cp] is not checked.
 */
private fun highSurrogate(cp: Int): Int {
    val upperBits = cp ushr 10
    return HighSurrogateMagic + upperBits
}