blob: 0730ffa6f197b924654ce13c36a01be07e671ac6 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "io"
9 "math/bits"
10 "strconv"
11 "unicode"
12 "unicode/utf16"
13 "unicode/utf8"
14
Damien Neile89e6242019-05-13 23:55:40 -070015 "google.golang.org/protobuf/internal/errors"
Joe Tsai36dc22d2019-07-09 23:20:27 -070016 "google.golang.org/protobuf/internal/strs"
Joe Tsai879b18d2018-08-03 17:22:24 -070017)
18
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080019func appendString(out []byte, in string) ([]byte, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070020 out = append(out, '"')
Herbie Ong1e096912019-04-16 00:14:03 -070021 i := indexNeedEscapeInString(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070022 in, out = in[i:], append(out, in[:i]...)
23 for len(in) > 0 {
24 switch r, n := utf8.DecodeRuneInString(in); {
25 case r == utf8.RuneError && n == 1:
Damien Neil8c86fc52019-06-19 09:28:29 -070026 return out, errors.InvalidUTF8("")
Joe Tsai879b18d2018-08-03 17:22:24 -070027 case r < ' ' || r == '"' || r == '\\':
28 out = append(out, '\\')
29 switch r {
30 case '"', '\\':
31 out = append(out, byte(r))
32 case '\b':
33 out = append(out, 'b')
34 case '\f':
35 out = append(out, 'f')
36 case '\n':
37 out = append(out, 'n')
38 case '\r':
39 out = append(out, 'r')
40 case '\t':
41 out = append(out, 't')
42 default:
43 out = append(out, 'u')
44 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
45 out = strconv.AppendUint(out, uint64(r), 16)
46 }
47 in = in[n:]
48 default:
Herbie Ong1e096912019-04-16 00:14:03 -070049 i := indexNeedEscapeInString(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -070050 in, out = in[n+i:], append(out, in[:n+i]...)
51 }
52 }
53 out = append(out, '"')
Damien Neil8c86fc52019-06-19 09:28:29 -070054 return out, nil
Joe Tsai879b18d2018-08-03 17:22:24 -070055}
56
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080057func (d *Decoder) parseString(in []byte) (string, int, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070058 in0 := in
59 if len(in) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080060 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070061 }
62 if in[0] != '"' {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080063 return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
Joe Tsai879b18d2018-08-03 17:22:24 -070064 }
65 in = in[1:]
Herbie Ong1e096912019-04-16 00:14:03 -070066 i := indexNeedEscapeInBytes(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070067 in, out := in[i:], in[:i:i] // set cap to prevent mutations
68 for len(in) > 0 {
69 switch r, n := utf8.DecodeRune(in); {
70 case r == utf8.RuneError && n == 1:
Damien Neil8c86fc52019-06-19 09:28:29 -070071 return "", 0, d.newSyntaxError("invalid UTF-8 in string")
Joe Tsai879b18d2018-08-03 17:22:24 -070072 case r < ' ':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080073 return "", 0, d.newSyntaxError("invalid character %q in string", r)
Joe Tsai879b18d2018-08-03 17:22:24 -070074 case r == '"':
75 in = in[1:]
76 n := len(in0) - len(in)
Damien Neil8c86fc52019-06-19 09:28:29 -070077 return string(out), n, nil
Joe Tsai879b18d2018-08-03 17:22:24 -070078 case r == '\\':
79 if len(in) < 2 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080080 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070081 }
82 switch r := in[1]; r {
83 case '"', '\\', '/':
84 in, out = in[2:], append(out, r)
85 case 'b':
86 in, out = in[2:], append(out, '\b')
87 case 'f':
88 in, out = in[2:], append(out, '\f')
89 case 'n':
90 in, out = in[2:], append(out, '\n')
91 case 'r':
92 in, out = in[2:], append(out, '\r')
93 case 't':
94 in, out = in[2:], append(out, '\t')
95 case 'u':
96 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080097 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070098 }
99 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
100 if err != nil {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800101 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700102 }
103 in = in[6:]
104
105 r := rune(v)
106 if utf16.IsSurrogate(r) {
107 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800108 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700109 }
110 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
111 r = utf16.DecodeRune(r, rune(v))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800112 if in[0] != '\\' || in[1] != 'u' ||
113 r == unicode.ReplacementChar || err != nil {
114 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700115 }
116 in = in[6:]
117 }
118 out = append(out, string(r)...)
119 default:
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800120 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
Joe Tsai879b18d2018-08-03 17:22:24 -0700121 }
122 default:
Herbie Ong1e096912019-04-16 00:14:03 -0700123 i := indexNeedEscapeInBytes(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -0700124 in, out = in[n+i:], append(out, in[:n+i]...)
125 }
126 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800127 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700128}
129
// indexNeedEscapeInString reports the byte offset of the first rune in s
// that cannot be copied verbatim into a JSON string: a control character
// below U+0020, a backslash, a double quote, or utf8.RuneError (which is
// what ranging over a string yields for invalid UTF-8, and also matches a
// literal U+FFFD). If s contains no such rune, len(s) is returned.
func indexNeedEscapeInString(s string) int {
	for offset, r := range s {
		switch {
		case r < ' ', r == '\\', r == '"', r == utf8.RuneError:
			return offset
		}
	}
	return len(s)
}
Joe Tsai36dc22d2019-07-09 23:20:27 -0700140func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }