blob: 1d89acaf12784322a2faba52ecb18730e4cc07a9 [file] [log] [blame]
Joe Tsai879b18d2018-08-03 17:22:24 -07001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "io"
9 "math/bits"
10 "strconv"
11 "unicode"
12 "unicode/utf16"
13 "unicode/utf8"
14
Damien Neile89e6242019-05-13 23:55:40 -070015 "google.golang.org/protobuf/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080018func appendString(out []byte, in string) ([]byte, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070019 var nerr errors.NonFatal
20 out = append(out, '"')
Herbie Ong1e096912019-04-16 00:14:03 -070021 i := indexNeedEscapeInString(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070022 in, out = in[i:], append(out, in[:i]...)
23 for len(in) > 0 {
24 switch r, n := utf8.DecodeRuneInString(in); {
25 case r == utf8.RuneError && n == 1:
26 nerr.AppendInvalidUTF8("")
27 in, out = in[1:], append(out, in[0]) // preserve invalid byte
28 case r < ' ' || r == '"' || r == '\\':
29 out = append(out, '\\')
30 switch r {
31 case '"', '\\':
32 out = append(out, byte(r))
33 case '\b':
34 out = append(out, 'b')
35 case '\f':
36 out = append(out, 'f')
37 case '\n':
38 out = append(out, 'n')
39 case '\r':
40 out = append(out, 'r')
41 case '\t':
42 out = append(out, 't')
43 default:
44 out = append(out, 'u')
45 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
46 out = strconv.AppendUint(out, uint64(r), 16)
47 }
48 in = in[n:]
49 default:
Herbie Ong1e096912019-04-16 00:14:03 -070050 i := indexNeedEscapeInString(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -070051 in, out = in[n+i:], append(out, in[:n+i]...)
52 }
53 }
54 out = append(out, '"')
55 return out, nerr.E
56}
57
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080058func (d *Decoder) parseString(in []byte) (string, int, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070059 var nerr errors.NonFatal
60 in0 := in
61 if len(in) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080062 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070063 }
64 if in[0] != '"' {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080065 return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
Joe Tsai879b18d2018-08-03 17:22:24 -070066 }
67 in = in[1:]
Herbie Ong1e096912019-04-16 00:14:03 -070068 i := indexNeedEscapeInBytes(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070069 in, out := in[i:], in[:i:i] // set cap to prevent mutations
70 for len(in) > 0 {
71 switch r, n := utf8.DecodeRune(in); {
72 case r == utf8.RuneError && n == 1:
73 nerr.AppendInvalidUTF8("")
74 in, out = in[1:], append(out, in[0]) // preserve invalid byte
75 case r < ' ':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080076 return "", 0, d.newSyntaxError("invalid character %q in string", r)
Joe Tsai879b18d2018-08-03 17:22:24 -070077 case r == '"':
78 in = in[1:]
79 n := len(in0) - len(in)
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080080 return string(out), n, nerr.E
Joe Tsai879b18d2018-08-03 17:22:24 -070081 case r == '\\':
82 if len(in) < 2 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080083 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070084 }
85 switch r := in[1]; r {
86 case '"', '\\', '/':
87 in, out = in[2:], append(out, r)
88 case 'b':
89 in, out = in[2:], append(out, '\b')
90 case 'f':
91 in, out = in[2:], append(out, '\f')
92 case 'n':
93 in, out = in[2:], append(out, '\n')
94 case 'r':
95 in, out = in[2:], append(out, '\r')
96 case 't':
97 in, out = in[2:], append(out, '\t')
98 case 'u':
99 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800100 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700101 }
102 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
103 if err != nil {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800104 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700105 }
106 in = in[6:]
107
108 r := rune(v)
109 if utf16.IsSurrogate(r) {
110 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800111 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700112 }
113 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
114 r = utf16.DecodeRune(r, rune(v))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800115 if in[0] != '\\' || in[1] != 'u' ||
116 r == unicode.ReplacementChar || err != nil {
117 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700118 }
119 in = in[6:]
120 }
121 out = append(out, string(r)...)
122 default:
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800123 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
Joe Tsai879b18d2018-08-03 17:22:24 -0700124 }
125 default:
Herbie Ong1e096912019-04-16 00:14:03 -0700126 i := indexNeedEscapeInBytes(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -0700127 in, out = in[n+i:], append(out, in[:n+i]...)
128 }
129 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800130 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700131}
132
// indexNeedEscapeInString reports the byte offset of the first rune in s that
// cannot be copied verbatim: a character below U+0020, a backslash, a double
// quote, or anything that decodes to utf8.RuneError. It returns len(s) when
// no such rune exists.
func indexNeedEscapeInString(s string) int {
	for offset, r := range s {
		switch {
		case r < ' ', r == '"', r == '\\', r == utf8.RuneError:
			return offset
		}
	}
	return len(s)
}
Herbie Ong1e096912019-04-16 00:14:03 -0700143
// indexNeedEscapeInBytes reports the byte offset of the first rune in b that
// cannot be copied verbatim: a character below U+0020, a backslash, a double
// quote, or anything that decodes to utf8.RuneError. It returns len(b) when
// no such rune exists.
// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
// resolved.
func indexNeedEscapeInBytes(b []byte) int {
	rest := b
	for len(rest) > 0 {
		r, size := utf8.DecodeRune(rest)
		switch {
		case r < ' ', r == '"', r == '\\', r == utf8.RuneError:
			// The offset is however much of b has been consumed so far.
			return len(b) - len(rest)
		}
		rest = rest[size:]
	}
	return len(b)
}