blob: b63dd5c8d3580a17b0bdf728b8e33197c44ba280 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "io"
9 "math/bits"
10 "strconv"
11 "unicode"
12 "unicode/utf16"
13 "unicode/utf8"
14
Damien Neile89e6242019-05-13 23:55:40 -070015 "google.golang.org/protobuf/internal/errors"
Joe Tsai879b18d2018-08-03 17:22:24 -070016)
17
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080018func appendString(out []byte, in string) ([]byte, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070019 out = append(out, '"')
Herbie Ong1e096912019-04-16 00:14:03 -070020 i := indexNeedEscapeInString(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070021 in, out = in[i:], append(out, in[:i]...)
22 for len(in) > 0 {
23 switch r, n := utf8.DecodeRuneInString(in); {
24 case r == utf8.RuneError && n == 1:
Damien Neil8c86fc52019-06-19 09:28:29 -070025 return out, errors.InvalidUTF8("")
Joe Tsai879b18d2018-08-03 17:22:24 -070026 case r < ' ' || r == '"' || r == '\\':
27 out = append(out, '\\')
28 switch r {
29 case '"', '\\':
30 out = append(out, byte(r))
31 case '\b':
32 out = append(out, 'b')
33 case '\f':
34 out = append(out, 'f')
35 case '\n':
36 out = append(out, 'n')
37 case '\r':
38 out = append(out, 'r')
39 case '\t':
40 out = append(out, 't')
41 default:
42 out = append(out, 'u')
43 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
44 out = strconv.AppendUint(out, uint64(r), 16)
45 }
46 in = in[n:]
47 default:
Herbie Ong1e096912019-04-16 00:14:03 -070048 i := indexNeedEscapeInString(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -070049 in, out = in[n+i:], append(out, in[:n+i]...)
50 }
51 }
52 out = append(out, '"')
Damien Neil8c86fc52019-06-19 09:28:29 -070053 return out, nil
Joe Tsai879b18d2018-08-03 17:22:24 -070054}
55
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080056func (d *Decoder) parseString(in []byte) (string, int, error) {
Joe Tsai879b18d2018-08-03 17:22:24 -070057 in0 := in
58 if len(in) == 0 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080059 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070060 }
61 if in[0] != '"' {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080062 return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
Joe Tsai879b18d2018-08-03 17:22:24 -070063 }
64 in = in[1:]
Herbie Ong1e096912019-04-16 00:14:03 -070065 i := indexNeedEscapeInBytes(in)
Joe Tsai879b18d2018-08-03 17:22:24 -070066 in, out := in[i:], in[:i:i] // set cap to prevent mutations
67 for len(in) > 0 {
68 switch r, n := utf8.DecodeRune(in); {
69 case r == utf8.RuneError && n == 1:
Damien Neil8c86fc52019-06-19 09:28:29 -070070 return "", 0, d.newSyntaxError("invalid UTF-8 in string")
Joe Tsai879b18d2018-08-03 17:22:24 -070071 case r < ' ':
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080072 return "", 0, d.newSyntaxError("invalid character %q in string", r)
Joe Tsai879b18d2018-08-03 17:22:24 -070073 case r == '"':
74 in = in[1:]
75 n := len(in0) - len(in)
Damien Neil8c86fc52019-06-19 09:28:29 -070076 return string(out), n, nil
Joe Tsai879b18d2018-08-03 17:22:24 -070077 case r == '\\':
78 if len(in) < 2 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080079 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070080 }
81 switch r := in[1]; r {
82 case '"', '\\', '/':
83 in, out = in[2:], append(out, r)
84 case 'b':
85 in, out = in[2:], append(out, '\b')
86 case 'f':
87 in, out = in[2:], append(out, '\f')
88 case 'n':
89 in, out = in[2:], append(out, '\n')
90 case 'r':
91 in, out = in[2:], append(out, '\r')
92 case 't':
93 in, out = in[2:], append(out, '\t')
94 case 'u':
95 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -080096 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -070097 }
98 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
99 if err != nil {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800100 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700101 }
102 in = in[6:]
103
104 r := rune(v)
105 if utf16.IsSurrogate(r) {
106 if len(in) < 6 {
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800107 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700108 }
109 v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
110 r = utf16.DecodeRune(r, rune(v))
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800111 if in[0] != '\\' || in[1] != 'u' ||
112 r == unicode.ReplacementChar || err != nil {
113 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
Joe Tsai879b18d2018-08-03 17:22:24 -0700114 }
115 in = in[6:]
116 }
117 out = append(out, string(r)...)
118 default:
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800119 return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
Joe Tsai879b18d2018-08-03 17:22:24 -0700120 }
121 default:
Herbie Ong1e096912019-04-16 00:14:03 -0700122 i := indexNeedEscapeInBytes(in[n:])
Joe Tsai879b18d2018-08-03 17:22:24 -0700123 in, out = in[n+i:], append(out, in[:n+i]...)
124 }
125 }
Herbie Ongd3f8f2d2019-03-06 00:28:23 -0800126 return "", 0, io.ErrUnexpectedEOF
Joe Tsai879b18d2018-08-03 17:22:24 -0700127}
128
// indexNeedEscapeInString reports the byte offset of the first rune in s that
// cannot be copied verbatim into a JSON string: a control character below
// U+0020, a backslash, a double quote, or utf8.RuneError (which covers both
// invalid UTF-8 bytes and a literal U+FFFD). It returns len(s) when s holds
// no such rune.
func indexNeedEscapeInString(s string) int {
	for pos, c := range s {
		switch {
		case c < ' ', c == '"', c == '\\', c == utf8.RuneError:
			return pos
		}
	}
	return len(s)
}
Herbie Ong1e096912019-04-16 00:14:03 -0700139
// indexNeedEscapeInBytes reports the byte offset of the first rune in b that
// cannot be copied verbatim into a JSON string: a control character below
// U+0020, a backslash, a double quote, or utf8.RuneError (which covers both
// invalid UTF-8 bytes and a literal U+FFFD). It returns len(b) when b holds
// no such rune.
// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
// resolved.
func indexNeedEscapeInBytes(b []byte) int {
	pos := 0
	for pos < len(b) {
		c, width := utf8.DecodeRune(b[pos:])
		switch c {
		case '"', '\\', utf8.RuneError:
			return pos
		}
		if c < ' ' {
			return pos
		}
		pos += width
	}
	return len(b)
}