blob: fc5d53ff0fb01e54e8d03bd38012269dc26dcbdf [file] [log] [blame]
alandonovanac23acb2020-06-11 17:56:15 -04001// Copyright 2020 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package starlarkjson defines utilities for converting Starlark values
6// to/from JSON strings. The most recent IETF standard for JSON is
7// https://www.ietf.org/rfc/rfc7159.txt.
8package starlarkjson // import "go.starlark.net/starlarkjson"
9
10import (
11 "bytes"
12 "encoding/json"
13 "fmt"
14 "log"
15 "math"
16 "math/big"
17 "sort"
18 "strconv"
19 "strings"
20 "unicode/utf8"
21
22 "go.starlark.net/starlark"
23 "go.starlark.net/starlarkstruct"
24)
25
// Module json is a Starlark module of JSON-related functions.
//
//	json = module(
//	   encode,
//	   decode,
//	   indent,
//	)
//
// def encode(x):
//
// The encode function accepts one required positional argument,
// which it converts to JSON by cases:
//   - A Starlark value that implements Go's standard json.Marshaler
//     interface defines its own JSON encoding.
//   - None, True, and False are converted to null, true, and false, respectively.
//   - Starlark int values, no matter how large, are encoded as decimal integers.
//     Some decoders may not be able to decode very large integers.
//   - Starlark float values are encoded using decimal point notation,
//     even if the value is an integer.
//     It is an error to encode a non-finite floating-point value.
//   - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
//   - A Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
//     It is an error if any key is not a string.
//   - Any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
//   - A Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
//
// If an application-defined type matches more than one of the cases described above
// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence.
// Encoding any other value yields an error.
//
// def decode(x):
//
// The decode function accepts one positional parameter, a JSON string.
// It returns the Starlark value that the string denotes.
//   - Numbers are parsed as int or float, depending on whether they
//     contain a decimal point or an exponent.
//   - JSON objects are parsed as new unfrozen Starlark dicts.
//   - JSON arrays are parsed as new unfrozen Starlark lists.
//
// Decoding fails if x is not a valid JSON string.
//
// def indent(str, *, prefix="", indent="\t"):
//
// The indent function pretty-prints a valid JSON encoding,
// and returns a string containing the indented form.
// It accepts one required positional parameter, the JSON string,
// and two optional keyword-only string parameters, prefix and indent,
// that specify a prefix of each new line, and the unit of indentation.
var Module = &starlarkstruct.Module{
	Name: "json",
	Members: starlark.StringDict{
		"encode": starlark.NewBuiltin("json.encode", encode),
		"decode": starlark.NewBuiltin("json.decode", decode),
		"indent": starlark.NewBuiltin("json.indent", indent),
	},
}
81
// encode implements the Starlark builtin json.encode(x).
//
// It unpacks exactly one positional argument, serializes it to JSON text
// in an in-memory buffer, and returns that text as a starlark.String.
// Any error produced while encoding is returned prefixed with the
// builtin's name.
func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
	var x starlark.Value
	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
		return nil, err
	}

	buf := new(bytes.Buffer)

	// quote appends s to buf as a JSON string literal.
	// quoteSpace is scratch storage handed to strconv.AppendQuote
	// on the fast path.
	var quoteSpace [128]byte
	quote := func(s string) {
		// Non-trivial escaping is handled by Go's encoding/json.
		if isPrintableASCII(s) {
			buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
		} else {
			// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
			// Can we avoid this call?
			// The Marshal error is discarded; presumably marshaling
			// a plain Go string cannot fail.
			data, _ := json.Marshal(s)
			buf.Write(data)
		}
	}

	// emit appends the JSON encoding of x to buf, recursing into
	// containers. The order of the cases below is significant: a value
	// that satisfies several of these interfaces is encoded by the
	// first matching case.
	var emit func(x starlark.Value) error
	emit = func(x starlark.Value) error {
		switch x := x.(type) {
		case json.Marshaler:
			// Application-defined starlark.Value types
			// may define their own JSON encoding.
			data, err := x.MarshalJSON()
			if err != nil {
				return err
			}
			buf.Write(data)

		case starlark.NoneType:
			buf.WriteString("null")

		case starlark.Bool:
			if x {
				buf.WriteString("true")
			} else {
				buf.WriteString("false")
			}

		case starlark.Int:
			fmt.Fprint(buf, x)

		case starlark.Float:
			if !isFinite(float64(x)) {
				return fmt.Errorf("cannot encode non-finite float %v", x)
			}
			// NOTE(review): the package documentation promises a decimal
			// point even for integral values, but fmt's %g verb prints a
			// float64 1.0 as "1" unless starlark.Float customizes its
			// formatting (e.g. via fmt.Formatter) -- confirm.
			fmt.Fprintf(buf, "%g", x)

		case starlark.String:
			quote(string(x))

		case starlark.IterableMapping:
			// e.g. dict (must have string keys)
			buf.WriteByte('{')
			items := x.Items()
			// Validate every key before sorting, so that the type
			// assertions in the comparison function cannot panic.
			for _, item := range items {
				if _, ok := item[0].(starlark.String); !ok {
					return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
				}
			}
			// Emit entries in sorted key order.
			sort.Slice(items, func(i, j int) bool {
				return items[i][0].(starlark.String) < items[j][0].(starlark.String)
			})
			for i, item := range items {
				if i > 0 {
					buf.WriteByte(',')
				}
				k, _ := starlark.AsString(item[0])
				quote(k)
				buf.WriteByte(':')
				if err := emit(item[1]); err != nil {
					return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
				}
			}
			buf.WriteByte('}')

		case starlark.Iterable:
			// e.g. tuple, list
			buf.WriteByte('[')
			iter := x.Iterate()
			// The deferred call runs when this invocation of emit
			// returns, including on the error path below.
			defer iter.Done()
			var elem starlark.Value
			for i := 0; iter.Next(&elem); i++ {
				if i > 0 {
					buf.WriteByte(',')
				}
				if err := emit(elem); err != nil {
					return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
				}
			}
			buf.WriteByte(']')

		case starlark.HasAttrs:
			// e.g. struct
			buf.WriteByte('{')
			// Sort a copy of the attribute names rather than the
			// slice returned by AttrNames.
			var names []string
			names = append(names, x.AttrNames()...)
			sort.Strings(names)
			for i, name := range names {
				v, err := x.Attr(name)
				if err != nil || v == nil {
					// NOTE(review): log.Fatalf terminates the whole
					// process from library code; returning an error
					// would be gentler on embedders. That change also
					// needs the file's "log" import removed.
					log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
				}
				if i > 0 {
					buf.WriteByte(',')
				}
				quote(name)
				buf.WriteByte(':')
				if err := emit(v); err != nil {
					return fmt.Errorf("in field .%s: %v", name, err)
				}
			}
			buf.WriteByte('}')

		default:
			return fmt.Errorf("cannot encode %s as JSON", x.Type())
		}
		return nil
	}

	if err := emit(x); err != nil {
		return nil, fmt.Errorf("%s: %v", b.Name(), err)
	}
	return starlark.String(buf.String()), nil
}
211
// isPrintableASCII reports whether s contains only printable ASCII,
// that is, bytes in the range 0x20 (space) through 0x7e ('~').
//
// DEL (0x7f) is deliberately excluded: it is a control character, and
// strconv.Quote renders it as the escape \x7f, which is not a legal
// JSON escape sequence. Strings containing it must therefore not be
// treated as safe for Go-style quoting.
func isPrintableASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b < 0x20 || b >= 0x7f {
			return false
		}
	}
	return true
}
222
// isFinite reports whether f represents a finite rational value,
// i.e. f is neither a NaN nor an infinity of either sign.
// It is equivalent to math.Abs(f) <= math.MaxFloat64.
func isFinite(f float64) bool {
	if math.IsNaN(f) {
		return false
	}
	return !math.IsInf(f, 0)
}
228
229func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
230 prefix, indent := "", "\t" // keyword-only
231 if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
232 "prefix?", &prefix,
233 "indent?", &indent,
234 ); err != nil {
235 return nil, err
236 }
237 var str string // positional-only
238 if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
239 return nil, err
240 }
241
242 buf := new(bytes.Buffer)
243 if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
244 return nil, fmt.Errorf("%s: %v", b.Name(), err)
245 }
246 return starlark.String(buf.String()), nil
247}
248
249func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
250 var s string
251 if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
252 return nil, err
253 }
254
255 // The decoder necessarily makes certain representation choices
256 // such as list vs tuple, struct vs dict, int vs float.
257 // In principle, we could parameterize it to allow the caller to
258 // control the returned types, but there's no compelling need yet.
259
260 // Use panic/recover with a distinguished type (failure) for error handling.
261 type failure string
262 fail := func(format string, args ...interface{}) {
263 panic(failure(fmt.Sprintf(format, args...)))
264 }
265
266 i := 0
267
268 // skipSpace consumes leading spaces, and reports whether there is more input.
269 skipSpace := func() bool {
270 for ; i < len(s); i++ {
271 b := s[i]
272 if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
273 return true
274 }
275 }
276 return false
277 }
278
279 // next consumes leading spaces and returns the first non-space.
280 // It panics if at EOF.
281 next := func() byte {
282 if skipSpace() {
283 return s[i]
284 }
285 fail("unexpected end of file")
286 panic("unreachable")
287 }
288
289 // parse returns the next JSON value from the input.
290 // It consumes leading but not trailing whitespace.
291 // It panics on error.
292 var parse func() starlark.Value
293 parse = func() starlark.Value {
294 b := next()
295 switch b {
296 case '"':
297 // string
298
299 // Find end of quotation.
300 // Also, record whether trivial unquoting is safe.
301 // Non-trivial unquoting is handled by Go's encoding/json.
302 safe := true
303 closed := false
304 j := i + 1
305 for ; j < len(s); j++ {
306 b := s[j]
307 if b == '\\' {
308 safe = false
309 j++ // skip x in \x
310 } else if b == '"' {
311 closed = true
312 j++ // skip '"'
313 break
314 } else if b >= utf8.RuneSelf {
315 safe = false
316 }
317 }
318 if !closed {
319 fail("unclosed string literal")
320 }
321
322 r := s[i:j]
323 i = j
324
325 // unquote
326 if safe {
327 r = r[1 : len(r)-1]
328 } else if err := json.Unmarshal([]byte(r), &r); err != nil {
329 fail("%s", err)
330 }
331 return starlark.String(r)
332
333 case 'n':
334 if strings.HasPrefix(s[i:], "null") {
335 i += len("null")
336 return starlark.None
337 }
338
339 case 't':
340 if strings.HasPrefix(s[i:], "true") {
341 i += len("true")
342 return starlark.True
343 }
344
345 case 'f':
346 if strings.HasPrefix(s[i:], "false") {
347 i += len("false")
348 return starlark.False
349 }
350
351 case '[':
352 // array
353 var elems []starlark.Value
354
355 i++ // '['
356 b = next()
357 if b != ']' {
358 for {
359 elem := parse()
360 elems = append(elems, elem)
361 b = next()
362 if b != ',' {
363 if b != ']' {
364 fail("got %q, want ',' or ']'", b)
365 }
366 break
367 }
368 i++ // ','
369 }
370 }
371 i++ // ']'
372 return starlark.NewList(elems)
373
374 case '{':
375 // object
376 dict := new(starlark.Dict)
377
378 i++ // '{'
379 b = next()
380 if b != '}' {
381 for {
382 key := parse()
383 if _, ok := key.(starlark.String); !ok {
384 fail("got %s for object key, want string", key.Type())
385 }
386 b = next()
387 if b != ':' {
388 fail("after object key, got %q, want ':' ", b)
389 }
390 i++ // ':'
391 value := parse()
392 dict.SetKey(key, value) // can't fail
393 b = next()
394 if b != ',' {
395 if b != '}' {
396 fail("in object, got %q, want ',' or '}'", b)
397 }
398 break
399 }
400 i++ // ','
401 }
402 }
403 i++ // '}'
404 return dict
405
406 default:
407 // number?
408 if isdigit(b) || b == '-' {
409 // scan literal. Allow [0-9+-eE.] for now.
410 float := false
411 var j int
412 for j = i + 1; j < len(s); j++ {
413 b = s[j]
414 if isdigit(b) {
415 // ok
416 } else if b == '.' ||
417 b == 'e' ||
418 b == 'E' ||
419 b == '+' ||
420 b == '-' {
421 float = true
422 } else {
423 break
424 }
425 }
426 num := s[i:j]
427 i = j
428
429 // Unlike most C-like languages,
430 // JSON disallows a leading zero before a digit.
431 digits := num
432 if num[0] == '-' {
433 digits = num[1:]
434 }
435 if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
436 fail("invalid number: %s", num)
437 }
438
439 // parse literal
440 if float {
441 x, err := strconv.ParseFloat(num, 64)
442 if err != nil {
443 fail("invalid number: %s", num)
444 }
445 return starlark.Float(x)
446 } else {
447 x, ok := new(big.Int).SetString(num, 10)
448 if !ok {
449 fail("invalid number: %s", num)
450 }
451 return starlark.MakeBigInt(x)
452 }
453 }
454 }
455 fail("unexpected character %q", b)
456 panic("unreachable")
457 }
458 defer func() {
459 x := recover()
460 switch x := x.(type) {
461 case failure:
462 err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
463 case nil:
464 // nop
465 default:
466 panic(x) // unexpected panic
467 }
468 }()
469 x := parse()
470 if skipSpace() {
471 fail("unexpected character %q after value", s[i])
472 }
473 return x, nil
474}
475
// isdigit reports whether b is an ASCII decimal digit, '0' through '9'.
func isdigit(b byte) bool {
	// Byte subtraction wraps around for values below '0',
	// so a single unsigned comparison covers both bounds.
	return b-'0' <= 9
}