Add a Python parser for GN types to gn_helpers.

Some scripts take serialized GN values including strings and lists. These are serialized and escaped according to GN rules. In particular, the string escaping is not possible to duplicate with Python's "ast" module. This simple parser will convert values properly.

Add unit tests for new code, as well as existing code in gn_helpers.

Fix omissions in existing ToGNString code.

BUG=573132,571022

Review URL: https://codereview.chromium.org/1553993002

Cr-Commit-Position: refs/heads/master@{#371000}


CrOS-Libchrome-Original-Commit: 9dffb54bc426d84e4a2a1a6fb2989bf53237f5a2
diff --git a/build/gn_helpers.py b/build/gn_helpers.py
index 3b0647d..07a7c3e 100644
--- a/build/gn_helpers.py
+++ b/build/gn_helpers.py
@@ -2,8 +2,10 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-"""Helper functions useful when writing scripts that are run from GN's
-exec_script function."""
+"""Helper functions useful when writing scripts that integrate with GN.
+
+The main functions are ToGNString and FromGNString which convert between
+serialized GN veriables and Python variables."""
 
 class GNException(Exception):
   pass
@@ -18,7 +20,14 @@
   if isinstance(value, str):
     if value.find('\n') >= 0:
       raise GNException("Trying to print a string with a newline in it.")
-    return '"' + value.replace('"', '\\"') + '"'
+    return '"' + \
+        value.replace('\\', '\\\\').replace('"', '\\"').replace('$', '\\$') + \
+        '"'
+
+  if isinstance(value, bool):
+    if value:
+      return "true"
+    return "false"
 
   if isinstance(value, list):
     return '[ %s ]' % ', '.join(ToGNString(v) for v in value)
@@ -37,3 +46,231 @@
     return str(value)
 
   raise GNException("Unsupported type when printing to GN.")
+
+
+def FromGNString(input):
+  """Converts the input string from a GN serialized value to Python values.
+
+  For details on supported types see GNValueParser.Parse() below.
+
+  If your GN script did:
+    something = [ "file1", "file2" ]
+    args = [ "--values=$something" ]
+  The command line would look something like:
+    --values="[ \"file1\", \"file2\" ]"
+  Which when interpreted as a command line gives the value:
+    [ "file1", "file2" ]
+
+  You can parse this into a Python list using GN rules with:
+    input_values = FromGNValues(options.values)
+  Although the Python 'ast' module will parse many forms of such input, it
+  will not handle GN escaping properly, nor GN booleans. You should use this
+  function instead.
+
+
+  A NOTE ON STRING HANDLING:
+
+  If you just pass a string on the command line to your Python script, or use
+  string interpolation on a string variable, the strings will not be quoted:
+    str = "asdf"
+    args = [ str, "--value=$str" ]
+  Will yield the command line:
+    asdf --value=asdf
+  The unquoted asdf string will not be valid input to this function, which
+  accepts only quoted strings like GN scripts. In such cases, you can just use
+  the Python string literal directly.
+
+  The main use cases for this is for other types, in particular lists. When
+  using string interpolation on a list (as in the top example) the embedded
+  strings will be quoted and escaped according to GN rules so the list can be
+  re-parsed to get the same result."""
+  parser = GNValueParser(input)
+  return parser.Parse()
+
+
+def UnescapeGNString(value):
+  """Given a string with GN escaping, returns the unescaped string.
+
+  Be careful not to feed with input from a Python parsing function like
+  'ast' because it will do Python unescaping, which will be incorrect when
+  fed into the GN unescaper."""
+  result = ''
+  i = 0
+  while i < len(value):
+    if value[i] == '\\':
+      if i < len(value) - 1:
+        next_char = value[i + 1]
+        if next_char in ('$', '"', '\\'):
+          # These are the escaped characters GN supports.
+          result += next_char
+          i += 1
+        else:
+          # Any other backslash is a literal.
+          result += '\\'
+    else:
+      result += value[i]
+    i += 1
+  return result
+
+
+def _IsDigitOrMinus(char):
+  return char in "-0123456789"
+
+
+class GNValueParser(object):
+  """Duplicates GN parsing of values and converts to Python types.
+
+  Normally you would use the wrapper function FromGNValue() below.
+
+  If you expect input as a specific type, you can also call one of the Parse*
+  functions directly. All functions throw GNException on invalid input. """
+  def __init__(self, string):
+    self.input = string
+    self.cur = 0
+
+  def IsDone(self):
+    return self.cur == len(self.input)
+
+  def ConsumeWhitespace(self):
+    while not self.IsDone() and self.input[self.cur] in ' \t\n':
+      self.cur += 1
+
+  def Parse(self):
+    """Converts a string representing a printed GN value to the Python type.
+
+    See additional usage notes on FromGNString above.
+
+    - GN booleans ('true', 'false') will be converted to Python booleans.
+
+    - GN numbers ('123') will be converted to Python numbers.
+
+    - GN strings (double-quoted as in '"asdf"') will be converted to Python
+      strings with GN escaping rules. GN string interpolation (embedded
+      variables preceeded by $) are not supported and will be returned as
+      literals.
+
+    - GN lists ('[1, "asdf", 3]') will be converted to Python lists.
+
+    - GN scopes ('{ ... }') are not supported."""
+    result = self._ParseAllowTrailing()
+    self.ConsumeWhitespace()
+    if not self.IsDone():
+      raise GNException("Trailing input after parsing:\n  " +
+                        self.input[self.cur:])
+    return result
+
+  def _ParseAllowTrailing(self):
+    """Internal version of Parse that doesn't check for trailing stuff."""
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException("Expected input to parse.")
+
+    next_char = self.input[self.cur]
+    if next_char == '[':
+      return self.ParseList()
+    elif _IsDigitOrMinus(next_char):
+      return self.ParseNumber()
+    elif next_char == '"':
+      return self.ParseString()
+    elif self._ConstantFollows('true'):
+      return True
+    elif self._ConstantFollows('false'):
+      return False
+    else:
+      raise GNException("Unexpected token: " + self.input[self.cur:])
+
+  def ParseNumber(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected number but got nothing.')
+
+    begin = self.cur
+
+    # The first character can include a negative sign.
+    if not self.IsDone() and _IsDigitOrMinus(self.input[self.cur]):
+      self.cur += 1
+    while not self.IsDone() and self.input[self.cur].isdigit():
+      self.cur += 1
+
+    number_string = self.input[begin:self.cur]
+    if not len(number_string) or number_string == '-':
+      raise GNException("Not a valid number.")
+    return int(number_string)
+
+  def ParseString(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected string but got nothing.')
+
+    if self.input[self.cur] != '"':
+      raise GNException('Expected string beginning in a " but got:\n  ' +
+                        self.input[self.cur:])
+    self.cur += 1  # Skip over quote.
+
+    begin = self.cur
+    while not self.IsDone() and self.input[self.cur] != '"':
+      if self.input[self.cur] == '\\':
+        self.cur += 1  # Skip over the backslash.
+        if self.IsDone():
+          raise GNException("String ends in a backslash in:\n  " +
+                            self.input)
+      self.cur += 1
+
+    if self.IsDone():
+      raise GNException('Unterminated string:\n  ' + self.input[begin:])
+
+    end = self.cur
+    self.cur += 1  # Consume trailing ".
+
+    return UnescapeGNString(self.input[begin:end])
+
+  def ParseList(self):
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException('Expected list but got nothing.')
+
+    # Skip over opening '['.
+    if self.input[self.cur] != '[':
+      raise GNException("Expected [ for list but got:\n  " +
+                        self.input[self.cur:])
+    self.cur += 1
+    self.ConsumeWhitespace()
+    if self.IsDone():
+      raise GNException("Unterminated list:\n  " + self.input)
+
+    list_result = []
+    previous_had_trailing_comma = True
+    while not self.IsDone():
+      if self.input[self.cur] == ']':
+        self.cur += 1  # Skip over ']'.
+        return list_result
+
+      if not previous_had_trailing_comma:
+        raise GNException("List items not separated by comma.")
+
+      list_result += [ self._ParseAllowTrailing() ]
+      self.ConsumeWhitespace()
+      if self.IsDone():
+        break
+
+      # Consume comma if there is one.
+      previous_had_trailing_comma = self.input[self.cur] == ','
+      if previous_had_trailing_comma:
+        # Consume comma.
+        self.cur += 1
+        self.ConsumeWhitespace()
+
+    raise GNException("Unterminated list:\n  " + self.input)
+
+  def _ConstantFollows(self, constant):
+    """Returns true if the given constant follows immediately at the current
+    location in the input. If it does, the text is consumed and the function
+    returns true. Otherwise, returns false and the current position is
+    unchanged."""
+    end = self.cur + len(constant)
+    if end >= len(self.input):
+      return False  # Not enough room.
+    if self.input[self.cur:end] == constant:
+      self.cur = end
+      return True
+    return False