Add --archive option to init to sync using git archive

This significantly reduces sync time and used brandwidth as only
a tar of each project's revision is checked out, but git is not
accessible from projects anymore.

This is relevant when git is not needed in projects but sync
speed/brandwidth may be important like on CI servers when building
several versions from scratch regularly for example.

Archive is not supported over http/https.

Change-Id: I48c3c7de2cd5a1faec33e295fcdafbc7807d0e4d
Signed-off-by: Julien Campergue <julien.campergue@parrot.com>
diff --git a/project.py b/project.py
index b01a52b..0d49111 100644
--- a/project.py
+++ b/project.py
@@ -23,6 +23,7 @@
 import stat
 import subprocess
 import sys
+import tarfile
 import tempfile
 import time
 
@@ -982,15 +983,62 @@
 
 ## Sync ##
 
+  def _ExtractArchive(self, tarpath, path=None):
+    """Extract the given tar on its current location
+
+    Args:
+        - tarpath: The path to the actual tar file
+
+    """
+    try:
+      with tarfile.open(tarpath, 'r') as tar:
+        tar.extractall(path=path)
+        return True
+    except (IOError, tarfile.TarError) as e:
+      print("error: Cannot extract archive %s: "
+            "%s" % (tarpath, str(e)), file=sys.stderr)
+    return False
+
   def Sync_NetworkHalf(self,
       quiet=False,
       is_new=None,
       current_branch_only=False,
       clone_bundle=True,
-      no_tags=False):
+      no_tags=False,
+      archive=False):
     """Perform only the network IO portion of the sync process.
        Local working directory/branch state is not affected.
     """
+    if archive and not isinstance(self, MetaProject):
+      if self.remote.url.startswith(('http://', 'https://')):
+        print("error: %s: Cannot fetch archives from http/https "
+              "remotes." % self.name, file=sys.stderr)
+        return False
+
+      name = self.relpath.replace('\\', '/')
+      name = name.replace('/', '_')
+      tarpath = '%s.tar' % name
+      topdir = self.manifest.topdir
+
+      try:
+        self._FetchArchive(tarpath, cwd=topdir)
+      except GitError as e:
+        print('error: %s' % str(e), file=sys.stderr)
+        return False
+
+      # From now on, we only need absolute tarpath
+      tarpath = os.path.join(topdir, tarpath)
+
+      if not self._ExtractArchive(tarpath, path=topdir):
+        return False
+      try:
+        os.remove(tarpath)
+      except OSError as e:
+        print("warn: Cannot remove archive %s: "
+              "%s" % (tarpath, str(e)), file=sys.stderr)
+      self._CopyFiles()
+      return True
+
     if is_new is None:
       is_new = not self.Exists
     if is_new:
@@ -1573,6 +1621,19 @@
 
 ## Direct Git Commands ##
 
+  def _FetchArchive(self, tarpath, cwd=None):
+    cmd = ['archive', '-v', '-o', tarpath]
+    cmd.append('--remote=%s' % self.remote.url)
+    cmd.append('--prefix=%s/' % self.relpath)
+    cmd.append(self.revisionExpr)
+
+    command = GitCommand(self, cmd, cwd=cwd,
+                         capture_stdout=True,
+                         capture_stderr=True)
+
+    if command.Wait() != 0:
+      raise GitError('git archive %s: %s' % (self.name, command.stderr))
+
   def _RemoteFetch(self, name=None,
                    current_branch_only=False,
                    initial=False,