blob: 9e8be1daf843146c1195ddd2302fe9d846f87d92 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#! /usr/bin/env python3
Guido van Rossume830e551995-06-20 19:31:37 +00002
Guido van Rossumfdc5adc1998-01-04 02:03:12 +00003"""Mirror a remote ftp subtree into a local directory tree.
4
5usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
6 [-l username [-p passwd [-a account]]]
Skip Montanaro7dfb02d2002-12-05 02:37:23 +00007 hostname[:port] [remotedir [localdir]]
Guido van Rossumfdc5adc1998-01-04 02:03:12 +00008-v: verbose
9-q: quiet
10-i: interactive mode
11-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
12-n: don't log in
13-r: remove local files/directories no longer pertinent
Martin v. Löwisa4debff2001-06-07 17:17:00 +000014-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000015-s pat: skip files matching pattern
Skip Montanaro7dfb02d2002-12-05 02:37:23 +000016hostname: remote host w/ optional port separated by ':'
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000017remotedir: remote directory (default initial)
18localdir: local directory (default current)
19"""
20
Guido van Rossume830e551995-06-20 19:31:37 +000021import os
22import sys
23import time
24import getopt
Guido van Rossume830e551995-06-20 19:31:37 +000025import ftplib
Martin v. Löwisa4debff2001-06-07 17:17:00 +000026import netrc
Guido van Rossume830e551995-06-20 19:31:37 +000027from fnmatch import fnmatch
28
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000029# Print usage message and exit
def usage(*args):
    """Print error messages and the module usage text to stderr, then exit.

    Each positional argument is printed on its own line, followed by the
    module docstring.  The function never returns: it always terminates
    through ``sys.exit(2)``.
    """
    # Write to stderr explicitly rather than rebinding sys.stdout, so that a
    # caller catching SystemExit is not left with a redirected stdout.
    for msg in args:
        print(msg, file=sys.stderr)
    print(__doc__, file=sys.stderr)
    sys.exit(2)
Guido van Rossume830e551995-06-20 19:31:37 +000035
# Global option state.  main() overwrites these from the command line and
# the mirroring code reads them.
verbose = 1         # verbosity level: 0 for -q, 2 for -v
interactive = 0     # set to 1 by -i
mac = 0             # set to 1 by -m
rmok = 0            # set to 1 by -r
nologin = 0         # set to 1 by -n (and by -m)
# fnmatch patterns for names that are never mirrored; -s and -m append here.
skippats = [
    '.',
    '..',
    '.mirrorinfo',
]
Guido van Rossume830e551995-06-20 19:31:37 +000042
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000043# Main program: parse command line and start processing
def main():
    """Parse the command line, connect to the FTP server and mirror the tree.

    Options and positional arguments are the ones listed in the module
    docstring.  Login data is taken from the netrc entry for the host (if
    one can be read) and may be overridden with -l/-p/-a.  Command-line
    errors are reported through usage(), which exits with status 2.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        # 'b' is accepted for compatibility but has no effect.
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args:
        usage('hostname missing')
    host = args[0]
    port = 0
    if ':' in host:
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            # Report a malformed port as a usage error, not a traceback.
            usage('invalid port number: %r' % (port,))
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            login, account, passwd = auth
    except (netrc.NetrcParseError, IOError):
        # No usable netrc file: fall back to the defaults/options.
        pass
    for o, a in opts:
        if o == '-l':
            login = a
        elif o == '-p':
            passwd = a
        elif o == '-a':
            account = a
        elif o == '-v':
            verbose = verbose + 1
        elif o == '-q':
            verbose = 0
        elif o == '-i':
            interactive = 1
        elif o == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n':
            nologin = 1
        elif o == '-r':
            rmok = 1
        elif o == '-s':
            skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
    if args[2:]:
        localdir = args[2]
    if args[3:]:
        usage('too many arguments')
    #
    # The with-statement makes sure the control connection is shut down
    # even when mirroring is aborted by an exception.
    with ftplib.FTP() as f:
        if verbose:
            portsuffix = ":%d" % port if port else ""
            print("Connecting to '%s%s'..." % (host, portsuffix))
        f.connect(host, port)
        if not nologin:
            if verbose:
                print('Logging in as %r...' % (login or 'anonymous'))
            f.login(login, passwd, account)
        if verbose:
            print('OK.')
        pwd = f.pwd()
        if verbose > 1:
            print('PWD =', repr(pwd))
        if remotedir:
            if verbose > 1:
                print('cwd(%s)' % repr(remotedir))
            f.cwd(remotedir)
            if verbose > 1:
                print('OK.')
            pwd = f.pwd()
            if verbose > 1:
                print('PWD =', repr(pwd))
        #
        mirrorsubdir(f, localdir)
Guido van Rossume830e551995-06-20 19:31:37 +0000103
Guido van Rossumfdc5adc1998-01-04 02:03:12 +0000104# Core logic: mirror one subdirectory (recursively)
def _load_mirrorinfo(infofilename):
    """Return the dict stored in *infofilename*, or {} if missing/invalid."""
    import ast  # local import: only needed to parse the info file
    try:
        with open(infofilename, 'r') as fp:
            text = fp.read()
    except IOError:
        return {}
    # The info file is plain data written by writedict(); parse it as a
    # literal instead of eval()-ing it, so a tampered or corrupted file
    # cannot execute arbitrary code.
    try:
        info = ast.literal_eval(text)
    except (SyntaxError, ValueError, TypeError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in', repr(infofilename))
        return {}
    return info


def _matches_skip_pattern(name):
    """Return True if *name* matches one of the global skip patterns."""
    for pat in skippats:
        if fnmatch(name, pat):
            if verbose > 1:
                print('Skip pattern', repr(pat), end=' ')
                print('matches', repr(name))
            return True
    return False


def mirrorsubdir(f, localdir):
    """Mirror the current remote directory of *f* into *localdir*, recursively.

    f        -- FTP connection object; its current remote directory is the
                one that gets mirrored (only pwd, cwd, retrlines and
                retrbinary are used).
    localdir -- local target directory; '' means the current directory.
                It is created if it does not exist.

    Per-file listing information is kept in '<localdir>/.mirrorinfo' so
    that unchanged files are not fetched again.  Local files that no longer
    exist remotely are reported, or removed when the global 'rmok' is set.
    Most errors are printed and the affected entry is skipped.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory', repr(localdir))
        try:
            makedir(localdir)
        except os.error:
            print("Failed to establish local directory", repr(localdir))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _load_mirrorinfo(infofilename)
    subdirs = []
    listing = []
    if verbose:
        print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)
    filesfound = set()
    for line in listing:
        if verbose > 1:
            print('-->', repr(line))
        # Reset per entry so a link target never leaks from a previous line.
        linkto = None
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = line.strip()
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = line.split(None, 8)
            if len(words) < 6:
                if verbose > 1:
                    print('Skipping short line')
                continue
            filename = words[-1].lstrip()
            i = filename.find(" -> ")
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print('Found symbolic link %r' % (filename,))
                linkto = filename[i+4:]
                filename = filename[:i]
            infostuff = words[-5:-1]
            mode = words[0]
        if _matches_skip_pattern(filename):
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory', repr(filename))
            subdirs.append(filename)
            continue
        filesfound.add(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of', repr(filename))
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@'+filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        # Get rid of a stale temporary file from an earlier run.
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                # Listing says "link" but shows no target: nothing to create.
                print("Can't create %r: %s" %
                      (tempname, 'no link target in listing'))
                continue
            if verbose:
                print("Creating symlink %r -> %r" % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
            if verbose:
                print('Retrieving %r from %r as %r...' %
                      (filename, pwd, fullname))
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            failed = False
            t0 = time.time()
            try:
                try:
                    f.retrbinary('RETR ' + filename,
                                 fp1.write, 8*1024)
                except ftplib.error_perm as msg:
                    print(msg)
                    failed = True
                t1 = time.time()
                nbytes = fp.tell()
            finally:
                # Always release the local file, even on unexpected errors.
                fp.close()
                if fp1 is not fp:
                    fp1.close()
            if failed:
                # A refused transfer must neither replace the existing local
                # copy nor be recorded as up to date; drop the partial file.
                try:
                    os.unlink(tempname)
                except os.error:
                    pass
                continue
        try:
            os.unlink(fullname)
        except os.error:
            pass # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %r to %r: %s" % (tempname, fullname, msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (int(round(kbytes)),
                                                   int(round(dt)))
            if t1 > t0:
                summary += ' (~%d Kbytes/sec)' % int(round(kbytes/dt))
            print(summary)
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in list(info):
        if filename not in filesfound:
            if verbose:
                print("Removing obsolete info entry for", end=' ')
                print(repr(filename), "in", repr(localdir or "."))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        names = os.listdir(localdir or os.curdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        if name in filesfound:
            # Still present remotely (e.g. its transfer failed this time).
            continue
        if _matches_skip_pattern(name):
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file', repr(fullname), end=' ')
                print('is no longer pertinent')
            continue
        if verbose:
            print('Removing local file/dir', repr(fullname))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit:
                continue
        if verbose:
            print('Processing subdirectory', repr(subdir))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now:', repr(pwd))
            print('Remote cwd', repr(subdir))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to", repr(subdir), ":", repr(msg))
        else:
            if verbose:
                print('Mirroring as', repr(localsubdir))
            mirrorsubdir(f, localsubdir)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Sanity check: after "cd subdir; cd .." we must be back where we
        # started, otherwise the remaining subdirectories would be wrong.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1:
                print('OK.')
Guido van Rossume830e551995-06-20 19:31:37 +0000309
Guido van Rossumfdc5adc1998-01-04 02:03:12 +0000310# Helper to remove a file or directory tree
def remove(fullname):
    """Delete the file or directory tree *fullname*.

    Symbolic links are unlinked, never followed.  Returns 1 on success and
    0 if anything could not be removed; failures are reported on stdout.
    """
    is_real_dir = os.path.isdir(fullname) and not os.path.islink(fullname)
    if not is_real_dir:
        try:
            os.unlink(fullname)
        except os.error as err:
            print("Can't remove local file %r: %s" % (fullname, err))
            return 0
        return 1
    try:
        entries = os.listdir(fullname)
    except os.error:
        entries = []
    # Build the full result list first so every entry is attempted even
    # after an earlier one has failed.
    outcomes = [remove(os.path.join(fullname, entry)) for entry in entries]
    if not all(outcomes):
        return 0
    try:
        os.rmdir(fullname)
    except os.error as err:
        print("Can't remove local directory %r: %s" % (fullname, err))
        return 0
    return 1
Guido van Rossumfdc5adc1998-01-04 02:03:12 +0000335
Guido van Rossume830e551995-06-20 19:31:37 +0000336# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Write-through wrapper that shows progress as '#' marks.

    Data passed to write() is forwarded to *fp*; one '#' is written to
    *outfp* for every complete *blocksize* bytes seen so far.  close()
    only terminates the progress line with a newline; it does not close
    either underlying file.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp                # destination for the data
        self.bytes = 0              # total number of bytes written so far
        self.hashes = 0             # number of '#' marks already printed
        self.blocksize = blocksize
        self.outfp = outfp          # where the progress marks go

    def write(self, data):
        self.bytes = self.bytes + len(data)
        # Floor division: only completed blocks earn a mark.  True division
        # would yield a float and print a '#' for a partial block.
        hashes = self.bytes // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        self.outfp.write('\n')
Guido van Rossume830e551995-06-20 19:31:37 +0000354
def raw_input(prompt):
    """Write *prompt* to stdout, flush it, and return one line from stdin.

    The line is returned exactly as sys.stdin.readline() delivers it.
    """
    out = sys.stdout
    out.write(prompt)
    out.flush()
    return sys.stdin.readline()
359
Guido van Rossume830e551995-06-20 19:31:37 +0000360# Ask permission to download a file.
def askabout(filetype, filename, pwd):
    """Ask the user whether to retrieve *filename*; return 1 (yes) or 0 (no).

    The question is repeated until a recognised answer is given.  An empty
    reply counts as "no".
    """
    question = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    # Accepted replies (after stripping and lower-casing) and their verdicts.
    verdicts = dict.fromkeys(('y', 'ye', 'yes'), 1)
    verdicts.update(dict.fromkeys(('', 'n', 'no', 'nop', 'nope'), 0))
    while True:
        answer = raw_input(question).strip().lower()
        if answer in verdicts:
            return verdicts[answer]
        print('Please answer yes or no.')
Guido van Rossume830e551995-06-20 19:31:37 +0000370
371# Create a directory if it doesn't exist. Recursively create the
372# parent directory as well if needed.
def makedir(pathname):
    """Create directory *pathname*, including any missing parent directories.

    Does nothing if the directory already exists.  Raises OSError if the
    directory cannot be created, e.g. when *pathname* exists but is not a
    directory.
    """
    # os.makedirs copes with a trailing path separator and with another
    # process creating the directory between the check and the mkdir.
    os.makedirs(pathname, 0o777, exist_ok=True)
Guido van Rossume830e551995-06-20 19:31:37 +0000379
# Write a dictionary to a file in a way that can be read back by
# evaluating the file's text as a Python dict literal, but is still
# somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write *dict* to *filename* as a multi-line Python dict literal.

    Each item is written as ``repr(key): repr(value),`` on its own line
    between ``{`` and ``}``.  The data is first written to a temporary
    file ('@' + name) in the same directory; an existing *filename* is then
    renamed to a backup (name + '~') and the temporary file is renamed
    into place.  Errors while writing or in the final rename propagate to
    the caller.
    """
    dirname, fname = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + fname)
    backup = os.path.join(dirname, fname + '~')
    # Remove an old backup first so the rename below cannot collide with it.
    try:
        os.unlink(backup)
    except os.error:
        pass
    # The with-block closes the temporary file even if a write fails.
    with open(tempname, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    # There may be no previous file to back up; that is not an error.
    try:
        os.rename(filename, backup)
    except os.error:
        pass
    os.rename(tempname, filename)
Guido van Rossume830e551995-06-20 19:31:37 +0000402
Barry Warsaw64850ef2000-09-16 22:09:51 +0000403
# Run the mirror only when executed as a script, not when imported.
if __name__ == '__main__':
    main()