blob: 0886af62a81fa38b531f5e9f54daed6a05f314d8 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossume830e551995-06-20 19:31:37 +00002
Guido van Rossumfdc5adc1998-01-04 02:03:12 +00003"""Mirror a remote ftp subtree into a local directory tree.
4
5usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
6 [-l username [-p passwd [-a account]]]
Skip Montanaro7dfb02d2002-12-05 02:37:23 +00007 hostname[:port] [remotedir [localdir]]
Guido van Rossumfdc5adc1998-01-04 02:03:12 +00008-v: verbose
9-q: quiet
10-i: interactive mode
11-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
12-n: don't log in
13-r: remove local files/directories no longer pertinent
Martin v. Löwisa4debff2001-06-07 17:17:00 +000014-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000015-s pat: skip files matching pattern
Skip Montanaro7dfb02d2002-12-05 02:37:23 +000016hostname: remote host w/ optional port separated by ':'
Guido van Rossumfdc5adc1998-01-04 02:03:12 +000017remotedir: remote directory (default initial)
18localdir: local directory (default current)
19"""
20
Guido van Rossume830e551995-06-20 19:31:37 +000021import os
22import sys
23import time
24import getopt
Guido van Rossume830e551995-06-20 19:31:37 +000025import ftplib
Martin v. Löwisa4debff2001-06-07 17:17:00 +000026import netrc
Guido van Rossume830e551995-06-20 19:31:37 +000027from fnmatch import fnmatch
28
# Print usage message and exit
def usage(*args):
    """Print the given messages and the module usage text, then exit.

    Each positional argument is written on its own line, followed by the
    module docstring.  All output goes to standard error and the process
    exits with status 2 (by raising SystemExit).
    """
    # Write to stderr directly instead of rebinding sys.stdout, so that a
    # caller who catches SystemExit is not left with a redirected stdout.
    for msg in args:
        sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('%s\n' % (__doc__,))
    sys.exit(2)
Guido van Rossume830e551995-06-20 19:31:37 +000035
# Option state for the run.  main() overwrites these from the command line
# (declaring them global); mirrorsubdir() reads verbose, interactive, mac,
# rmok and skippats.
verbose = 1 # 0 for -q, 2 for -v
interactive = 0  # set to 1 by -i
mac = 0  # set to 1 by -m
rmok = 0  # set to 1 by -r
nologin = 0  # set to 1 by -n and by -m
skippats = ['.', '..', '.mirrorinfo']  # fnmatch patterns; -s and -m append more
Guido van Rossume830e551995-06-20 19:31:37 +000042
# Main program: parse command line and start processing
def main():
    """Parse the command line, connect to the server and mirror the tree.

    Reads sys.argv, updates the module-level option globals, looks up
    default credentials for the host in the user's netrc file (command
    line options -l/-p/-a override them), connects, optionally logs in
    and changes to the remote directory, then calls mirrorsubdir().

    Command line errors are reported through usage(), which exits.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args: usage('hostname missing')
    host = args[0]
    port = 0  # 0 lets ftplib use its default port
    if ':' in host:
        host, port = host.split(':', 1)
        # The port arrives as text; ftplib expects a number.
        try:
            port = int(port)
        except ValueError:
            usage('invalid port number: %s' % repr(port))
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            login, account, passwd = auth
    except (netrc.NetrcParseError, IOError):
        # No usable netrc file: fall back to the empty defaults.
        pass
    for o, a in opts:
        if o == '-l': login = a
        elif o == '-p': passwd = a
        elif o == '-a': account = a
        elif o == '-v': verbose = verbose + 1
        elif o == '-q': verbose = 0
        elif o == '-i': interactive = 1
        elif o == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n': nologin = 1
        elif o == '-r': rmok = 1
        elif o == '-s': skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
        if args[2:]:
            localdir = args[2]
            if args[3:]: usage('too many arguments')
    #
    f = ftplib.FTP()
    if verbose: print('Connecting to %s...' % repr(host))
    f.connect(host, port)
    if not nologin:
        if verbose:
            print('Logging in as %s...' % repr(login or 'anonymous'))
        f.login(login, passwd, account)
    if verbose: print('OK.')
    pwd = f.pwd()
    if verbose > 1: print('PWD = %s' % repr(pwd))
    if remotedir:
        if verbose > 1: print('cwd(%s)' % repr(remotedir))
        f.cwd(remotedir)
        if verbose > 1: print('OK.')
        pwd = f.pwd()
        if verbose > 1: print('PWD = %s' % repr(pwd))
    #
    mirrorsubdir(f, localdir)
Guido van Rossume830e551995-06-20 19:31:37 +0000101
# Return the first skip pattern that matches name, or None if none does
def _matching_skip_pattern(name):
    for pat in skippats:
        if fnmatch(name, pat):
            return pat
    return None

# Load the mirror info dictionary from a file ({} if unreadable or bad)
def _read_mirrorinfo(infofilename):
    try:
        fp = open(infofilename, 'r')
        try:
            text = fp.read()
        finally:
            fp.close()
    except IOError:
        text = '{}'
    # NOTE(security): eval() executes whatever expression the file holds.
    # The file is normally produced by writedict(), but anyone able to
    # write to the local mirror directory can run code this way.
    try:
        return eval(text)
    except (SyntaxError, NameError):
        print('Bad mirror info in %s' % repr(infofilename))
        return {}

# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    """Mirror the current remote directory of f into localdir.

    f is a connected ftplib.FTP object; its current directory is the one
    that gets mirrored.  localdir is the local target directory ('' means
    the current directory) and is created if necessary.

    A '.mirrorinfo' file in localdir records, per file name, the listing
    fields seen when the file was last fetched; files whose recorded
    fields equal the current listing are not fetched again.  Downloads go
    to a temporary '@name' file first and are then renamed into place.
    Local files no longer listed remotely are reported, or removed when
    the global rmok is set.  Finally each remote subdirectory is
    processed recursively.

    Honors the module globals verbose, interactive, mac, rmok and
    skippats.  Returns None.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose: print('Creating local directory %s' % repr(localdir))
        try:
            makedir(localdir)
        except os.error:
            print('Failed to establish local directory %s' % repr(localdir))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _read_mirrorinfo(infofilename)
    subdirs = []
    listing = []
    if verbose: print('Listing remote directory %s...' % repr(pwd))
    f.retrlines('LIST', listing.append)
    filesfound = []
    for line in listing:
        if verbose > 1: print('--> %s' % repr(line))
        # Reset per line so a link target never leaks from an earlier entry.
        linkto = None
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = line.strip()
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = line.split(None, 8)
            if len(words) < 6:
                if verbose > 1: print('Skipping short line')
                continue
            filename = words[-1].lstrip()
            i = filename.find(" -> ")
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print('Found symbolic link %s' % repr(filename))
                linkto = filename[i+4:]
                filename = filename[:i]
            infostuff = words[-5:-1]
            mode = words[0]
        pat = _matching_skip_pattern(filename)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (repr(pat),
                                                      repr(filename)))
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %s' % repr(filename))
            subdirs.append(filename)
            continue
        filesfound.append(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %s' % repr(filename))
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@'+filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        # Clear out any temporary file left behind by an earlier run.
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                # Listed as a link but no " -> target" could be parsed.
                print("Can't create %s: %s" % (
                    repr(tempname), 'no symlink target in listing'))
                continue
            if verbose:
                print("Creating symlink %s -> %s" % (
                    repr(filename), repr(linkto)))
            try:
                os.symlink(linkto, tempname)
            except (IOError, os.error) as msg:
                # os.symlink reports failure with os.error, not IOError.
                print("Can't create %s: %s" % (
                    repr(tempname), str(msg)))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %s: %s" % (
                    repr(tempname), str(msg)))
                continue
            if verbose:
                print('Retrieving %s from %s as %s...' %
                      (repr(filename), repr(pwd), repr(fullname)))
            if verbose:
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            t0 = time.time()
            failed = 0
            try:
                f.retrbinary('RETR ' + filename,
                             fp1.write, 8*1024)
            except ftplib.error_perm as msg:
                print(msg)
                failed = 1
            t1 = time.time()
            nbytes = fp.tell()
            fp.close()
            if fp1 != fp:
                fp1.close()
            if failed:
                # A refused transfer must not replace the existing local
                # copy or be recorded as up to date; drop the partial file.
                try:
                    os.unlink(tempname)
                except os.error:
                    pass
                continue
        try:
            os.unlink(fullname)
        except os.error:
            pass            # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %s to %s: %s" % (repr(tempname),
                                                 repr(fullname),
                                                 str(msg)))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (int(round(kbytes)),
                                                   int(round(dt)))
            if t1 > t0:
                # The extra newline keeps the blank line that has always
                # followed a summary carrying a transfer rate.
                summary = '%s (~%d Kbytes/sec)\n' % (
                    summary, int(round(kbytes/dt)))
            print(summary)
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in list(info.keys()):
        if filename not in filesfound:
            if verbose:
                print("Removing obsolete info entry for %s in %s" % (
                    repr(filename), repr(localdir or ".")))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        if not localdir: names = os.listdir(os.curdir)
        else: names = os.listdir(localdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        pat = _matching_skip_pattern(name)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (repr(pat),
                                                      repr(name)))
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %s is no longer pertinent' %
                      repr(fullname))
            continue
        if verbose: print('Removing local file/dir %s' % repr(fullname))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit: continue
        if verbose: print('Processing subdirectory %s' % repr(subdir))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %s' % repr(pwd))
            print('Remote cwd %s' % repr(subdir))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %s : %s" % (repr(subdir), repr(msg)))
        else:
            if verbose: print('Mirroring as %s' % repr(localsubdir))
            mirrorsubdir(f, localsubdir)
            if verbose > 1: print('Remote cwd ..')
            f.cwd('..')
        # Make sure we are back where we started before moving on.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1: print('OK.')
Guido van Rossume830e551995-06-20 19:31:37 +0000313
# Helper to remove a file or directory tree
def remove(fullname):
    """Delete a file, symlink or whole directory tree.

    Directories (but not symlinks to directories) are emptied recursively
    before being removed.  Failures are reported on standard output.

    Returns 1 if fullname was removed, 0 otherwise.
    """
    is_real_dir = os.path.isdir(fullname) and not os.path.islink(fullname)
    if not is_real_dir:
        try:
            os.unlink(fullname)
        except os.error as err:
            print("Can't remove local file %s: %s" %
                  (repr(fullname), str(err)))
            return 0
        return 1
    try:
        children = os.listdir(fullname)
    except os.error:
        children = []
    # Try every child even after a failure, then report overall status.
    failures = 0
    for child in children:
        if not remove(os.path.join(fullname, child)):
            failures = failures + 1
    if failures:
        return 0
    try:
        os.rmdir(fullname)
    except os.error as err:
        print("Can't remove local directory %s: %s" %
              (repr(fullname), str(err)))
        return 0
    return 1
Guido van Rossumfdc5adc1998-01-04 02:03:12 +0000341
# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Write-through wrapper that shows progress as '#' marks.

    Data passed to write() is forwarded to fp; one '#' is written to
    outfp (and flushed) for every complete blocksize bytes seen so far.
    close() only ends the progress line; it does not close fp.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.outfp = outfp
        self.blocksize = blocksize
        self.bytes = 0   # total number of bytes written so far
        self.hashes = 0  # number of '#' marks already emitted

    def write(self, data):
        self.bytes = self.bytes + len(data)
        due = int(self.bytes) // self.blocksize
        # Emit the marks for blocks completed by this call, then the data.
        for _ in range(max(0, due - self.hashes)):
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        self.outfp.write('\n')
Guido van Rossume830e551995-06-20 19:31:37 +0000360
# Ask permission to download a file.
def askabout(filetype, filename, pwd):
    """Ask the user whether to retrieve filename from remote directory pwd.

    filetype is a word such as 'file' or 'subdirectory' used in the
    prompt.  The question is repeated until the answer is recognized.
    An empty answer counts as no.

    Returns 1 for yes, 0 for no.
    """
    affirmative = ('y', 'ye', 'yes')
    negative = ('', 'n', 'no', 'nop', 'nope')
    question = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    while True:
        answer = raw_input(question)
        answer = answer.strip().lower()
        if answer in affirmative:
            return 1
        if answer in negative:
            return 0
        print('Please answer yes or no.')
Guido van Rossume830e551995-06-20 19:31:37 +0000371
# Create a directory if it doesn't exist.  Recursively create the
# parent directory as well if needed.
def makedir(pathname):
    """Create directory pathname, including any missing parents.

    Does nothing if pathname already is a directory.  Raises os.error
    if a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    if dirname: makedir(dirname)
    # For a pathname with a trailing separator, dirname names the same
    # directory, so the recursive call has already created it.
    if not os.path.isdir(pathname):
        os.mkdir(pathname, 0o777)
Guido van Rossume830e551995-06-20 19:31:37 +0000380
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write dict to filename as a Python dictionary display.

    Each item is written on its own line as 'repr(key): repr(value),'.
    The text goes to a temporary '@name' file in the same directory
    first; any previous version of filename is then renamed to 'name~'
    (replacing an older backup) and the temporary file is renamed into
    place.
    """
    dirpart, basename = os.path.split(filename)
    tempname = os.path.join(dirpart, '@' + basename)
    backup = os.path.join(dirpart, basename + '~')
    try:
        os.unlink(backup)
    except os.error:
        pass
    fp = open(tempname, 'w')
    try:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%s: %s,\n' % (repr(key), repr(value)))
        fp.write('}\n')
    finally:
        # Close the temporary file even if a write fails.
        fp.close()
    try:
        os.rename(filename, backup)
    except os.error:
        # No previous version to back up.
        pass
    os.rename(tempname, filename)
Guido van Rossume830e551995-06-20 19:31:37 +0000403
Barry Warsaw64850ef2000-09-16 22:09:51 +0000404
# Run the mirror only when executed as a script, not when imported.
if __name__ == '__main__':
    main()