blob: 38af9dc8b7b5f4f85b1598e2268e531ed519e007 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossume830e551995-06-20 19:31:37 +00002
"""Mirror a remote ftp subtree into a local directory tree.

usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
                 [-l username [-p passwd [-a account]]]
                 hostname [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove local files/directories no longer pertinent
-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
-s pat: skip files matching pattern
hostname: remote host
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""
20
Guido van Rossume830e551995-06-20 19:31:37 +000021import os
22import sys
23import time
24import getopt
25import string
26import ftplib
Martin v. Löwisa4debff2001-06-07 17:17:00 +000027import netrc
Guido van Rossume830e551995-06-20 19:31:37 +000028from fnmatch import fnmatch
29
# Print usage message and exit
def usage(*args):
    """Write the given messages and the module usage text to stderr, then exit.

    Each positional argument is written on its own line (converted with
    str formatting, so exception instances are accepted), followed by the
    module docstring.

    Raises SystemExit with status 2; this function never returns.
    """
    # Write to sys.stderr directly instead of rebinding sys.stdout: the
    # rebinding was a process-wide side effect that stayed in place if a
    # caller caught SystemExit.
    for msg in args:
        sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('%s\n' % (__doc__,))
    sys.exit(2)
Guido van Rossume830e551995-06-20 19:31:37 +000036
# Module-level option state.  main() rebinds the scalar flags from the
# command line and appends to skippats; mirrorsubdir() reads them.
verbose = 1             # 0 for -q, 2 for -v
interactive = 0         # set to 1 by -i
mac = 0                 # set to 1 by -m
rmok = 0                # set to 1 by -r
nologin = 0             # set to 1 by -n (and by -m)
# fnmatch patterns for names that are never mirrored; -s and -m add more.
skippats = ['.', '..', '.mirrorinfo']
Guido van Rossume830e551995-06-20 19:31:37 +000043
# Main program: parse command line and start processing
def main():
    """Parse sys.argv, connect to the FTP host and mirror the remote tree.

    Login data comes from the user's .netrc entry for the host when one can
    be read; -l/-p/-a on the command line override it.  The option flags
    are stored in the module globals read by mirrorsubdir().

    Calls usage() (which exits with status 2) on a bad command line.
    Errors raised by ftplib while connecting, logging in or changing
    directory are not caught here.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args:
        usage('hostname missing')
    host = args[0]
    # Defaults from .netrc; a missing or unparsable file is not an error.
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            login, account, passwd = auth
    except (netrc.NetrcParseError, IOError):
        pass
    for o, a in opts:
        if o == '-l':
            login = a
        elif o == '-p':
            passwd = a
        elif o == '-a':
            account = a
        elif o == '-v':
            verbose = verbose + 1
        elif o == '-q':
            verbose = 0
        elif o == '-i':
            interactive = 1
        elif o == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n':
            nologin = 1
        elif o == '-r':
            rmok = 1
        elif o == '-s':
            skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
        if args[2:]:
            localdir = args[2]
            if args[3:]:
                usage('too many arguments')
    #
    f = ftplib.FTP()
    # Release the control connection on every exit path, including errors
    # raised while mirroring.
    try:
        if verbose:
            print('Connecting to %s...' % repr(host))
        f.connect(host)
        if not nologin:
            if verbose:
                print('Logging in as %s...' % repr(login or 'anonymous'))
            f.login(login, passwd, account)
        if verbose:
            print('OK.')
        pwd = f.pwd()
        if verbose > 1:
            print('PWD = %s' % repr(pwd))
        if remotedir:
            if verbose > 1:
                print('cwd(%s)' % repr(remotedir))
            f.cwd(remotedir)
            if verbose > 1:
                print('OK.')
            pwd = f.pwd()
            if verbose > 1:
                print('PWD = %s' % repr(pwd))
        #
        mirrorsubdir(f, localdir)
    finally:
        f.close()
Guido van Rossume830e551995-06-20 19:31:37 +000099
# Read the per-directory mirror info dictionary
def _read_mirror_info(infofilename):
    """Return the dictionary stored in infofilename, or {} if unusable.

    An unreadable file counts as an empty dictionary.  Content that does
    not evaluate, or that evaluates to something other than a dictionary,
    is reported and replaced by an empty dictionary.
    """
    try:
        with open(infofilename, 'r') as infofile:
            text = infofile.read()
    except IOError:
        text = '{}'
    # NOTE(security): eval() executes whatever expression the file
    # contains.  The file is normally produced by writedict(), but anyone
    # able to write into the local mirror tree can plant code here.
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in %s' % repr(infofilename))
        info = {}
    return info

# Return the first skip pattern matching a name
def _matching_skip_pattern(name):
    """Return the first pattern in skippats that matches name, else None."""
    for pat in skippats:
        if fnmatch(name, pat):
            return pat
    return None

# Split one line of the remote listing into its parts
def _parse_listing_line(line):
    """Return (filename, mode, infostuff, linkto) for a listing line.

    Returns None for a UNIX-style line with fewer than six fields.
    linkto is the text after " -> " in the name field, or None when the
    line carries no link target.
    """
    if mac:
        # Mac listing has just filenames;
        # trailing / means subdirectory
        filename = line.strip()
        mode = '-'
        if filename[-1:] == '/':
            filename = filename[:-1]
            mode = 'd'
        return filename, mode, '', None
    # Parse, assuming a UNIX listing
    words = line.split(None, 8)
    if len(words) < 6:
        if verbose > 1:
            print('Skipping short line')
        return None
    filename = words[-1].lstrip()
    linkto = None
    i = filename.find(" -> ")
    if i >= 0:
        # words[0] had better start with 'l'...
        if verbose > 1:
            print('Found symbolic link %s' % repr(filename))
        linkto = filename[i+4:]
        filename = filename[:i]
    return filename, words[0], words[-5:-1], linkto

# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    """Mirror the current remote directory of f into localdir, recursively.

    f is the FTP connection object; this function uses its pwd(),
    retrlines(), retrbinary() and cwd() methods.  localdir is the local
    target directory; an empty string means the current directory.

    For every listed file whose listing fields differ from those recorded
    in localdir/.mirrorinfo, the file is fetched into a temporary '@name'
    file, renamed over the local copy, and recorded.  Afterwards stale
    info entries are dropped, local files no longer present remotely are
    reported (or removed when rmok is set), and each remote subdirectory
    is processed by a recursive call.

    Returns None.  ftplib errors other than error_perm on RETR/CWD
    propagate to the caller.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %s' % repr(localdir))
        try:
            makedir(localdir)
        except os.error:
            print('Failed to establish local directory %s' % repr(localdir))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _read_mirror_info(infofilename)
    subdirs = []
    listing = []
    if verbose:
        print('Listing remote directory %s...' % repr(pwd))
    f.retrlines('LIST', listing.append)
    filesfound = []
    for line in listing:
        if verbose > 1:
            print('--> %s' % repr(line))
        parsed = _parse_listing_line(line)
        if parsed is None:
            continue
        filename, mode, infostuff, linkto = parsed
        pat = _matching_skip_pattern(filename)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s'
                      % (repr(pat), repr(filename)))
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %s' % repr(filename))
            subdirs.append(filename)
            continue
        filesfound.append(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %s' % repr(filename))
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@' + filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        # Get rid of a temp file left over from an earlier run.
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                # The listing gave no " -> target" for this entry, so
                # there is nothing to point the link at.
                print("Can't create %s: no link target in listing"
                      % repr(tempname))
                continue
            if verbose:
                print("Creating symlink %s -> %s"
                      % (repr(filename), repr(linkto)))
            try:
                os.symlink(linkto, tempname)
            except (IOError, os.error) as msg:
                # os.symlink reports failure with os.error (OSError).
                print("Can't create %s: %s" % (repr(tempname), str(msg)))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %s: %s" % (repr(tempname), str(msg)))
                continue
            if verbose:
                print('Retrieving %s from %s as %s...'
                      % (repr(filename), repr(pwd), repr(fullname)))
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            failed = 0
            t0 = time.time()
            try:
                try:
                    f.retrbinary('RETR ' + filename, fp1.write, 8*1024)
                except ftplib.error_perm as msg:
                    print(msg)
                    failed = 1
                t1 = time.time()
                nbytes = fp.tell()
            finally:
                # Close the temp file even when the transfer raised
                # something other than error_perm.
                fp.close()
                if fp1 is not fp:
                    fp1.close()
            if failed:
                # The server refused the transfer: keep whatever local
                # copy exists rather than replacing it with the empty
                # temp file and recording it as up to date.
                try:
                    os.unlink(tempname)
                except os.error:
                    pass
                continue
        try:
            os.unlink(fullname)
        except os.error:
            pass                # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %s to %s: %s"
                  % (repr(tempname), repr(fullname), str(msg)))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (int(round(kbytes)),
                                                   int(round(dt)))
            if t1 > t0:
                print('%s (~%d Kbytes/sec)' % (summary,
                                               int(round(kbytes/dt))))
                # The rate line has always been followed by a blank line.
                print('')
            else:
                print(summary)
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in list(info.keys()):
        if filename not in filesfound:
            if verbose:
                print('Removing obsolete info entry for %s in %s'
                      % (repr(filename), repr(localdir or ".")))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        names = os.listdir(localdir or os.curdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        pat = _matching_skip_pattern(name)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (repr(pat), repr(name)))
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %s is no longer pertinent'
                      % repr(fullname))
            continue
        if verbose:
            print('Removing local file/dir %s' % repr(fullname))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit:
                continue
        if verbose:
            print('Processing subdirectory %s' % repr(subdir))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %s' % repr(pwd))
            print('Remote cwd %s' % repr(subdir))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %s : %s" % (repr(subdir), repr(msg)))
        else:
            if verbose:
                print('Mirroring as %s' % repr(localsubdir))
            mirrorsubdir(f, localsubdir)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Make sure "cd subdir; cd .." really brought us back; otherwise
        # every later relative name would refer to the wrong directory.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1:
                print('OK.')
Guido van Rossume830e551995-06-20 19:31:37 +0000311
# Helper to remove a file or directory tree
def remove(fullname):
    """Delete fullname; return 1 on success and 0 on failure.

    A real directory (not a symbolic link to one) is emptied recursively
    and then removed; anything else is unlinked.  Failures are reported
    with a printed message rather than an exception.  Every entry of a
    directory is attempted even after one of them has failed.
    """
    if os.path.islink(fullname) or not os.path.isdir(fullname):
        # Plain file, or a symlink (which is never followed).
        try:
            os.unlink(fullname)
        except os.error as why:
            print("Can't remove local file %s: %s"
                  % (repr(fullname), str(why)))
            return 0
        return 1
    try:
        entries = os.listdir(fullname)
    except os.error:
        entries = []
    # Building the full list makes sure every entry is attempted.
    leftovers = [entry for entry in entries
                 if not remove(os.path.join(fullname, entry))]
    if leftovers:
        return 0
    try:
        os.rmdir(fullname)
    except os.error as why:
        print("Can't remove local directory %s: %s"
              % (repr(fullname), str(why)))
        return 0
    return 1
Guido van Rossumfdc5adc1998-01-04 02:03:12 +0000339
# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Pass writes through to fp and print one '#' per completed block.

    fp        -- object with a write() method that receives the data
    blocksize -- number of bytes represented by one hash sign
    outfp     -- object with write() and flush() that receives the
                 hash signs and the final newline
    """
    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0          # total number of bytes written so far
        self.hashes = 0         # number of hash signs printed so far
        self.blocksize = blocksize
        self.outfp = outfp
    def write(self, data):
        """Write data to fp, first printing a '#' for each new full block."""
        self.bytes = self.bytes + len(data)
        # Floor division: only completed blocks earn a hash sign.  With
        # true division a partial block would count as one more.
        hashes = int(self.bytes) // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)
    def close(self):
        """End the progress line; the wrapped fp is left open."""
        self.outfp.write('\n')
357 self.outfp.write('\n')
Guido van Rossume830e551995-06-20 19:31:37 +0000358
# Ask permission to download a file.
def askabout(filetype, filename, pwd):
    """Prompt until the user answers yes or no; return 1 for yes, 0 for no.

    filetype, filename and pwd are only used to build the prompt text.
    An empty reply counts as "no".  The reply is compared after
    stripping surrounding whitespace and lowercasing.
    """
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    affirmative = ('y', 'ye', 'yes')
    negative = ('', 'n', 'no', 'nop', 'nope')
    while True:
        answer = raw_input(prompt).strip().lower()
        if answer in affirmative:
            return 1
        if answer in negative:
            return 0
        print('Please answer yes or no.')
368 print 'Please answer yes or no.'
Guido van Rossume830e551995-06-20 19:31:37 +0000369
# Create a directory if it doesn't exist.  Recursively create the
# parent directory as well if needed.
def makedir(pathname):
    """Create directory pathname, and any missing parents, with mode 0777.

    Does nothing when pathname already is a directory.  Raises os.error
    when a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    if dirname:
        makedir(dirname)
        # For a pathname with a trailing separator ("a/b/") the parent
        # is the same directory and has just been created; calling
        # mkdir again would fail because it already exists.
        if os.path.isdir(pathname):
            return
    os.mkdir(pathname, 0o777)
377 os.mkdir(pathname, 0777)
Guido van Rossume830e551995-06-20 19:31:37 +0000378
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write dict to filename as a Python dictionary display.

    The text has one "key: value," line per entry, using repr() for both
    parts.  It is first written to a temporary '@name' file in the same
    directory.  An existing filename is then renamed to 'name~'
    (replacing any older backup) and the temporary file is renamed to
    filename.

    Raises IOError or os.error if the temporary file cannot be written
    or moved into place.
    """
    dirname, basename = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + basename)
    backup = os.path.join(dirname, basename + '~')
    # Remove the old backup first so the rename below cannot collide.
    try:
        os.unlink(backup)
    except os.error:
        pass
    # The with block closes the temp file even if a write fails.
    with open(tempname, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%s: %s,\n' % (repr(key), repr(value)))
        fp.write('}\n')
    # There is nothing to back up on the first write.
    try:
        os.rename(filename, backup)
    except os.error:
        pass
    os.rename(tempname, filename)
Guido van Rossume830e551995-06-20 19:31:37 +0000401
Barry Warsaw64850ef2000-09-16 22:09:51 +0000402
# Run the mirror only when executed as a script, not when imported.
if __name__ == "__main__":
    main()