#! /usr/bin/env python

"""Mirror a remote ftp subtree into a local directory tree.

usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
                 [-l username [-p passwd [-a account]]]
                 hostname[:port] [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove local files/directories no longer pertinent
-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
-s pat: skip files matching pattern
hostname: remote host w/ optional port separated by ':'
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""
| 20 | |
import ftplib
import getopt
import netrc
import os
import sys
import time
from fnmatch import fnmatch
| 28 | |
# Print usage message and exit
def usage(*args):
    """Write each message in *args*, then the module docstring, to stderr.

    Never returns: always ends by calling sys.exit(2), i.e. raises
    SystemExit with exit status 2.
    """
    # Write to stderr directly instead of rebinding sys.stdout, so the
    # redirection cannot leak if a caller catches SystemExit.
    for msg in args:
        sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('%s\n' % (__doc__,))
    sys.exit(2)
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 35 | |
# Global option flags; main() overrides them from the command line.
verbose = 1      # 0 for -q, 2 for -v
interactive = 0  # -i: ask before retrieving each file/subdirectory
mac = 0          # -m: listing lines are bare filenames (trailing / = dir)
rmok = 0         # -r: remove local files/directories no longer pertinent
nologin = 0      # -n (or -m): don't log in after connecting
# fnmatch patterns for names that are never mirrored; -s and -m append here.
skippats = ['.', '..', '.mirrorinfo']
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 42 | |
# Main program: parse command line and start processing
def main():
    """Parse sys.argv, connect to the FTP server and mirror the remote tree.

    Login details come from ~/.netrc when it has an entry for the host and
    are overridden by -l/-p/-a.  The option flags are stored in the module
    globals read by mirrorsubdir().  Command-line errors are reported
    through usage(), which exits with status 2.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        # NOTE: -b is accepted by the option string but has no effect below.
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args:
        usage('hostname missing')
    host = args[0]
    port = 0  # 0 is passed straight to FTP.connect()
    if ':' in host:
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            # Report a bad port as a usage error instead of a traceback.
            usage('invalid port number: %r' % (port,))
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            login, account, passwd = auth
    except (netrc.NetrcParseError, IOError):
        # A missing or unparsable .netrc just means no defaults.
        pass
    for o, a in opts:
        if o == '-l':
            login = a
        elif o == '-p':
            passwd = a
        elif o == '-a':
            account = a
        elif o == '-v':
            verbose = verbose + 1
        elif o == '-q':
            verbose = 0
        elif o == '-i':
            interactive = 1
        elif o == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n':
            nologin = 1
        elif o == '-r':
            rmok = 1
        elif o == '-s':
            skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
        if args[2:]:
            localdir = args[2]
            if args[3:]:
                usage('too many arguments')
    #
    f = ftplib.FTP()
    if verbose:
        if port:
            target = '%s:%d' % (host, port)
        else:
            target = host
        print("Connecting to '%s'..." % (target,))
    f.connect(host, port)
    try:
        if not nologin:
            if verbose:
                print('Logging in as %r...' % (login or 'anonymous',))
            f.login(login, passwd, account)
        if verbose:
            print('OK.')
        pwd = f.pwd()
        if verbose > 1:
            print('PWD = %r' % (pwd,))
        if remotedir:
            if verbose > 1:
                print('cwd(%r)' % (remotedir,))
            f.cwd(remotedir)
            if verbose > 1:
                print('OK.')
            pwd = f.pwd()
            if verbose > 1:
                print('PWD = %r' % (pwd,))
        #
        mirrorsubdir(f, localdir)
    finally:
        # Release the control connection even if mirroring fails.
        f.close()
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 103 | |
# Return true if name matches one of the global skip patterns
def _matches_skippat(name):
    """Return 1 if *name* matches a pattern in skippats, else 0."""
    for pat in skippats:
        if fnmatch(name, pat):
            if verbose > 1:
                print('Skip pattern %r matches %r' % (pat, name))
            return 1
    return 0

# Load the per-directory bookkeeping dictionary
def _read_mirrorinfo(infofilename):
    """Return the dictionary stored in *infofilename*.

    An unreadable file yields {}.  Contents that do not evaluate to a
    dictionary are reported and also yield {}.
    """
    try:
        fp = open(infofilename, 'r')
        try:
            text = fp.read()
        finally:
            fp.close()
    except IOError:
        text = '{}'
    try:
        # NOTE(review): eval() executes whatever the file contains.  The
        # file is written by writedict() and '.mirrorinfo' is in skippats
        # so it is never fetched from the server, but a parser such as
        # ast.literal_eval would be safer if the local tree is not trusted.
        info = eval(text)
    except (SyntaxError, NameError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in %r' % (infofilename,))
        info = {}
    return info

# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    """Mirror the current remote directory of *f* into *localdir*.

    f        -- connected FTP object; its pwd(), cwd(), retrlines() and
                retrbinary() methods are used.
    localdir -- local target directory; '' means the current directory.

    Files are downloaded to '@'+name and then renamed into place.  What was
    fetched is recorded in <localdir>/.mirrorinfo so unchanged files are
    skipped next time.  Local names that are neither recorded nor remote
    subdirectories are reported, or removed when the global rmok is set.
    Finally each remote subdirectory is mirrored recursively.  Behaviour is
    controlled by the globals verbose, interactive, mac, rmok and skippats.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %r' % (localdir,))
        try:
            makedir(localdir)
        except os.error:
            print('Failed to establish local directory %r' % (localdir,))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _read_mirrorinfo(infofilename)
    subdirs = []
    listing = []
    if verbose:
        print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)
    filesfound = set()
    for line in listing:
        if verbose > 1:
            print('--> %r' % (line,))
        # Reset per line so a link target never leaks from an earlier entry.
        linkto = None
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = line.strip()
            if not filename:
                # A blank line names nothing to retrieve.
                continue
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = line.split(None, 8)
            if len(words) < 6:
                if verbose > 1:
                    print('Skipping short line')
                continue
            filename = words[-1].lstrip()
            i = filename.find(' -> ')
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print('Found symbolic link %r' % (filename,))
                linkto = filename[i+4:]
                filename = filename[:i]
            # The four fields before the name identify this version.
            infostuff = words[-5:-1]
            mode = words[0]
        if _matches_skippat(filename):
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %r' % (filename,))
            subdirs.append(filename)
            continue
        filesfound.add(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %r' % (filename,))
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@' + filename)
        if interactive:
            if not askabout('file', filename, pwd):
                # Record the name so the cleanup passes below leave any
                # existing local copy alone.
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                print("Can't create %r: %s" % (
                    tempname, 'no symlink target in listing'))
                continue
            if verbose:
                print('Creating symlink %r -> %r' % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except (IOError, OSError) as msg:
                # os.symlink reports failure as OSError, not IOError.
                print("Can't create %r: %s" % (tempname, str(msg)))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, str(msg)))
                continue
            if verbose:
                print('Retrieving %r from %r as %r...' % (
                    filename, pwd, fullname))
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            t0 = time.time()
            try:
                f.retrbinary('RETR ' + filename, fp1.write, 8*1024)
            except ftplib.error_perm as msg:
                # NOTE(review): after a refused RETR the (possibly empty)
                # temp file is still renamed into place and recorded below;
                # confirm whether that is intended.
                print(msg)
            t1 = time.time()
            nbytes = fp.tell()
            fp.close()
            if fp1 is not fp:
                fp1.close()
        try:
            os.unlink(fullname)
        except os.error:
            pass  # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %r to %r: %s" % (
                tempname, fullname, str(msg)))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (
                int(round(kbytes)), int(round(dt)))
            if t1 > t0:
                summary = summary + ' (~%d Kbytes/sec)' % (
                    int(round(kbytes/dt)),)
            # One print call, so no stray blank line follows the rate.
            print(summary)
    #
    # Remove files from info that are no longer remote
    obsolete = [name for name in info if name not in filesfound]
    for filename in obsolete:
        if verbose:
            print('Removing obsolete info entry for %r in %r' % (
                filename, localdir or '.'))
        del info[filename]
    if obsolete:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        names = os.listdir(localdir or os.curdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        if _matches_skippat(name):
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %r is no longer pertinent' % (fullname,))
            continue
        if verbose:
            print('Removing local file/dir %r' % (fullname,))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            if not askabout('subdirectory', subdir, pwd):
                continue
        if verbose:
            print('Processing subdirectory %r' % (subdir,))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %r' % (pwd,))
            print('Remote cwd %r' % (subdir,))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %r : %r" % (subdir, msg))
        else:
            if verbose:
                print('Mirroring as %r' % (localsubdir,))
            mirrorsubdir(f, localsubdir)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Check that we are back where we started before touching the
        # next subdirectory.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1:
                print('OK.')
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 315 | |
# Helper to remove a file or directory tree
def remove(fullname):
    """Remove the file, symlink or directory tree at *fullname*.

    A real directory (not a symlink to one) is emptied recursively and then
    removed; anything else is unlinked.  Failures are printed rather than
    raised.  Returns 1 on success and 0 if anything could not be removed.
    """
    if os.path.isdir(fullname) and not os.path.islink(fullname):
        try:
            names = os.listdir(fullname)
        except os.error:
            names = []
        ok = 1
        # Keep going after a failure so as much as possible is removed.
        for name in names:
            if not remove(os.path.join(fullname, name)):
                ok = 0
        if not ok:
            return 0
        try:
            os.rmdir(fullname)
        except os.error as msg:
            print("Can't remove local directory %r: %s" % (
                fullname, str(msg)))
            return 0
    else:
        try:
            os.unlink(fullname)
        except os.error as msg:
            print("Can't remove local file %r: %s" % (fullname, str(msg)))
            return 0
    return 1
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 343 | |
# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Write-through wrapper that prints one '#' per completed block.

    fp        -- file object that receives the data
    blocksize -- number of bytes represented by each '#'
    outfp     -- file object that receives the progress marks
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0    # total bytes passed to write() so far
        self.hashes = 0   # number of '#' marks already printed
        self.blocksize = blocksize
        self.outfp = outfp

    def write(self, data):
        """Pass *data* on to fp, printing a '#' for each block completed."""
        self.bytes = self.bytes + len(data)
        # Floor division: '/' is true division on Python 3 and would print
        # a mark for a block that is only partially complete.
        hashes = int(self.bytes) // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        """End the progress line; the wrapped fp is left open."""
        self.outfp.write('\n')
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 362 | |
# Ask permission to download a file.
def askabout(filetype, filename, pwd):
    """Ask on the terminal whether to retrieve *filename*; return 1 or 0.

    filetype -- word used in the prompt (callers pass 'file' or
                'subdirectory')
    filename -- remote name being asked about
    pwd      -- remote directory shown in the prompt

    An empty reply counts as "no".  Any other unrecognised reply repeats
    the question.
    """
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    # raw_input() only exists on Python 2; Python 3 calls it input().
    try:
        ask = raw_input
    except NameError:
        ask = input
    while 1:
        reply = ask(prompt).strip().lower()
        if reply in ('y', 'ye', 'yes'):
            return 1
        if reply in ('', 'n', 'no', 'nop', 'nope'):
            return 0
        print('Please answer yes or no.')
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 373 | |
# Create a directory if it doesn't exist.  Recursively create the
# parent directory as well if needed.
def makedir(pathname):
    """Create directory *pathname* and any missing parents.

    Does nothing if *pathname* is already a directory.  Raises os.error if
    a directory cannot be created (for example when a file is in the way).
    """
    if os.path.isdir(pathname):
        return
    # os.makedirs copes with a trailing separator, where creating the
    # parent and then calling mkdir on 'a/b/' failed with "file exists".
    os.makedirs(pathname, 0o777)
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 382 | |
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write *dict* to *filename* as a Python dictionary literal.

    Each item goes on its own line as repr(key): repr(value).  The data is
    first written to '@'+name in the same directory and then renamed into
    place; the previous version of the file, if any, is kept as name+'~'.
    Errors opening or writing the temporary file, or the final rename,
    propagate to the caller.
    """
    dirname, fname = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + fname)
    backup = os.path.join(dirname, fname + '~')
    fp = open(tempname, 'w')
    try:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    finally:
        fp.close()
    # Drop the old backup only once the new contents are safely on disk.
    try:
        os.unlink(backup)
    except os.error:
        pass
    try:
        os.rename(filename, backup)
    except os.error:
        # No previous file to back up.
        pass
    os.rename(tempname, filename)
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 405 | |
Barry Warsaw | 64850ef | 2000-09-16 22:09:51 +0000 | [diff] [blame] | 406 | |
# Run the mirror only when executed as a script, not when imported.
if __name__ == '__main__':
    main()