Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 3 | """Mirror a remote ftp subtree into a local directory tree. |
| 4 | |
| 5 | usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat] |
| 6 | [-l username [-p passwd [-a account]]] |
| 7 | hostname [remotedir [localdir]] |
| 8 | -v: verbose |
| 9 | -q: quiet |
| 10 | -i: interactive mode |
| 11 | -m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o') |
| 12 | -n: don't log in |
| 13 | -r: remove local files/directories no longer pertinent |
| 14 | -l username [-p passwd [-a account]]: login info (default anonymous ftp) |
| 15 | -s pat: skip files matching pattern |
| 16 | hostname: remote host |
| 17 | remotedir: remote directory (default initial) |
| 18 | localdir: local directory (default current) |
| 19 | """ |
| 20 | |
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 21 | # XXX To do: |
| 22 | # - handle symbolic links |
| 23 | # - back up .mirrorinfo before overwriting |
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 24 | |
| 25 | import os |
| 26 | import sys |
| 27 | import time |
| 28 | import getopt |
| 29 | import string |
| 30 | import ftplib |
| 31 | from fnmatch import fnmatch |
| 32 | |
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 33 | # Print usage message and exit |
def usage(*args):
    """Print error messages and the usage text to stderr, then exit.

    Each positional argument is written on its own line, followed by
    the module docstring.  Always raises SystemExit with status 2.
    """
    # Write to stderr directly rather than rebinding sys.stdout, so that
    # a caller which catches SystemExit does not find stdout redirected.
    for msg in args:
        sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('%s\n' % (__doc__,))
    sys.exit(2)
| 39 | |
# Run-time settings.  main() updates these from the command line and
# mirrorsubdir() consults them.
verbose = 1 # 0 for -q; each -v adds one (2 enables the detailed trace)
interactive = 0 # -i: ask before retrieving each file or subdirectory
mac = 0 # -m: listing lines are bare names, trailing '/' marks a directory
rmok = 0 # -r: actually remove local files that are no longer pertinent
nologin = 0 # -n (or -m): do not log in after connecting
# fnmatch patterns for names that are never mirrored; -s appends to it
skippats = ['.', '..', '.mirrorinfo']
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 46 | |
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 47 | # Main program: parse command line and start processing |
def main():
    """Entry point: read the options, connect to the server, mirror the tree.

    Options update the module-level settings (verbose, interactive, mac,
    rmok, nologin, skippats).  The positional arguments are the host
    name, optionally followed by the remote and the local directory.
    A malformed command line is reported through usage(), which exits.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as err:
        usage(err)
    login = passwd = account = ''
    for flag, value in opts:
        if flag == '-l':
            login = value
        elif flag == '-p':
            passwd = value
        elif flag == '-a':
            account = value
        elif flag == '-v':
            verbose = verbose + 1
        elif flag == '-q':
            verbose = 0
        elif flag == '-i':
            interactive = 1
        elif flag == '-m':
            # Macintosh server: implies -n and -s '*.o'
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif flag == '-n':
            nologin = 1
        elif flag == '-r':
            rmok = 1
        elif flag == '-s':
            skippats.append(value)
    if not args:
        usage('hostname missing')
    if len(args) > 3:
        usage('too many arguments')
    # Pad the argument list so omitted directories come out as ''.
    host, remotedir, localdir = (list(args) + ['', ''])[:3]
    #
    f = ftplib.FTP()
    if verbose:
        print('Connecting to %s...' % host)
    f.connect(host)
    if not nologin:
        if verbose:
            print('Logging in as %s...' % (login or 'anonymous'))
        f.login(login, passwd, account)
    if verbose:
        print('OK.')
    trace = verbose > 1
    pwd = f.pwd()
    if trace:
        print('PWD = %s' % repr(pwd))
    if remotedir:
        if trace:
            print('cwd(%s)' % repr(remotedir))
        f.cwd(remotedir)
        if trace:
            print('OK.')
        pwd = f.pwd()
        if trace:
            print('PWD = %s' % repr(pwd))
    #
    mirrorsubdir(f, localdir)
| 96 | |
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 97 | # Core logic: mirror one subdirectory (recursively) |
def _load_mirrorinfo(infofilename):
    """Return the dictionary saved in infofilename ({} if missing or bad)."""
    try:
        fp = open(infofilename, 'r')
        try:
            text = fp.read()
        finally:
            fp.close()
    except IOError:
        text = '{}'
    # NOTE(review): eval() executes whatever the file contains.  The file
    # is written locally by writedict() and '.mirrorinfo' is in skippats
    # so it is never downloaded, but do not point this at untrusted data.
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in %s' % infofilename)
        info = {}
    return info


def _skip_pattern_for(name):
    """Return the first pattern in skippats matching name, or None."""
    for pat in skippats:
        if fnmatch(name, pat):
            return pat
    return None


def _parse_listing_line(line):
    """Split one LIST line into (filename, mode, infostuff); None to skip it."""
    if mac:
        # Mac listing has just filenames; trailing / means subdirectory
        filename = line.strip()
        mode = '-'
        if filename[-1:] == '/':
            filename = filename[:-1]
            mode = 'd'
        if not filename:
            # Blank line: there is nothing to retrieve
            return None
        return filename, mode, ''
    # Parse, assuming a UNIX listing
    words = line.split()
    if len(words) < 6:
        if verbose > 1:
            print('Skipping short line')
        return None
    if words[-2] == '->':
        if verbose > 1:
            print('Skipping symbolic link %s -> %s' % (words[-3], words[-1]))
        return None
    # The four words before the name identify this version of the file
    return words[-1], words[0], words[-5:-1]


def mirrorsubdir(f, localdir):
    """Mirror the current remote directory into localdir, recursively.

    f is a connected ftplib.FTP object whose current directory is the
    one to mirror; localdir is the local target ('' means the current
    directory) and is created if necessary.

    A '.mirrorinfo' file in localdir records, per file, the listing
    details of the version last retrieved; files whose details are
    unchanged are not fetched again.  Each file is downloaded under a
    temporary '@name' and renamed into place only when the transfer
    succeeded, so a failed transfer never replaces an existing copy.
    Local files no longer present remotely are reported, or removed
    when rmok is set.  Problems are printed; nothing is returned.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %s' % localdir)
        try:
            makedir(localdir)
        except os.error:
            print('Failed to establish local directory %s' % localdir)
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _load_mirrorinfo(infofilename)
    subdirs = []
    listing = []
    if verbose:
        print('Listing remote directory %s...' % pwd)
    f.retrlines('LIST', listing.append)
    filesfound = set()
    for line in listing:
        if verbose > 1:
            print('--> %s' % repr(line))
        parsed = _parse_listing_line(line)
        if parsed is None:
            continue
        filename, mode, infostuff = parsed
        pat = _skip_pattern_for(filename)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (pat, filename))
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %s' % filename)
            subdirs.append(filename)
            continue
        filesfound.add(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %s' % filename)
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@' + filename)
        if interactive:
            if not askabout('file', filename, pwd):
                # Keep an entry so the local copy is not treated as stale
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(tempname)
        except os.error:
            pass
        try:
            fp = open(tempname, 'wb')
        except IOError as msg:
            print("Can't create %s: %s" % (tempname, str(msg)))
            continue
        if verbose:
            print('Retrieving %s from %s as %s...' % (filename, pwd, fullname))
            fp1 = LoggingFile(fp, 1024, sys.stdout)
        else:
            fp1 = fp
        failed = 0
        t0 = time.time()
        try:
            try:
                f.retrbinary('RETR ' + filename, fp1.write, 8*1024)
            except ftplib.error_perm as msg:
                print(msg)
                failed = 1
            t1 = time.time()
            nbytes = fp.tell()
        finally:
            # Always release the temp file, even on an unexpected error
            fp.close()
            if fp1 is not fp:
                fp1.close()
        if failed:
            # Discard the partial download: do not clobber the existing
            # local copy and do not record this version as retrieved.
            try:
                os.unlink(tempname)
            except os.error:
                pass
            continue
        try:
            os.unlink(fullname)
        except os.error:
            pass # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %s to %s: %s" % (tempname, fullname, str(msg)))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose:
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (int(round(kbytes)),
                                                   int(round(dt)))
            if t1 > t0:
                summary = summary + ' (~%d Kbytes/sec)' % int(round(kbytes/dt))
            print(summary)
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in list(info.keys()): # snapshot: entries are deleted below
        if filename not in filesfound:
            if verbose:
                print('Removing obsolete info entry for %s in %s' %
                      (filename, localdir or '.'))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        names = os.listdir(localdir or os.curdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        if name in filesfound:
            # Still on the server (e.g. its transfer failed): pertinent
            continue
        pat = _skip_pattern_for(name)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (pat, name))
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %s is no longer pertinent' % fullname)
            continue
        if verbose:
            print('Removing local file/dir %s' % fullname)
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            if not askabout('subdirectory', subdir, pwd):
                continue
        if verbose:
            print('Processing subdirectory %s' % subdir)
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %s' % pwd)
            print('Remote cwd %s' % subdir)
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %s : %s" % (subdir, msg))
        else:
            if verbose:
                print('Mirroring as %s' % localsubdir)
            mirrorsubdir(f, localsubdir)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Verify we are back where we started before touching a sibling
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1:
                print('OK.')
| 293 | |
Guido van Rossum | fdc5adc | 1998-01-04 02:03:12 +0000 | [diff] [blame] | 294 | # Helper to remove a file or directory tree |
def remove(fullname):
    """Delete a file, or a directory together with everything below it.

    A symbolic link is unlinked, never followed.  Returns 1 when the
    path was removed and 0 otherwise; each failure prints a message.
    """
    is_real_dir = os.path.isdir(fullname) and not os.path.islink(fullname)
    if not is_real_dir:
        try:
            os.unlink(fullname)
        except os.error as err:
            print("Can't remove local file %s: %s" % (fullname, str(err)))
            return 0
        return 1
    try:
        entries = os.listdir(fullname)
    except os.error:
        entries = []
    # Try every entry even after a failure, so as much as possible goes.
    leftovers = [entry for entry in entries
                 if not remove(os.path.join(fullname, entry))]
    if leftovers:
        return 0
    try:
        os.rmdir(fullname)
    except os.error as err:
        print("Can't remove local directory %s: %s" % (fullname, str(err)))
        return 0
    return 1
| 321 | |
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 322 | # Wrapper around a file for writing to write a hash sign every block. |
class LoggingFile:
    """Write-through wrapper that prints a '#' per block written.

    fp is the file receiving the data, blocksize the number of bytes
    each '#' stands for, and outfp the file the marks are written to.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0      # total bytes written so far
        self.hashes = 0     # number of '#' marks already printed
        self.blocksize = blocksize
        self.outfp = outfp

    def write(self, data):
        """Pass data on to fp, first printing a mark per completed block."""
        self.bytes = self.bytes + len(data)
        # Floor division: only whole blocks earn a mark.  Plain '/' gives
        # a fraction under true division and prints one mark too many.
        hashes = int(self.bytes) // self.blocksize
        pending = hashes - self.hashes
        if pending > 0:
            self.outfp.write('#' * pending)
            self.outfp.flush()
            self.hashes = hashes
        self.fp.write(data)

    def close(self):
        """Finish the progress line; the wrapped file is left open."""
        self.outfp.write('\n')
| 340 | |
| 341 | # Ask permission to download a file. |
def askabout(filetype, filename, pwd):
    """Ask the user whether to retrieve an item; return 1 for yes, 0 for no.

    filetype is a word for the prompt ('file' or 'subdirectory'),
    filename the item's name and pwd the remote directory holding it.
    An empty reply means no; the question is repeated until the reply
    is recognized.  End of input is treated as no.
    """
    try:
        read_line = raw_input
    except NameError:
        # Interpreters without raw_input provide the same thing as input
        read_line = input
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    while 1:
        try:
            reply = read_line(prompt).strip().lower()
        except EOFError:
            # No more input: fall back to the default answer instead of
            # aborting the whole mirror run with a traceback.
            print('')
            return 0
        if reply in ('y', 'ye', 'yes'):
            return 1
        if reply in ('', 'n', 'no', 'nop', 'nope'):
            return 0
        print('Please answer yes or no.')
| 351 | |
| 352 | # Create a directory if it doesn't exist. Recursively create the |
| 353 | # parent directory as well if needed. |
def makedir(pathname):
    """Create directory pathname, and any missing parents, if needed.

    Does nothing when pathname already is a directory.  Raises os.error
    when a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    # dirname == pathname happens for a root that does not exist;
    # recursing on it would never terminate.
    if dirname and dirname != pathname:
        makedir(dirname)
        # With a trailing separator ('a/b/') the parent *is* the target,
        # so it may exist by now; mkdir would then fail needlessly.
        if os.path.isdir(pathname):
            return
    os.mkdir(pathname, 0o777)
| 360 | |
| 361 | # Write a dictionary to a file in a way that can be read back using |
| 362 | # eval() but is still somewhat readable (i.e. not a single long line). |
def writedict(dict, filename):
    """Write dict to filename as a dictionary display, one entry per line.

    Keys and values are written with repr(), so the text can be turned
    back into a dictionary with eval() while staying readable.  Any
    existing file is overwritten.
    """
    # 'with' makes sure the file is closed even if a write fails.
    with open(filename, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%s: %s,\n' % (repr(key), repr(value)))
        fp.write('}\n')
| 370 | |
# Run only when executed as a script, so that importing this module
# (e.g. to reuse its helpers) does not start a mirror run.
if __name__ == '__main__':
    main()