Guido van Rossum | d2966cb | 1996-02-05 18:49:00 +0000 | [diff] [blame] | 1 | #! /ufs/guido/bin/sgi/python |
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 2 | |
| 3 | # Mirror a remote ftp subtree into a local directory tree. |
| 4 | # Basic usage: ftpmirror [options] host [remotedir [localdir]] |
| 5 | # |
| 6 | # XXX To do: |
| 7 | # - handle symbolic links |
| 8 | # - back up .mirrorinfo before overwriting |
| 9 | # - use pickles for .mirrorinfo? |
| 10 | |
| 11 | import os |
| 12 | import sys |
| 13 | import time |
| 14 | import getopt |
| 15 | import string |
| 16 | import ftplib |
| 17 | from fnmatch import fnmatch |
| 18 | |
usage_msg = """
usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
[-l username [-p passwd [-a account]]]
hostname [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove files no longer pertinent
-l username [-p passwd [-a account]]: login info (default anonymous ftp)
-s pat: skip files matching pattern
hostname: remote host
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""


def usage(*args):
    """Report a command-line error and terminate with exit status 2.

    Each positional argument is written on its own line, followed by the
    usage summary.  Everything goes to standard error; sys.stdout is
    rebound to sys.stderr before anything is written.
    """
    err = sys.stderr
    sys.stdout = err
    # The caller's messages come first, the usage summary last.
    for text in args + (usage_msg,):
        err.write('%s\n' % (text,))
    sys.exit(2)
| 40 | |
# Global option state.  main() updates these from the command line;
# mirrorsubdir() reads them.
skippats = ['.', '..', '.mirrorinfo']   # fnmatch patterns for names to skip
nologin = 0             # 1 for -n (and -m): don't log in
rmok = 0                # 1 for -r: remove files no longer pertinent
mac = 0                 # 1 for -m: macintosh server listing format
interactive = 0         # 1 for -i: ask before each retrieval
verbose = 1             # 0 for -q, 2 for -v
Guido van Rossum | e830e55 | 1995-06-20 19:31:37 +0000 | [diff] [blame] | 47 | |
def main():
    """Parse the command line, connect to the server and mirror the tree.

    Options update the module-level flags (verbose, interactive, mac,
    rmok, nologin) and extend skippats.  The positional arguments are
    the host name and, optionally, the remote and local directories.
    Any command-line error is reported through usage(), which exits.

    The FTP connection is shut down when mirroring finishes, whether it
    completed normally or was aborted by an exception.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        # NOTE: '-b' is accepted by the option string but has no effect;
        # it is kept so that existing command lines keep working.
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    for o, a in opts:
        if o == '-l':
            login = a
        elif o == '-p':
            passwd = a
        elif o == '-a':
            account = a
        elif o == '-v':
            verbose = verbose + 1
        elif o == '-q':
            verbose = 0
        elif o == '-i':
            interactive = 1
        elif o == '-m':
            # A Macintosh server implies -n and -s '*.o'.
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n':
            nologin = 1
        elif o == '-r':
            rmok = 1
        elif o == '-s':
            skippats.append(a)
    if not args:
        usage('hostname missing')
    host = args[0]
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
    if args[2:]:
        localdir = args[2]
    if args[3:]:
        usage('too many arguments')
    #
    f = ftplib.FTP()
    if verbose:
        print('Connecting to %s...' % host)
    f.connect(host)
    try:
        if not nologin:
            if verbose:
                print('Logging in as %s...' % (login or 'anonymous'))
            f.login(login, passwd, account)
        if verbose:
            print('OK.')
        pwd = f.pwd()
        if verbose > 1:
            print('PWD = %s' % repr(pwd))
        if remotedir:
            if verbose > 1:
                print('cwd(%s)' % repr(remotedir))
            f.cwd(remotedir)
            if verbose > 1:
                print('OK.')
            pwd = f.pwd()
            if verbose > 1:
                print('PWD = %s' % repr(pwd))
        #
        mirrorsubdir(f, localdir)
    finally:
        # Always release the control connection: say goodbye politely,
        # and fall back to a unilateral close if the server is gone.
        try:
            f.quit()
        except ftplib.all_errors:
            f.close()
| 96 | |
def _load_mirrorinfo(infofilename):
    """Return the dictionary saved in infofilename, or {} if unusable."""
    try:
        fp = open(infofilename, 'r')
        try:
            text = fp.read()
        finally:
            fp.close()
    except IOError:
        # No readable info file yet: start with an empty record.
        return {}
    # NOTE(security): eval() executes whatever expression the file holds.
    # This is only safe while .mirrorinfo is written by writedict() alone;
    # review before pointing the tool at a directory others can write to.
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        info = None
    if not isinstance(info, dict):
        print('Bad mirror info in %s' % infofilename)
        return {}
    return info


def _parse_listing_line(line):
    """Return (filename, mode, infostuff) for a LIST line, or None to skip it."""
    if mac:
        # Mac listing has just filenames;
        # trailing / means subdirectory
        filename = line.strip()
        if not filename:
            return None
        mode = '-'
        if filename[-1:] == '/':
            filename = filename[:-1]
            mode = 'd'
        return filename, mode, ''
    # Parse, assuming a UNIX listing
    words = line.split()
    if len(words) < 6:
        if verbose > 1:
            print('Skipping short line')
        return None
    if words[-2] == '->':
        if verbose > 1:
            print('Skipping symbolic link %s -> %s' % (words[-3], words[-1]))
        return None
    # The four words before the name are stored as the file's "version".
    return words[-1], words[0], words[-5:-1]


def _matching_skip_pattern(filename):
    """Return the first pattern in skippats matching filename, else None."""
    for pat in skippats:
        if fnmatch(filename, pat):
            return pat
    return None


def mirrorsubdir(f, localdir):
    """Mirror the current remote directory of f into localdir, recursively.

    f is a connected ftplib.FTP-like object; localdir is the local
    directory ('' means the current directory), created if missing.

    The remote listing is compared with the per-directory '.mirrorinfo'
    file; files that are new, or whose listing details changed, are
    retrieved in binary mode and recorded.  Afterwards local files that
    are not recorded are reported (or removed when rmok is set) and each
    remote subdirectory is mirrored in turn.

    Honours the module-level flags verbose, interactive, mac and rmok,
    and the skippats pattern list.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose:
            print('Creating local directory %s' % localdir)
        makedir(localdir)
    infofilename = os.path.join(localdir, '.mirrorinfo')
    info = _load_mirrorinfo(infofilename)
    subdirs = []
    listing = []
    if verbose:
        print('Listing remote directory %s...' % pwd)
    f.retrlines('LIST', listing.append)
    for line in listing:
        if verbose > 1:
            print('--> %s' % repr(line))
        parsed = _parse_listing_line(line)
        if parsed is None:
            continue
        filename, mode, infostuff = parsed
        if '/' in filename or os.sep in filename:
            # The name comes from the server; never let it address
            # anything outside localdir.
            if verbose:
                print('Skipping unsafe name %s' % repr(filename))
            continue
        pat = _matching_skip_pattern(filename)
        if pat is not None:
            if verbose > 1:
                print('Skip pattern %s matches %s' % (pat, filename))
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %s' % filename)
            subdirs.append(filename)
            continue
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %s' % filename)
            continue
        fullname = os.path.join(localdir, filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(fullname)
        except os.error:
            pass
        try:
            # Binary mode: retrbinary() delivers the raw bytes of the file.
            fp = open(fullname, 'wb')
        except IOError as msg:
            print("Can't create %s: %s" % (fullname, str(msg)))
            continue
        if verbose:
            print('Retrieving %s from %s as %s...' % (filename, pwd, fullname))
        if verbose:
            fp1 = LoggingFile(fp, 1024, sys.stdout)
        else:
            fp1 = fp
        t0 = time.time()
        try:
            f.retrbinary('RETR ' + filename, fp1.write, 8*1024)
        except ftplib.error_perm as msg:
            print(msg)
            retrieved = 0
        else:
            retrieved = 1
        t1 = time.time()
        nbytes = fp.tell()
        fp.close()
        if fp1 is not fp:
            fp1.close()
        if not retrieved:
            # Do not leave an empty stub behind or claim the file was
            # mirrored; forget any older record so a later run retries.
            try:
                os.unlink(fullname)
            except os.error:
                pass
            if filename in info:
                del info[filename]
                writedict(info, infofilename)
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose:
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (
                int(round(kbytes)), int(round(dt)))
            if t1 > t0:
                summary = summary + ' (~%d Kbytes/sec)' % int(round(kbytes/dt))
            print(summary)
    #
    # Remove local files that are no longer in the remote directory
    names = os.listdir(localdir or os.curdir)
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %s is no longer pertinent' % fullname)
            continue
        if verbose:
            print('Removing local file %s' % fullname)
        try:
            os.unlink(fullname)
        except os.error as msg:
            print("Can't remove local file %s: %s" % (fullname, str(msg)))
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit:
                continue
        if verbose:
            print('Processing subdirectory %s' % subdir)
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %s' % pwd)
            print('Remote cwd %s' % subdir)
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %s : %s" % (subdir, msg))
        else:
            if verbose:
                print('Mirroring as %s' % localsubdir)
            mirrorsubdir(f, localsubdir)
            if verbose > 1:
                print('Remote cwd ..')
            f.cwd('..')
        # Make sure we are back where we started before touching the
        # next subdirectory.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1:
                print('OK.')
| 253 | |
| 254 | # Wrapper around a file for writing to write a hash sign every block. |
class LoggingFile:
    """Write-through wrapper that prints a '#' for every block written.

    fp is the file receiving the data, blocksize the number of bytes one
    hash sign stands for, and outfp the stream the hash signs go to.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0          # total number of bytes passed to write()
        self.hashes = 0         # number of hash signs printed so far
        self.blocksize = blocksize
        self.outfp = outfp

    def write(self, data):
        """Pass data on to fp, printing one '#' per completed block."""
        self.bytes = self.bytes + len(data)
        # Floor division: only completely written blocks earn a hash,
        # regardless of which division semantics are in effect.
        hashes = self.bytes // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        """Terminate the line of hash signs; fp itself is left open."""
        self.outfp.write('\n')
| 272 | |
| 273 | # Ask permission to download a file. |
def askabout(filetype, filename, pwd):
    """Ask the user whether to retrieve something; return 1 or 0.

    filetype says what kind of thing it is (e.g. 'file'), filename is
    its name and pwd the remote directory it lives in.  The question is
    repeated until the reply is a prefix-style yes or no; an empty
    reply counts as no.
    """
    yes_replies = ('y', 'ye', 'yes')
    no_replies = ('', 'n', 'no', 'nop', 'nope')
    question = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    while 1:
        answer = raw_input(question).strip().lower()
        if answer in yes_replies:
            return 1
        if answer in no_replies:
            return 0
        print('Please answer yes or no.')
| 283 | |
| 284 | # Create a directory if it doesn't exist. Recursively create the |
| 285 | # parent directory as well if needed. |
def makedir(pathname):
    """Create directory pathname, creating missing parents first.

    Does nothing if pathname already is a directory.  Raises os.error
    if a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    # dirname == pathname happens for roots that do not exist; recursing
    # on it would never terminate.
    if dirname and dirname != pathname:
        makedir(dirname)
    # With a trailing separator ('local/') the parent *is* the directory
    # itself, so it may already have been created by the call above.
    if os.path.isdir(pathname):
        return
    os.mkdir(pathname, 0o777)
| 292 | |
| 293 | # Write a dictionary to a file in a way that can be read back using |
| 294 | # eval() but is still somewhat readable (i.e. not a single long line). |
def writedict(dict, filename):
    """Write dict to filename as a dictionary display, one item per line.

    Keys and values are written with repr(), so the file can be read
    back with eval() as long as their reprs are valid expressions.  The
    file is closed even if writing fails part-way.
    """
    fp = open(filename, 'w')
    try:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%s: %s,\n' % (repr(key), repr(value)))
        fp.write('}\n')
    finally:
        fp.close()
| 302 | |
# Run the mirror only when executed as a script, so that importing this
# file (e.g. to reuse makedir() or writedict()) does not start a transfer.
if __name__ == '__main__':
    main()