/Tools/scripts/ftpmirror.py

http://unladen-swallow.googlecode.com/ · Python · 400 lines · 357 code · 6 blank · 37 comment · 12 complexity · 79dd1fea53d3d4ad26737608f4862572 MD5 · raw file

  1. #! /usr/bin/env python
  2. """Mirror a remote ftp subtree into a local directory tree.
  3. usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
  4. [-l username [-p passwd [-a account]]]
  5. hostname[:port] [remotedir [localdir]]
  6. -v: verbose
  7. -q: quiet
  8. -i: interactive mode
  9. -m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
  10. -n: don't log in
  11. -r: remove local files/directories no longer pertinent
  12. -l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
  13. -s pat: skip files matching pattern
  14. hostname: remote host w/ optional port separated by ':'
  15. remotedir: remote directory (default initial)
  16. localdir: local directory (default current)
  17. """
  18. import os
  19. import sys
  20. import time
  21. import getopt
  22. import ftplib
  23. import netrc
  24. from fnmatch import fnmatch
  25. # Print usage message and exit
  26. def usage(*args):
  27. sys.stdout = sys.stderr
  28. for msg in args: print msg
  29. print __doc__
  30. sys.exit(2)
  31. verbose = 1 # 0 for -q, 2 for -v
  32. interactive = 0
  33. mac = 0
  34. rmok = 0
  35. nologin = 0
  36. skippats = ['.', '..', '.mirrorinfo']
  37. # Main program: parse command line and start processing
  38. def main():
  39. global verbose, interactive, mac, rmok, nologin
  40. try:
  41. opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
  42. except getopt.error, msg:
  43. usage(msg)
  44. login = ''
  45. passwd = ''
  46. account = ''
  47. if not args: usage('hostname missing')
  48. host = args[0]
  49. port = 0
  50. if ':' in host:
  51. host, port = host.split(':', 1)
  52. port = int(port)
  53. try:
  54. auth = netrc.netrc().authenticators(host)
  55. if auth is not None:
  56. login, account, passwd = auth
  57. except (netrc.NetrcParseError, IOError):
  58. pass
  59. for o, a in opts:
  60. if o == '-l': login = a
  61. if o == '-p': passwd = a
  62. if o == '-a': account = a
  63. if o == '-v': verbose = verbose + 1
  64. if o == '-q': verbose = 0
  65. if o == '-i': interactive = 1
  66. if o == '-m': mac = 1; nologin = 1; skippats.append('*.o')
  67. if o == '-n': nologin = 1
  68. if o == '-r': rmok = 1
  69. if o == '-s': skippats.append(a)
  70. remotedir = ''
  71. localdir = ''
  72. if args[1:]:
  73. remotedir = args[1]
  74. if args[2:]:
  75. localdir = args[2]
  76. if args[3:]: usage('too many arguments')
  77. #
  78. f = ftplib.FTP()
  79. if verbose: print "Connecting to '%s%s'..." % (host,
  80. (port and ":%d"%port or ""))
  81. f.connect(host,port)
  82. if not nologin:
  83. if verbose:
  84. print 'Logging in as %r...' % (login or 'anonymous')
  85. f.login(login, passwd, account)
  86. if verbose: print 'OK.'
  87. pwd = f.pwd()
  88. if verbose > 1: print 'PWD =', repr(pwd)
  89. if remotedir:
  90. if verbose > 1: print 'cwd(%s)' % repr(remotedir)
  91. f.cwd(remotedir)
  92. if verbose > 1: print 'OK.'
  93. pwd = f.pwd()
  94. if verbose > 1: print 'PWD =', repr(pwd)
  95. #
  96. mirrorsubdir(f, localdir)
  97. # Core logic: mirror one subdirectory (recursively)
  98. def mirrorsubdir(f, localdir):
  99. pwd = f.pwd()
  100. if localdir and not os.path.isdir(localdir):
  101. if verbose: print 'Creating local directory', repr(localdir)
  102. try:
  103. makedir(localdir)
  104. except os.error, msg:
  105. print "Failed to establish local directory", repr(localdir)
  106. return
  107. infofilename = os.path.join(localdir, '.mirrorinfo')
  108. try:
  109. text = open(infofilename, 'r').read()
  110. except IOError, msg:
  111. text = '{}'
  112. try:
  113. info = eval(text)
  114. except (SyntaxError, NameError):
  115. print 'Bad mirror info in', repr(infofilename)
  116. info = {}
  117. subdirs = []
  118. listing = []
  119. if verbose: print 'Listing remote directory %r...' % (pwd,)
  120. f.retrlines('LIST', listing.append)
  121. filesfound = []
  122. for line in listing:
  123. if verbose > 1: print '-->', repr(line)
  124. if mac:
  125. # Mac listing has just filenames;
  126. # trailing / means subdirectory
  127. filename = line.strip()
  128. mode = '-'
  129. if filename[-1:] == '/':
  130. filename = filename[:-1]
  131. mode = 'd'
  132. infostuff = ''
  133. else:
  134. # Parse, assuming a UNIX listing
  135. words = line.split(None, 8)
  136. if len(words) < 6:
  137. if verbose > 1: print 'Skipping short line'
  138. continue
  139. filename = words[-1].lstrip()
  140. i = filename.find(" -> ")
  141. if i >= 0:
  142. # words[0] had better start with 'l'...
  143. if verbose > 1:
  144. print 'Found symbolic link %r' % (filename,)
  145. linkto = filename[i+4:]
  146. filename = filename[:i]
  147. infostuff = words[-5:-1]
  148. mode = words[0]
  149. skip = 0
  150. for pat in skippats:
  151. if fnmatch(filename, pat):
  152. if verbose > 1:
  153. print 'Skip pattern', repr(pat),
  154. print 'matches', repr(filename)
  155. skip = 1
  156. break
  157. if skip:
  158. continue
  159. if mode[0] == 'd':
  160. if verbose > 1:
  161. print 'Remembering subdirectory', repr(filename)
  162. subdirs.append(filename)
  163. continue
  164. filesfound.append(filename)
  165. if info.has_key(filename) and info[filename] == infostuff:
  166. if verbose > 1:
  167. print 'Already have this version of',repr(filename)
  168. continue
  169. fullname = os.path.join(localdir, filename)
  170. tempname = os.path.join(localdir, '@'+filename)
  171. if interactive:
  172. doit = askabout('file', filename, pwd)
  173. if not doit:
  174. if not info.has_key(filename):
  175. info[filename] = 'Not retrieved'
  176. continue
  177. try:
  178. os.unlink(tempname)
  179. except os.error:
  180. pass
  181. if mode[0] == 'l':
  182. if verbose:
  183. print "Creating symlink %r -> %r" % (filename, linkto)
  184. try:
  185. os.symlink(linkto, tempname)
  186. except IOError, msg:
  187. print "Can't create %r: %s" % (tempname, msg)
  188. continue
  189. else:
  190. try:
  191. fp = open(tempname, 'wb')
  192. except IOError, msg:
  193. print "Can't create %r: %s" % (tempname, msg)
  194. continue
  195. if verbose:
  196. print 'Retrieving %r from %r as %r...' % (filename, pwd, fullname)
  197. if verbose:
  198. fp1 = LoggingFile(fp, 1024, sys.stdout)
  199. else:
  200. fp1 = fp
  201. t0 = time.time()
  202. try:
  203. f.retrbinary('RETR ' + filename,
  204. fp1.write, 8*1024)
  205. except ftplib.error_perm, msg:
  206. print msg
  207. t1 = time.time()
  208. bytes = fp.tell()
  209. fp.close()
  210. if fp1 != fp:
  211. fp1.close()
  212. try:
  213. os.unlink(fullname)
  214. except os.error:
  215. pass # Ignore the error
  216. try:
  217. os.rename(tempname, fullname)
  218. except os.error, msg:
  219. print "Can't rename %r to %r: %s" % (tempname, fullname, msg)
  220. continue
  221. info[filename] = infostuff
  222. writedict(info, infofilename)
  223. if verbose and mode[0] != 'l':
  224. dt = t1 - t0
  225. kbytes = bytes / 1024.0
  226. print int(round(kbytes)),
  227. print 'Kbytes in',
  228. print int(round(dt)),
  229. print 'seconds',
  230. if t1 > t0:
  231. print '(~%d Kbytes/sec)' % \
  232. int(round(kbytes/dt),)
  233. print
  234. #
  235. # Remove files from info that are no longer remote
  236. deletions = 0
  237. for filename in info.keys():
  238. if filename not in filesfound:
  239. if verbose:
  240. print "Removing obsolete info entry for",
  241. print repr(filename), "in", repr(localdir or ".")
  242. del info[filename]
  243. deletions = deletions + 1
  244. if deletions:
  245. writedict(info, infofilename)
  246. #
  247. # Remove local files that are no longer in the remote directory
  248. try:
  249. if not localdir: names = os.listdir(os.curdir)
  250. else: names = os.listdir(localdir)
  251. except os.error:
  252. names = []
  253. for name in names:
  254. if name[0] == '.' or info.has_key(name) or name in subdirs:
  255. continue
  256. skip = 0
  257. for pat in skippats:
  258. if fnmatch(name, pat):
  259. if verbose > 1:
  260. print 'Skip pattern', repr(pat),
  261. print 'matches', repr(name)
  262. skip = 1
  263. break
  264. if skip:
  265. continue
  266. fullname = os.path.join(localdir, name)
  267. if not rmok:
  268. if verbose:
  269. print 'Local file', repr(fullname),
  270. print 'is no longer pertinent'
  271. continue
  272. if verbose: print 'Removing local file/dir', repr(fullname)
  273. remove(fullname)
  274. #
  275. # Recursively mirror subdirectories
  276. for subdir in subdirs:
  277. if interactive:
  278. doit = askabout('subdirectory', subdir, pwd)
  279. if not doit: continue
  280. if verbose: print 'Processing subdirectory', repr(subdir)
  281. localsubdir = os.path.join(localdir, subdir)
  282. pwd = f.pwd()
  283. if verbose > 1:
  284. print 'Remote directory now:', repr(pwd)
  285. print 'Remote cwd', repr(subdir)
  286. try:
  287. f.cwd(subdir)
  288. except ftplib.error_perm, msg:
  289. print "Can't chdir to", repr(subdir), ":", repr(msg)
  290. else:
  291. if verbose: print 'Mirroring as', repr(localsubdir)
  292. mirrorsubdir(f, localsubdir)
  293. if verbose > 1: print 'Remote cwd ..'
  294. f.cwd('..')
  295. newpwd = f.pwd()
  296. if newpwd != pwd:
  297. print 'Ended up in wrong directory after cd + cd ..'
  298. print 'Giving up now.'
  299. break
  300. else:
  301. if verbose > 1: print 'OK.'
  302. # Helper to remove a file or directory tree
  303. def remove(fullname):
  304. if os.path.isdir(fullname) and not os.path.islink(fullname):
  305. try:
  306. names = os.listdir(fullname)
  307. except os.error:
  308. names = []
  309. ok = 1
  310. for name in names:
  311. if not remove(os.path.join(fullname, name)):
  312. ok = 0
  313. if not ok:
  314. return 0
  315. try:
  316. os.rmdir(fullname)
  317. except os.error, msg:
  318. print "Can't remove local directory %r: %s" % (fullname, msg)
  319. return 0
  320. else:
  321. try:
  322. os.unlink(fullname)
  323. except os.error, msg:
  324. print "Can't remove local file %r: %s" % (fullname, msg)
  325. return 0
  326. return 1
  327. # Wrapper around a file for writing to write a hash sign every block.
  328. class LoggingFile:
  329. def __init__(self, fp, blocksize, outfp):
  330. self.fp = fp
  331. self.bytes = 0
  332. self.hashes = 0
  333. self.blocksize = blocksize
  334. self.outfp = outfp
  335. def write(self, data):
  336. self.bytes = self.bytes + len(data)
  337. hashes = int(self.bytes) / self.blocksize
  338. while hashes > self.hashes:
  339. self.outfp.write('#')
  340. self.outfp.flush()
  341. self.hashes = self.hashes + 1
  342. self.fp.write(data)
  343. def close(self):
  344. self.outfp.write('\n')
  345. # Ask permission to download a file.
  346. def askabout(filetype, filename, pwd):
  347. prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
  348. while 1:
  349. reply = raw_input(prompt).strip().lower()
  350. if reply in ['y', 'ye', 'yes']:
  351. return 1
  352. if reply in ['', 'n', 'no', 'nop', 'nope']:
  353. return 0
  354. print 'Please answer yes or no.'
  355. # Create a directory if it doesn't exist. Recursively create the
  356. # parent directory as well if needed.
  357. def makedir(pathname):
  358. if os.path.isdir(pathname):
  359. return
  360. dirname = os.path.dirname(pathname)
  361. if dirname: makedir(dirname)
  362. os.mkdir(pathname, 0777)
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
  366. def writedict(dict, filename):
  367. dir, fname = os.path.split(filename)
  368. tempname = os.path.join(dir, '@' + fname)
  369. backup = os.path.join(dir, fname + '~')
  370. try:
  371. os.unlink(backup)
  372. except os.error:
  373. pass
  374. fp = open(tempname, 'w')
  375. fp.write('{\n')
  376. for key, value in dict.items():
  377. fp.write('%r: %r,\n' % (key, value))
  378. fp.write('}\n')
  379. fp.close()
  380. try:
  381. os.rename(filename, backup)
  382. except os.error:
  383. pass
  384. os.rename(tempname, filename)
# Run the mirror only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()