PageRenderTime 47ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/mercurial/byterange.py

https://bitbucket.org/mirror/mercurial/
Python | 459 lines | 433 code | 5 blank | 21 comment | 0 complexity | eeb2b4633f4f73daf0ecba8d345ac10b MD5 | raw file
Possible License(s): GPL-2.0
  1. # This library is free software; you can redistribute it and/or
  2. # modify it under the terms of the GNU Lesser General Public
  3. # License as published by the Free Software Foundation; either
  4. # version 2.1 of the License, or (at your option) any later version.
  5. #
  6. # This library is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  9. # Lesser General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU Lesser General Public
  12. # License along with this library; if not, see
  13. # <http://www.gnu.org/licenses/>.
  14. # This file is part of urlgrabber, a high-level cross-protocol url-grabber
  15. # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
  16. # $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
  17. import os
  18. import stat
  19. import urllib
  20. import urllib2
  21. import email.Utils
  22. class RangeError(IOError):
  23. """Error raised when an unsatisfiable range is requested."""
  24. pass
  25. class HTTPRangeHandler(urllib2.BaseHandler):
  26. """Handler that enables HTTP Range headers.
  27. This was extremely simple. The Range header is a HTTP feature to
  28. begin with so all this class does is tell urllib2 that the
  29. "206 Partial Content" response from the HTTP server is what we
  30. expected.
  31. Example:
  32. import urllib2
  33. import byterange
  34. range_handler = range.HTTPRangeHandler()
  35. opener = urllib2.build_opener(range_handler)
  36. # install it
  37. urllib2.install_opener(opener)
  38. # create Request and set Range header
  39. req = urllib2.Request('http://www.python.org/')
  40. req.header['Range'] = 'bytes=30-50'
  41. f = urllib2.urlopen(req)
  42. """
  43. def http_error_206(self, req, fp, code, msg, hdrs):
  44. # 206 Partial Content Response
  45. r = urllib.addinfourl(fp, hdrs, req.get_full_url())
  46. r.code = code
  47. r.msg = msg
  48. return r
  49. def http_error_416(self, req, fp, code, msg, hdrs):
  50. # HTTP's Range Not Satisfiable error
  51. raise RangeError('Requested Range Not Satisfiable')
  52. class RangeableFileObject(object):
  53. """File object wrapper to enable raw range handling.
  54. This was implemented primarily for handling range
  55. specifications for file:// urls. This object effectively makes
  56. a file object look like it consists only of a range of bytes in
  57. the stream.
  58. Examples:
  59. # expose 10 bytes, starting at byte position 20, from
  60. # /etc/aliases.
  61. >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
  62. # seek seeks within the range (to position 23 in this case)
  63. >>> fo.seek(3)
  64. # tell tells where your at _within the range_ (position 3 in
  65. # this case)
  66. >>> fo.tell()
  67. # read EOFs if an attempt is made to read past the last
  68. # byte in the range. the following will return only 7 bytes.
  69. >>> fo.read(30)
  70. """
  71. def __init__(self, fo, rangetup):
  72. """Create a RangeableFileObject.
  73. fo -- a file like object. only the read() method need be
  74. supported but supporting an optimized seek() is
  75. preferable.
  76. rangetup -- a (firstbyte,lastbyte) tuple specifying the range
  77. to work over.
  78. The file object provided is assumed to be at byte offset 0.
  79. """
  80. self.fo = fo
  81. (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
  82. self.realpos = 0
  83. self._do_seek(self.firstbyte)
  84. def __getattr__(self, name):
  85. """This effectively allows us to wrap at the instance level.
  86. Any attribute not found in _this_ object will be searched for
  87. in self.fo. This includes methods."""
  88. return getattr(self.fo, name)
  89. def tell(self):
  90. """Return the position within the range.
  91. This is different from fo.seek in that position 0 is the
  92. first byte position of the range tuple. For example, if
  93. this object was created with a range tuple of (500,899),
  94. tell() will return 0 when at byte position 500 of the file.
  95. """
  96. return (self.realpos - self.firstbyte)
  97. def seek(self, offset, whence=0):
  98. """Seek within the byte range.
  99. Positioning is identical to that described under tell().
  100. """
  101. assert whence in (0, 1, 2)
  102. if whence == 0: # absolute seek
  103. realoffset = self.firstbyte + offset
  104. elif whence == 1: # relative seek
  105. realoffset = self.realpos + offset
  106. elif whence == 2: # absolute from end of file
  107. # XXX: are we raising the right Error here?
  108. raise IOError('seek from end of file not supported.')
  109. # do not allow seek past lastbyte in range
  110. if self.lastbyte and (realoffset >= self.lastbyte):
  111. realoffset = self.lastbyte
  112. self._do_seek(realoffset - self.realpos)
  113. def read(self, size=-1):
  114. """Read within the range.
  115. This method will limit the size read based on the range.
  116. """
  117. size = self._calc_read_size(size)
  118. rslt = self.fo.read(size)
  119. self.realpos += len(rslt)
  120. return rslt
  121. def readline(self, size=-1):
  122. """Read lines within the range.
  123. This method will limit the size read based on the range.
  124. """
  125. size = self._calc_read_size(size)
  126. rslt = self.fo.readline(size)
  127. self.realpos += len(rslt)
  128. return rslt
  129. def _calc_read_size(self, size):
  130. """Handles calculating the amount of data to read based on
  131. the range.
  132. """
  133. if self.lastbyte:
  134. if size > -1:
  135. if ((self.realpos + size) >= self.lastbyte):
  136. size = (self.lastbyte - self.realpos)
  137. else:
  138. size = (self.lastbyte - self.realpos)
  139. return size
  140. def _do_seek(self, offset):
  141. """Seek based on whether wrapped object supports seek().
  142. offset is relative to the current position (self.realpos).
  143. """
  144. assert offset >= 0
  145. seek = getattr(self.fo, 'seek', self._poor_mans_seek)
  146. seek(self.realpos + offset)
  147. self.realpos += offset
  148. def _poor_mans_seek(self, offset):
  149. """Seek by calling the wrapped file objects read() method.
  150. This is used for file like objects that do not have native
  151. seek support. The wrapped objects read() method is called
  152. to manually seek to the desired position.
  153. offset -- read this number of bytes from the wrapped
  154. file object.
  155. raise RangeError if we encounter EOF before reaching the
  156. specified offset.
  157. """
  158. pos = 0
  159. bufsize = 1024
  160. while pos < offset:
  161. if (pos + bufsize) > offset:
  162. bufsize = offset - pos
  163. buf = self.fo.read(bufsize)
  164. if len(buf) != bufsize:
  165. raise RangeError('Requested Range Not Satisfiable')
  166. pos += bufsize
  167. class FileRangeHandler(urllib2.FileHandler):
  168. """FileHandler subclass that adds Range support.
  169. This class handles Range headers exactly like an HTTP
  170. server would.
  171. """
  172. def open_local_file(self, req):
  173. import mimetypes
  174. import email
  175. host = req.get_host()
  176. file = req.get_selector()
  177. localfile = urllib.url2pathname(file)
  178. stats = os.stat(localfile)
  179. size = stats[stat.ST_SIZE]
  180. modified = email.Utils.formatdate(stats[stat.ST_MTIME])
  181. mtype = mimetypes.guess_type(file)[0]
  182. if host:
  183. host, port = urllib.splitport(host)
  184. if port or socket.gethostbyname(host) not in self.get_names():
  185. raise urllib2.URLError('file not on local host')
  186. fo = open(localfile,'rb')
  187. brange = req.headers.get('Range', None)
  188. brange = range_header_to_tuple(brange)
  189. assert brange != ()
  190. if brange:
  191. (fb, lb) = brange
  192. if lb == '':
  193. lb = size
  194. if fb < 0 or fb > size or lb > size:
  195. raise RangeError('Requested Range Not Satisfiable')
  196. size = (lb - fb)
  197. fo = RangeableFileObject(fo, (fb, lb))
  198. headers = email.message_from_string(
  199. 'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
  200. (mtype or 'text/plain', size, modified))
  201. return urllib.addinfourl(fo, headers, 'file:'+file)
  202. # FTP Range Support
  203. # Unfortunately, a large amount of base FTP code had to be copied
  204. # from urllib and urllib2 in order to insert the FTP REST command.
  205. # Code modifications for range support have been commented as
  206. # follows:
  207. # -- range support modifications start/end here
  208. from urllib import splitport, splituser, splitpasswd, splitattr, \
  209. unquote, addclosehook, addinfourl
  210. import ftplib
  211. import socket
  212. import mimetypes
  213. import email
  214. class FTPRangeHandler(urllib2.FTPHandler):
  215. def ftp_open(self, req):
  216. host = req.get_host()
  217. if not host:
  218. raise IOError('ftp error', 'no host given')
  219. host, port = splitport(host)
  220. if port is None:
  221. port = ftplib.FTP_PORT
  222. else:
  223. port = int(port)
  224. # username/password handling
  225. user, host = splituser(host)
  226. if user:
  227. user, passwd = splitpasswd(user)
  228. else:
  229. passwd = None
  230. host = unquote(host)
  231. user = unquote(user or '')
  232. passwd = unquote(passwd or '')
  233. try:
  234. host = socket.gethostbyname(host)
  235. except socket.error, msg:
  236. raise urllib2.URLError(msg)
  237. path, attrs = splitattr(req.get_selector())
  238. dirs = path.split('/')
  239. dirs = map(unquote, dirs)
  240. dirs, file = dirs[:-1], dirs[-1]
  241. if dirs and not dirs[0]:
  242. dirs = dirs[1:]
  243. try:
  244. fw = self.connect_ftp(user, passwd, host, port, dirs)
  245. type = file and 'I' or 'D'
  246. for attr in attrs:
  247. attr, value = splitattr(attr)
  248. if attr.lower() == 'type' and \
  249. value in ('a', 'A', 'i', 'I', 'd', 'D'):
  250. type = value.upper()
  251. # -- range support modifications start here
  252. rest = None
  253. range_tup = range_header_to_tuple(req.headers.get('Range', None))
  254. assert range_tup != ()
  255. if range_tup:
  256. (fb, lb) = range_tup
  257. if fb > 0:
  258. rest = fb
  259. # -- range support modifications end here
  260. fp, retrlen = fw.retrfile(file, type, rest)
  261. # -- range support modifications start here
  262. if range_tup:
  263. (fb, lb) = range_tup
  264. if lb == '':
  265. if retrlen is None or retrlen == 0:
  266. raise RangeError('Requested Range Not Satisfiable due'
  267. ' to unobtainable file length.')
  268. lb = retrlen
  269. retrlen = lb - fb
  270. if retrlen < 0:
  271. # beginning of range is larger than file
  272. raise RangeError('Requested Range Not Satisfiable')
  273. else:
  274. retrlen = lb - fb
  275. fp = RangeableFileObject(fp, (0, retrlen))
  276. # -- range support modifications end here
  277. headers = ""
  278. mtype = mimetypes.guess_type(req.get_full_url())[0]
  279. if mtype:
  280. headers += "Content-Type: %s\n" % mtype
  281. if retrlen is not None and retrlen >= 0:
  282. headers += "Content-Length: %d\n" % retrlen
  283. headers = email.message_from_string(headers)
  284. return addinfourl(fp, headers, req.get_full_url())
  285. except ftplib.all_errors, msg:
  286. raise IOError('ftp error', msg)
  287. def connect_ftp(self, user, passwd, host, port, dirs):
  288. fw = ftpwrapper(user, passwd, host, port, dirs)
  289. return fw
  290. class ftpwrapper(urllib.ftpwrapper):
  291. # range support note:
  292. # this ftpwrapper code is copied directly from
  293. # urllib. The only enhancement is to add the rest
  294. # argument and pass it on to ftp.ntransfercmd
  295. def retrfile(self, file, type, rest=None):
  296. self.endtransfer()
  297. if type in ('d', 'D'):
  298. cmd = 'TYPE A'
  299. isdir = 1
  300. else:
  301. cmd = 'TYPE ' + type
  302. isdir = 0
  303. try:
  304. self.ftp.voidcmd(cmd)
  305. except ftplib.all_errors:
  306. self.init()
  307. self.ftp.voidcmd(cmd)
  308. conn = None
  309. if file and not isdir:
  310. # Use nlst to see if the file exists at all
  311. try:
  312. self.ftp.nlst(file)
  313. except ftplib.error_perm, reason:
  314. raise IOError('ftp error', reason)
  315. # Restore the transfer mode!
  316. self.ftp.voidcmd(cmd)
  317. # Try to retrieve as a file
  318. try:
  319. cmd = 'RETR ' + file
  320. conn = self.ftp.ntransfercmd(cmd, rest)
  321. except ftplib.error_perm, reason:
  322. if str(reason).startswith('501'):
  323. # workaround for REST not supported error
  324. fp, retrlen = self.retrfile(file, type)
  325. fp = RangeableFileObject(fp, (rest,''))
  326. return (fp, retrlen)
  327. elif not str(reason).startswith('550'):
  328. raise IOError('ftp error', reason)
  329. if not conn:
  330. # Set transfer mode to ASCII!
  331. self.ftp.voidcmd('TYPE A')
  332. # Try a directory listing
  333. if file:
  334. cmd = 'LIST ' + file
  335. else:
  336. cmd = 'LIST'
  337. conn = self.ftp.ntransfercmd(cmd)
  338. self.busy = 1
  339. # Pass back both a suitably decorated object and a retrieval length
  340. return (addclosehook(conn[0].makefile('rb'),
  341. self.endtransfer), conn[1])
  342. ####################################################################
  343. # Range Tuple Functions
  344. # XXX: These range tuple functions might go better in a class.
  345. _rangere = None
  346. def range_header_to_tuple(range_header):
  347. """Get a (firstbyte,lastbyte) tuple from a Range header value.
  348. Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
  349. function pulls the firstbyte and lastbyte values and returns
  350. a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
  351. the header value, it is returned as an empty string in the
  352. tuple.
  353. Return None if range_header is None
  354. Return () if range_header does not conform to the range spec
  355. pattern.
  356. """
  357. global _rangere
  358. if range_header is None:
  359. return None
  360. if _rangere is None:
  361. import re
  362. _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
  363. match = _rangere.match(range_header)
  364. if match:
  365. tup = range_tuple_normalize(match.group(1, 2))
  366. if tup and tup[1]:
  367. tup = (tup[0], tup[1]+1)
  368. return tup
  369. return ()
  370. def range_tuple_to_header(range_tup):
  371. """Convert a range tuple to a Range header value.
  372. Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
  373. if no range is needed.
  374. """
  375. if range_tup is None:
  376. return None
  377. range_tup = range_tuple_normalize(range_tup)
  378. if range_tup:
  379. if range_tup[1]:
  380. range_tup = (range_tup[0], range_tup[1] - 1)
  381. return 'bytes=%s-%s' % range_tup
  382. def range_tuple_normalize(range_tup):
  383. """Normalize a (first_byte,last_byte) range tuple.
  384. Return a tuple whose first element is guaranteed to be an int
  385. and whose second element will be '' (meaning: the last byte) or
  386. an int. Finally, return None if the normalized tuple == (0,'')
  387. as that is equivalent to retrieving the entire file.
  388. """
  389. if range_tup is None:
  390. return None
  391. # handle first byte
  392. fb = range_tup[0]
  393. if fb in (None, ''):
  394. fb = 0
  395. else:
  396. fb = int(fb)
  397. # handle last byte
  398. try:
  399. lb = range_tup[1]
  400. except IndexError:
  401. lb = ''
  402. else:
  403. if lb is None:
  404. lb = ''
  405. elif lb != '':
  406. lb = int(lb)
  407. # check if range is over the entire file
  408. if (fb, lb) == (0, ''):
  409. return None
  410. # check that the range is valid
  411. if lb < fb:
  412. raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
  413. return (fb, lb)