PageRenderTime 90ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/indra/lib/python/indra/ipc/webdav.py

https://bitbucket.org/lindenlab/viewer-beta/
Python | 597 lines | 588 code | 0 blank | 9 comment | 0 complexity | 43385a57e3eec16ef660c90511e88fc4 MD5 | raw file
Possible License(s): LGPL-2.1
  1. """
  2. @file webdav.py
  3. @brief Classes to make manipulation of a webdav store easier.
  4. $LicenseInfo:firstyear=2007&license=mit$
  5. Copyright (c) 2007-2009, Linden Research, Inc.
  6. Permission is hereby granted, free of charge, to any person obtaining a copy
  7. of this software and associated documentation files (the "Software"), to deal
  8. in the Software without restriction, including without limitation the rights
  9. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. copies of the Software, and to permit persons to whom the Software is
  11. furnished to do so, subject to the following conditions:
  12. The above copyright notice and this permission notice shall be included in
  13. all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. THE SOFTWARE.
  21. $/LicenseInfo$
  22. """
  23. import sys, os, httplib, urlparse
  24. import socket, time
  25. import xml.dom.minidom
  26. import syslog
  27. # import signal
  28. __revision__ = '0'
  29. dav_debug = False
  30. # def urlsafe_b64decode (enc):
  31. # return base64.decodestring (enc.replace ('_', '/').replace ('-', '+'))
  32. # def urlsafe_b64encode (str):
  33. # return base64.encodestring (str).replace ('+', '-').replace ('/', '_')
  34. class DAVError (Exception):
  35. """ Base class for exceptions in this module. """
  36. def __init__ (self, status=0, message='', body='', details=''):
  37. self.status = status
  38. self.message = message
  39. self.body = body
  40. self.details = details
  41. Exception.__init__ (self, '%d:%s:%s%s' % (self.status, self.message,
  42. self.body, self.details))
  43. def print_to_stderr (self):
  44. """ print_to_stderr docstring """
  45. print >> sys.stderr, str (self.status) + ' ' + self.message
  46. print >> sys.stderr, str (self.details)
  47. class Timeout (Exception):
  48. """ Timeout docstring """
  49. def __init__ (self, arg=''):
  50. Exception.__init__ (self, arg)
  51. def alarm_handler (signum, frame):
  52. """ alarm_handler docstring """
  53. raise Timeout ('caught alarm')
  54. class WebDAV:
  55. """ WebDAV docstring """
  56. def __init__ (self, url, proxy=None, retries_before_fail=6):
  57. self.init_url = url
  58. self.init_proxy = proxy
  59. self.retries_before_fail = retries_before_fail
  60. url_parsed = urlparse.urlsplit (url)
  61. self.top_path = url_parsed[ 2 ]
  62. # make sure top_path has a trailing /
  63. if self.top_path == None or self.top_path == '':
  64. self.top_path = '/'
  65. elif len (self.top_path) > 1 and self.top_path[-1:] != '/':
  66. self.top_path += '/'
  67. if dav_debug:
  68. syslog.syslog ('new WebDAV %s : %s' % (str (url), str (proxy)))
  69. if proxy:
  70. proxy_parsed = urlparse.urlsplit (proxy)
  71. self.host_header = url_parsed[ 1 ]
  72. host_and_port = proxy_parsed[ 1 ].split (':')
  73. self.host = host_and_port[ 0 ]
  74. if len (host_and_port) > 1:
  75. self.port = int(host_and_port[ 1 ])
  76. else:
  77. self.port = 80
  78. else: # no proxy
  79. host_and_port = url_parsed[ 1 ].split (':')
  80. self.host_header = None
  81. self.host = host_and_port[ 0 ]
  82. if len (host_and_port) > 1:
  83. self.port = int(host_and_port[ 1 ])
  84. else:
  85. self.port = 80
  86. self.connection = False
  87. self.connect ()
  88. def log (self, msg, depth=0):
  89. """ log docstring """
  90. if dav_debug and depth == 0:
  91. host = str (self.init_url)
  92. if host == 'http://int.tuco.lindenlab.com:80/asset/':
  93. host = 'tuco'
  94. if host == 'http://harriet.lindenlab.com/asset-keep/':
  95. host = 'harriet/asset-keep'
  96. if host == 'http://harriet.lindenlab.com/asset-flag/':
  97. host = 'harriet/asset-flag'
  98. if host == 'http://harriet.lindenlab.com/asset/':
  99. host = 'harriet/asset'
  100. if host == 'http://ozzy.lindenlab.com/asset/':
  101. host = 'ozzy/asset'
  102. if host == 'http://station11.lindenlab.com:12041/:':
  103. host = 'station11:12041'
  104. proxy = str (self.init_proxy)
  105. if proxy == 'None':
  106. proxy = ''
  107. if proxy == 'http://int.tuco.lindenlab.com:3128/':
  108. proxy = 'tuco'
  109. syslog.syslog ('WebDAV (%s:%s) %s' % (host, proxy, str (msg)))
  110. def connect (self):
  111. """ connect docstring """
  112. self.log ('connect')
  113. self.connection = httplib.HTTPConnection (self.host, self.port)
  114. def __err (self, response, details):
  115. """ __err docstring """
  116. raise DAVError (response.status, response.reason, response.read (),
  117. str (self.init_url) + ':' + \
  118. str (self.init_proxy) + ':' + str (details))
  119. def request (self, method, path, body=None, headers=None,
  120. read_all=True, body_hook = None, recurse=0, allow_cache=True):
  121. """ request docstring """
  122. # self.log ('request %s %s' % (method, path))
  123. if headers == None:
  124. headers = {}
  125. if not allow_cache:
  126. headers['Pragma'] = 'no-cache'
  127. headers['cache-control'] = 'no-cache'
  128. try:
  129. if method.lower () != 'purge':
  130. if path.startswith ('/'):
  131. path = path[1:]
  132. if self.host_header: # use proxy
  133. headers[ 'host' ] = self.host_header
  134. fullpath = 'http://%s%s%s' % (self.host_header,
  135. self.top_path, path)
  136. else: # no proxy
  137. fullpath = self.top_path + path
  138. else:
  139. fullpath = path
  140. self.connection.request (method, fullpath, body, headers)
  141. if body_hook:
  142. body_hook ()
  143. # signal.signal (signal.SIGALRM, alarm_handler)
  144. # try:
  145. # signal.alarm (120)
  146. # signal.alarm (0)
  147. # except Timeout, e:
  148. # if recurse < 6:
  149. # return self.retry_request (method, path, body, headers,
  150. # read_all, body_hook, recurse)
  151. # else:
  152. # raise DAVError (0, 'timeout', self.host,
  153. # (method, path, body, headers, recurse))
  154. response = self.connection.getresponse ()
  155. if read_all:
  156. while len (response.read (1024)) > 0:
  157. pass
  158. if (response.status == 500 or \
  159. response.status == 503 or \
  160. response.status == 403) and \
  161. recurse < self.retries_before_fail:
  162. return self.retry_request (method, path, body, headers,
  163. read_all, body_hook, recurse)
  164. return response
  165. except (httplib.ResponseNotReady,
  166. httplib.BadStatusLine,
  167. socket.error):
  168. # if the server hangs up on us (keepalive off, broken pipe),
  169. # we need to reconnect and try again.
  170. if recurse < self.retries_before_fail:
  171. return self.retry_request (method, path, body, headers,
  172. read_all, body_hook, recurse)
  173. raise DAVError (0, 'reconnect failed', self.host,
  174. (method, path, body, headers, recurse))
  175. def retry_request (self, method, path, body, headers,
  176. read_all, body_hook, recurse):
  177. """ retry_request docstring """
  178. time.sleep (10.0 * recurse)
  179. self.connect ()
  180. return self.request (method, path, body, headers,
  181. read_all, body_hook, recurse+1)
  182. def propfind (self, path, body=None, depth=1):
  183. """ propfind docstring """
  184. # self.log ('propfind %s' % path)
  185. headers = {'Content-Type':'text/xml; charset="utf-8"',
  186. 'Depth':str(depth)}
  187. response = self.request ('PROPFIND', path, body, headers, False)
  188. if response.status == 207:
  189. return response # Multi-Status
  190. self.__err (response, ('PROPFIND', path, body, headers, 0))
  191. def purge (self, path):
  192. """ issue a squid purge command """
  193. headers = {'Accept':'*/*'}
  194. response = self.request ('PURGE', path, None, headers)
  195. if response.status == 200 or response.status == 404:
  196. # 200 if it was purge, 404 if it wasn't there.
  197. return response
  198. self.__err (response, ('PURGE', path, None, headers))
  199. def get_file_size (self, path):
  200. """
  201. Use propfind to ask a webdav server what the size of
  202. a file is. If used on a directory (collection) return 0
  203. """
  204. self.log ('get_file_size %s' % path)
  205. # "getcontentlength" property
  206. # 8.1.1 Example - Retrieving Named Properties
  207. # http://docs.python.org/lib/module-xml.dom.html
  208. nsurl = 'http://apache.org/dav/props/'
  209. doc = xml.dom.minidom.Document ()
  210. propfind_element = doc.createElementNS (nsurl, "D:propfind")
  211. propfind_element.setAttributeNS (nsurl, 'xmlns:D', 'DAV:')
  212. doc.appendChild (propfind_element)
  213. prop_element = doc.createElementNS (nsurl, "D:prop")
  214. propfind_element.appendChild (prop_element)
  215. con_len_element = doc.createElementNS (nsurl, "D:getcontentlength")
  216. prop_element.appendChild (con_len_element)
  217. response = self.propfind (path, doc.toxml ())
  218. doc.unlink ()
  219. resp_doc = xml.dom.minidom.parseString (response.read ())
  220. cln = resp_doc.getElementsByTagNameNS ('DAV:','getcontentlength')[ 0 ]
  221. try:
  222. content_length = int (cln.childNodes[ 0 ].nodeValue)
  223. except IndexError:
  224. return 0
  225. resp_doc.unlink ()
  226. return content_length
  227. def file_exists (self, path):
  228. """
  229. do an http head on the given file. return True if it succeeds
  230. """
  231. self.log ('file_exists %s' % path)
  232. expect_gzip = path.endswith ('.gz')
  233. response = self.request ('HEAD', path)
  234. got_gzip = response.getheader ('Content-Encoding', '').strip ()
  235. if got_gzip.lower () == 'x-gzip' and expect_gzip == False:
  236. # the asset server fakes us out if we ask for the non-gzipped
  237. # version of an asset, but the server has the gzipped version.
  238. return False
  239. return response.status == 200
  240. def mkdir (self, path):
  241. """ mkdir docstring """
  242. self.log ('mkdir %s' % path)
  243. headers = {}
  244. response = self.request ('MKCOL', path, None, headers)
  245. if response.status == 201:
  246. return # success
  247. if response.status == 405:
  248. return # directory already existed?
  249. self.__err (response, ('MKCOL', path, None, headers, 0))
  250. def delete (self, path):
  251. """ delete docstring """
  252. self.log ('delete %s' % path)
  253. headers = {'Depth':'infinity'} # collections require infinity
  254. response = self.request ('DELETE', path, None, headers)
  255. if response.status == 204:
  256. return # no content
  257. if response.status == 404:
  258. return # hmm
  259. self.__err (response, ('DELETE', path, None, headers, 0))
  260. def list_directory (self, path, dir_filter=None, allow_cache=True,
  261. minimum_cache_time=False):
  262. """
  263. Request an http directory listing and parse the filenames out of lines
  264. like: '<LI><A HREF="X"> X</A>'. If a filter function is provided,
  265. only return filenames that the filter returns True for.
  266. This is sort of grody, but it seems faster than other ways of getting
  267. this information from an isilon.
  268. """
  269. self.log ('list_directory %s' % path)
  270. def try_match (lline, before, after):
  271. """ try_match docstring """
  272. try:
  273. blen = len (before)
  274. asset_start_index = lline.index (before)
  275. asset_end_index = lline.index (after, asset_start_index + blen)
  276. asset = line[ asset_start_index + blen : asset_end_index ]
  277. if not dir_filter or dir_filter (asset):
  278. return [ asset ]
  279. return []
  280. except ValueError:
  281. return []
  282. if len (path) > 0 and path[-1:] != '/':
  283. path += '/'
  284. response = self.request ('GET', path, None, {}, False,
  285. allow_cache=allow_cache)
  286. if allow_cache and minimum_cache_time: # XXX
  287. print response.getheader ('Date')
  288. # s = "2005-12-06T12:13:14"
  289. # from datetime import datetime
  290. # from time import strptime
  291. # datetime(*strptime(s, "%Y-%m-%dT%H:%M:%S")[0:6])
  292. # datetime.datetime(2005, 12, 6, 12, 13, 14)
  293. if response.status != 200:
  294. self.__err (response, ('GET', path, None, {}, 0))
  295. assets = []
  296. for line in response.read ().split ('\n'):
  297. lline = line.lower ()
  298. if lline.find ("parent directory") == -1:
  299. # isilon file
  300. assets += try_match (lline, '<li><a href="', '"> ')
  301. # apache dir
  302. assets += try_match (lline, 'alt="[dir]"> <a href="', '/">')
  303. # apache file
  304. assets += try_match (lline, 'alt="[ ]"> <a href="', '">')
  305. return assets
  306. def __tmp_filename (self, path_and_file):
  307. """ __tmp_filename docstring """
  308. head, tail = os.path.split (path_and_file)
  309. if head != '':
  310. return head + '/.' + tail + '.' + str (os.getpid ())
  311. else:
  312. return head + '.' + tail + '.' + str (os.getpid ())
  313. def __put__ (self, filesize, body_hook, remotefile):
  314. """ __put__ docstring """
  315. headers = {'Content-Length' : str (filesize)}
  316. remotefile_tmp = self.__tmp_filename (remotefile)
  317. response = self.request ('PUT', remotefile_tmp, None,
  318. headers, True, body_hook)
  319. if not response.status in (201, 204): # created, no content
  320. self.__err (response, ('PUT', remotefile, None, headers, 0))
  321. if filesize != self.get_file_size (remotefile_tmp):
  322. try:
  323. self.delete (remotefile_tmp)
  324. except:
  325. pass
  326. raise DAVError (0, 'tmp upload error', remotefile_tmp)
  327. # move the file to its final location
  328. try:
  329. self.rename (remotefile_tmp, remotefile)
  330. except DAVError, exc:
  331. if exc.status == 403: # try to clean up the tmp file
  332. try:
  333. self.delete (remotefile_tmp)
  334. except:
  335. pass
  336. raise
  337. if filesize != self.get_file_size (remotefile):
  338. raise DAVError (0, 'file upload error', str (remotefile_tmp))
  339. def put_string (self, strng, remotefile):
  340. """ put_string docstring """
  341. self.log ('put_string %d -> %s' % (len (strng), remotefile))
  342. filesize = len (strng)
  343. def body_hook ():
  344. """ body_hook docstring """
  345. self.connection.send (strng)
  346. self.__put__ (filesize, body_hook, remotefile)
  347. def put_file (self, localfile, remotefile):
  348. """
  349. Send a local file to a remote webdav store. First, upload to
  350. a temporary filename. Next make sure the file is the size we
  351. expected. Next, move the file to its final location. Next,
  352. check the file size at the final location.
  353. """
  354. self.log ('put_file %s -> %s' % (localfile, remotefile))
  355. filesize = os.path.getsize (localfile)
  356. def body_hook ():
  357. """ body_hook docstring """
  358. handle = open (localfile)
  359. while True:
  360. data = handle.read (1300)
  361. if len (data) == 0:
  362. break
  363. self.connection.send (data)
  364. handle.close ()
  365. self.__put__ (filesize, body_hook, remotefile)
  366. def create_empty_file (self, remotefile):
  367. """ create an empty file """
  368. self.log ('touch_file %s' % (remotefile))
  369. headers = {'Content-Length' : '0'}
  370. response = self.request ('PUT', remotefile, None, headers)
  371. if not response.status in (201, 204): # created, no content
  372. self.__err (response, ('PUT', remotefile, None, headers, 0))
  373. if self.get_file_size (remotefile) != 0:
  374. raise DAVError (0, 'file upload error', str (remotefile))
  375. def __get_file_setup (self, remotefile, check_size=True):
  376. """ __get_file_setup docstring """
  377. if check_size:
  378. remotesize = self.get_file_size (remotefile)
  379. response = self.request ('GET', remotefile, None, {}, False)
  380. if response.status != 200:
  381. self.__err (response, ('GET', remotefile, None, {}, 0))
  382. try:
  383. content_length = int (response.getheader ("Content-Length"))
  384. except TypeError:
  385. content_length = None
  386. if check_size:
  387. if content_length != remotesize:
  388. raise DAVError (0, 'file DL size error', remotefile)
  389. return (response, content_length)
  390. def __get_file_read (self, writehandle, response, content_length):
  391. """ __get_file_read docstring """
  392. if content_length != None:
  393. so_far_length = 0
  394. while so_far_length < content_length:
  395. data = response.read (content_length - so_far_length)
  396. if len (data) == 0:
  397. raise DAVError (0, 'short file download')
  398. so_far_length += len (data)
  399. writehandle.write (data)
  400. while len (response.read ()) > 0:
  401. pass
  402. else:
  403. while True:
  404. data = response.read ()
  405. if (len (data) < 1):
  406. break
  407. writehandle.write (data)
  408. def get_file (self, remotefile, localfile, check_size=True):
  409. """
  410. Get a remote file from a webdav server. Download to a local
  411. tmp file, then move into place. Sanity check file sizes as
  412. we go.
  413. """
  414. self.log ('get_file %s -> %s' % (remotefile, localfile))
  415. (response, content_length) = \
  416. self.__get_file_setup (remotefile, check_size)
  417. localfile_tmp = self.__tmp_filename (localfile)
  418. handle = open (localfile_tmp, 'w')
  419. self.__get_file_read (handle, response, content_length)
  420. handle.close ()
  421. if check_size:
  422. if content_length != os.path.getsize (localfile_tmp):
  423. raise DAVError (0, 'file DL size error',
  424. remotefile+','+localfile)
  425. os.rename (localfile_tmp, localfile)
  426. def get_file_as_string (self, remotefile, check_size=True):
  427. """
  428. download a file from a webdav server and return it as a string.
  429. """
  430. self.log ('get_file_as_string %s' % remotefile)
  431. (response, content_length) = \
  432. self.__get_file_setup (remotefile, check_size)
  433. # (tmp_handle, tmp_filename) = tempfile.mkstemp ()
  434. tmp_handle = os.tmpfile ()
  435. self.__get_file_read (tmp_handle, response, content_length)
  436. tmp_handle.seek (0)
  437. ret = tmp_handle.read ()
  438. tmp_handle.close ()
  439. # os.unlink (tmp_filename)
  440. return ret
  441. def get_post_as_string (self, remotefile, body):
  442. """
  443. Do an http POST, send body, get response and return it.
  444. """
  445. self.log ('get_post_as_string %s' % remotefile)
  446. # headers = {'Content-Type':'application/x-www-form-urlencoded'}
  447. headers = {'Content-Type':'text/xml; charset="utf-8"'}
  448. # b64body = urlsafe_b64encode (asset_url)
  449. response = self.request ('POST', remotefile, body, headers, False)
  450. if response.status != 200:
  451. self.__err (response, ('POST', remotefile, body, headers, 0))
  452. try:
  453. content_length = int (response.getheader ('Content-Length'))
  454. except TypeError:
  455. content_length = None
  456. tmp_handle = os.tmpfile ()
  457. self.__get_file_read (tmp_handle, response, content_length)
  458. tmp_handle.seek (0)
  459. ret = tmp_handle.read ()
  460. tmp_handle.close ()
  461. return ret
  462. def __destination_command (self, verb, remotesrc, dstdav, remotedst):
  463. """
  464. self and dstdav should point to the same http server.
  465. """
  466. if len (remotedst) > 0 and remotedst[ 0 ] == '/':
  467. remotedst = remotedst[1:]
  468. headers = {'Destination': 'http://%s:%d%s%s' % (dstdav.host,
  469. dstdav.port,
  470. dstdav.top_path,
  471. remotedst)}
  472. response = self.request (verb, remotesrc, None, headers)
  473. if response.status == 201:
  474. return # created
  475. if response.status == 204:
  476. return # no content
  477. self.__err (response, (verb, remotesrc, None, headers, 0))
  478. def rename (self, remotesrc, remotedst):
  479. """ rename a file on a webdav server """
  480. self.log ('rename %s -> %s' % (remotesrc, remotedst))
  481. self.__destination_command ('MOVE', remotesrc, self, remotedst)
  482. def xrename (self, remotesrc, dstdav, remotedst):
  483. """ rename a file on a webdav server """
  484. self.log ('xrename %s -> %s' % (remotesrc, remotedst))
  485. self.__destination_command ('MOVE', remotesrc, dstdav, remotedst)
  486. def copy (self, remotesrc, remotedst):
  487. """ copy a file on a webdav server """
  488. self.log ('copy %s -> %s' % (remotesrc, remotedst))
  489. self.__destination_command ('COPY', remotesrc, self, remotedst)
  490. def xcopy (self, remotesrc, dstdav, remotedst):
  491. """ copy a file on a webdav server """
  492. self.log ('xcopy %s -> %s' % (remotesrc, remotedst))
  493. self.__destination_command ('COPY', remotesrc, dstdav, remotedst)
  494. def put_string (data, url):
  495. """
  496. upload string s to a url
  497. """
  498. url_parsed = urlparse.urlsplit (url)
  499. dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ]))
  500. dav.put_string (data, url_parsed[ 2 ])
  501. def get_string (url, check_size=True):
  502. """
  503. return the contents of a url as a string
  504. """
  505. url_parsed = urlparse.urlsplit (url)
  506. dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ]))
  507. return dav.get_file_as_string (url_parsed[ 2 ], check_size)