PageRenderTime 59ms CodeModel.GetById 19ms app.highlight 32ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/lib/python/indra/ipc/webdav.py

https://bitbucket.org/lindenlab/viewer-beta/
Python | 597 lines | 588 code | 0 blank | 9 comment | 0 complexity | 43385a57e3eec16ef660c90511e88fc4 MD5 | raw file
  1"""
  2@file webdav.py
  3@brief Classes to make manipulation of a webdav store easier.
  4
  5$LicenseInfo:firstyear=2007&license=mit$
  6
  7Copyright (c) 2007-2009, Linden Research, Inc.
  8
  9Permission is hereby granted, free of charge, to any person obtaining a copy
 10of this software and associated documentation files (the "Software"), to deal
 11in the Software without restriction, including without limitation the rights
 12to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 13copies of the Software, and to permit persons to whom the Software is
 14furnished to do so, subject to the following conditions:
 15
 16The above copyright notice and this permission notice shall be included in
 17all copies or substantial portions of the Software.
 18
 19THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 20IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 21FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 22AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 23LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 24OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 25THE SOFTWARE.
 26$/LicenseInfo$
 27"""
 28
 29import sys, os, httplib, urlparse
 30import socket, time
 31import xml.dom.minidom
 32import syslog
 33# import signal
 34
# Module revision marker.
__revision__ = '0'

# When True, WebDAV.__init__ and WebDAV.log write trace lines to syslog.
dav_debug = False
 38
 39
 40# def urlsafe_b64decode (enc):
 41#     return base64.decodestring (enc.replace ('_', '/').replace ('-', '+'))
 42
 43# def urlsafe_b64encode (str):
 44#     return base64.encodestring (str).replace ('+', '-').replace ('/', '_')
 45
 46
 47class DAVError (Exception):
 48    """ Base class for exceptions in this module. """
 49    def __init__ (self, status=0, message='', body='', details=''):
 50        self.status = status
 51        self.message = message
 52        self.body = body
 53        self.details = details
 54        Exception.__init__ (self, '%d:%s:%s%s' % (self.status, self.message,
 55                                                   self.body, self.details))
 56
 57    def print_to_stderr (self):
 58        """ print_to_stderr docstring """
 59        print >> sys.stderr, str (self.status) + ' ' + self.message
 60        print >> sys.stderr, str (self.details)
 61
 62
 63class Timeout (Exception):
 64    """ Timeout docstring """
 65    def __init__ (self, arg=''):
 66        Exception.__init__ (self, arg)
 67
 68
 69def alarm_handler (signum, frame):
 70    """ alarm_handler docstring """
 71    raise Timeout ('caught alarm')
 72
 73
 74class WebDAV:
 75    """ WebDAV docstring """
 76    def __init__ (self, url, proxy=None, retries_before_fail=6):
 77        self.init_url = url
 78        self.init_proxy = proxy
 79        self.retries_before_fail = retries_before_fail
 80        url_parsed = urlparse.urlsplit (url)
 81
 82        self.top_path = url_parsed[ 2 ]
 83        # make sure top_path has a trailing /
 84        if self.top_path == None or self.top_path == '':
 85            self.top_path = '/'
 86        elif len (self.top_path) > 1 and self.top_path[-1:] != '/':
 87            self.top_path += '/'
 88
 89        if dav_debug:
 90            syslog.syslog ('new WebDAV %s : %s' % (str (url), str (proxy)))
 91
 92        if proxy:
 93            proxy_parsed = urlparse.urlsplit (proxy)
 94            self.host_header = url_parsed[ 1 ]
 95            host_and_port = proxy_parsed[ 1 ].split (':')
 96            self.host = host_and_port[ 0 ]
 97            if len (host_and_port) > 1:
 98                self.port = int(host_and_port[ 1 ])
 99            else:
100                self.port = 80
101        else: # no proxy
102            host_and_port = url_parsed[ 1 ].split (':')
103            self.host_header = None
104            self.host = host_and_port[ 0 ]
105            if len (host_and_port) > 1:
106                self.port = int(host_and_port[ 1 ])
107            else:
108                self.port = 80
109
110        self.connection = False
111        self.connect ()
112
113
114    def log (self, msg, depth=0):
115        """ log docstring """
116        if dav_debug and depth == 0:
117            host = str (self.init_url)
118            if host == 'http://int.tuco.lindenlab.com:80/asset/':
119                host = 'tuco'
120            if host == 'http://harriet.lindenlab.com/asset-keep/':
121                host = 'harriet/asset-keep'
122            if host == 'http://harriet.lindenlab.com/asset-flag/':
123                host = 'harriet/asset-flag'
124            if host == 'http://harriet.lindenlab.com/asset/':
125                host = 'harriet/asset'
126            if host == 'http://ozzy.lindenlab.com/asset/':
127                host = 'ozzy/asset'
128            if host == 'http://station11.lindenlab.com:12041/:':
129                host = 'station11:12041'
130            proxy = str (self.init_proxy)
131            if proxy == 'None':
132                proxy = ''
133            if proxy == 'http://int.tuco.lindenlab.com:3128/':
134                proxy = 'tuco'
135            syslog.syslog ('WebDAV (%s:%s) %s' % (host, proxy, str (msg)))
136
137
138    def connect (self):
139        """ connect docstring """
140        self.log ('connect')
141        self.connection = httplib.HTTPConnection (self.host, self.port)
142
143    def __err (self, response, details):
144        """ __err docstring """
145        raise DAVError (response.status, response.reason, response.read (),
146                        str (self.init_url) + ':' + \
147                        str (self.init_proxy) + ':' + str (details))
148
149    def request (self, method, path, body=None, headers=None,
150                 read_all=True, body_hook = None, recurse=0, allow_cache=True):
151        """ request docstring """
152        # self.log ('request %s %s' % (method, path))
153        if headers == None:
154            headers = {}
155        if not allow_cache:
156            headers['Pragma'] = 'no-cache'
157            headers['cache-control'] = 'no-cache'
158        try:
159            if method.lower () != 'purge':
160                if path.startswith ('/'):
161                    path = path[1:]
162                if self.host_header: # use proxy
163                    headers[ 'host' ] = self.host_header
164                    fullpath = 'http://%s%s%s' % (self.host_header,
165                                                  self.top_path, path)
166                else: # no proxy
167                    fullpath = self.top_path + path
168            else:
169                fullpath = path
170
171            self.connection.request (method, fullpath, body, headers)
172            if body_hook:
173                body_hook ()
174
175            # signal.signal (signal.SIGALRM, alarm_handler)
176            # try:
177            #     signal.alarm (120)
178            #     signal.alarm (0)
179            # except Timeout, e:
180            #     if recurse < 6:
181            #         return self.retry_request (method, path, body, headers,
182            #                                    read_all, body_hook, recurse)
183            #     else:
184            #         raise DAVError (0, 'timeout', self.host,
185            #                         (method, path, body, headers, recurse))
186
187            response = self.connection.getresponse ()
188
189            if read_all:
190                while len (response.read (1024)) > 0:
191                    pass
192            if (response.status == 500 or \
193                response.status == 503 or \
194                response.status == 403) and \
195                recurse < self.retries_before_fail:
196                return self.retry_request (method, path, body, headers,
197                                           read_all, body_hook, recurse)
198            return response
199        except (httplib.ResponseNotReady,
200                httplib.BadStatusLine,
201                socket.error):
202            # if the server hangs up on us (keepalive off, broken pipe),
203            # we need to reconnect and try again.
204            if recurse < self.retries_before_fail:
205                return self.retry_request (method, path, body, headers,
206                                           read_all, body_hook, recurse)
207            raise DAVError (0, 'reconnect failed', self.host,
208                            (method, path, body, headers, recurse))
209
210
211    def retry_request (self, method, path, body, headers,
212                       read_all, body_hook, recurse):
213        """ retry_request docstring """
214        time.sleep (10.0 * recurse)
215        self.connect ()
216        return self.request (method, path, body, headers,
217                             read_all, body_hook, recurse+1)
218
219
220
221    def propfind (self, path, body=None, depth=1):
222        """ propfind docstring """
223        # self.log ('propfind %s' % path)
224        headers = {'Content-Type':'text/xml; charset="utf-8"',
225                   'Depth':str(depth)}
226        response = self.request ('PROPFIND', path, body, headers, False)
227        if response.status == 207:
228            return response # Multi-Status
229        self.__err (response, ('PROPFIND', path, body, headers, 0))
230
231
232    def purge (self, path):
233        """ issue a squid purge command """
234        headers = {'Accept':'*/*'}
235        response = self.request ('PURGE', path, None, headers)
236        if response.status == 200 or response.status == 404:
237            # 200 if it was purge, 404 if it wasn't there.
238            return response
239        self.__err (response, ('PURGE', path, None, headers))
240
241
242    def get_file_size (self, path):
243        """
244        Use propfind to ask a webdav server what the size of
245        a file is.  If used on a directory (collection) return 0
246        """
247        self.log ('get_file_size %s' % path)
248        # "getcontentlength" property
249        # 8.1.1 Example - Retrieving Named Properties
250        # http://docs.python.org/lib/module-xml.dom.html
251        nsurl = 'http://apache.org/dav/props/'
252        doc = xml.dom.minidom.Document ()
253        propfind_element = doc.createElementNS (nsurl, "D:propfind")
254        propfind_element.setAttributeNS (nsurl, 'xmlns:D', 'DAV:')
255        doc.appendChild (propfind_element)
256        prop_element = doc.createElementNS (nsurl, "D:prop")
257        propfind_element.appendChild (prop_element)
258        con_len_element = doc.createElementNS (nsurl, "D:getcontentlength")
259        prop_element.appendChild (con_len_element)
260
261        response = self.propfind (path, doc.toxml ())
262        doc.unlink ()
263
264        resp_doc = xml.dom.minidom.parseString (response.read ())
265        cln = resp_doc.getElementsByTagNameNS ('DAV:','getcontentlength')[ 0 ]
266        try:
267            content_length = int (cln.childNodes[ 0 ].nodeValue)
268        except IndexError:
269            return 0
270        resp_doc.unlink ()
271        return content_length
272
273
274    def file_exists (self, path):
275        """
276        do an http head on the given file.  return True if it succeeds
277        """
278        self.log ('file_exists %s' % path)
279        expect_gzip = path.endswith ('.gz')
280        response = self.request ('HEAD', path)
281        got_gzip = response.getheader ('Content-Encoding', '').strip ()
282        if got_gzip.lower () == 'x-gzip' and expect_gzip == False:
283            # the asset server fakes us out if we ask for the non-gzipped
284            # version of an asset, but the server has the gzipped version.
285            return False
286        return response.status == 200
287
288
289    def mkdir (self, path):
290        """ mkdir docstring """
291        self.log ('mkdir %s' % path)
292        headers = {}
293        response = self.request ('MKCOL', path, None, headers)
294        if response.status == 201:
295            return # success
296        if response.status == 405:
297            return # directory already existed?
298        self.__err (response, ('MKCOL', path, None, headers, 0))
299
300
301    def delete (self, path):
302        """ delete docstring """
303        self.log ('delete %s' % path)
304        headers = {'Depth':'infinity'} # collections require infinity
305        response = self.request ('DELETE', path, None, headers)
306        if response.status == 204:
307            return # no content
308        if response.status == 404:
309            return # hmm
310        self.__err (response, ('DELETE', path, None, headers, 0))
311
312
313    def list_directory (self, path, dir_filter=None, allow_cache=True,
314                        minimum_cache_time=False):
315        """
316        Request an http directory listing and parse the filenames out of lines
317        like: '<LI><A HREF="X"> X</A>'. If a filter function is provided,
318        only return filenames that the filter returns True for.
319
320        This is sort of grody, but it seems faster than other ways of getting
321        this information from an isilon.
322        """
323        self.log ('list_directory %s' % path)
324
325        def try_match (lline, before, after):
326            """ try_match docstring """
327            try:
328                blen = len (before)
329                asset_start_index = lline.index (before)
330                asset_end_index = lline.index (after, asset_start_index + blen)
331                asset = line[ asset_start_index + blen : asset_end_index ]
332
333                if not dir_filter or dir_filter (asset):
334                    return [ asset ]
335                return []
336            except ValueError:
337                return []
338
339        if len (path) > 0 and path[-1:] != '/':
340            path += '/'
341
342        response = self.request ('GET', path, None, {}, False,
343                                 allow_cache=allow_cache)
344
345        if allow_cache and minimum_cache_time: # XXX
346            print response.getheader ('Date')
347            # s = "2005-12-06T12:13:14"
348            # from datetime import datetime
349            # from time import strptime
350            # datetime(*strptime(s, "%Y-%m-%dT%H:%M:%S")[0:6])
351            # datetime.datetime(2005, 12, 6, 12, 13, 14)
352
353        if response.status != 200:
354            self.__err (response, ('GET', path, None, {}, 0))
355        assets = []
356        for line in response.read ().split ('\n'):
357            lline = line.lower ()
358            if lline.find ("parent directory") == -1:
359                # isilon file
360                assets += try_match (lline, '<li><a href="', '"> ')
361                # apache dir
362                assets += try_match (lline, 'alt="[dir]"> <a href="', '/">')
363                # apache file
364                assets += try_match (lline, 'alt="[   ]"> <a href="', '">')
365        return assets
366
367
368    def __tmp_filename (self, path_and_file):
369        """ __tmp_filename docstring """
370        head, tail = os.path.split (path_and_file)
371        if head != '':
372            return head + '/.' + tail + '.' + str (os.getpid ())
373        else:
374            return head + '.' + tail + '.' + str (os.getpid ())
375
376
377    def __put__ (self, filesize, body_hook, remotefile):
378        """ __put__ docstring """
379        headers = {'Content-Length' : str (filesize)}
380        remotefile_tmp = self.__tmp_filename (remotefile)
381        response = self.request ('PUT', remotefile_tmp, None,
382                                 headers, True, body_hook)
383        if not response.status in (201, 204): # created, no content
384            self.__err (response, ('PUT', remotefile, None, headers, 0))
385        if filesize != self.get_file_size (remotefile_tmp):
386            try:
387                self.delete (remotefile_tmp)
388            except:
389                pass
390            raise DAVError (0, 'tmp upload error', remotefile_tmp)
391        # move the file to its final location
392        try:
393            self.rename (remotefile_tmp, remotefile)
394        except DAVError, exc:
395            if exc.status == 403: # try to clean up the tmp file
396                try:
397                    self.delete (remotefile_tmp)
398                except:
399                    pass
400            raise
401        if filesize != self.get_file_size (remotefile):
402            raise DAVError (0, 'file upload error', str (remotefile_tmp))
403
404
405    def put_string (self, strng, remotefile):
406        """ put_string docstring """
407        self.log ('put_string %d -> %s' % (len (strng), remotefile))
408        filesize = len (strng)
409        def body_hook ():
410            """ body_hook docstring """
411            self.connection.send (strng)
412        self.__put__ (filesize, body_hook, remotefile)
413
414
415    def put_file (self, localfile, remotefile):
416        """
417        Send a local file to a remote webdav store.  First, upload to
418        a temporary filename.  Next make sure the file is the size we
419        expected.  Next, move the file to its final location.  Next,
420        check the file size at the final location.
421        """
422        self.log ('put_file %s -> %s' % (localfile, remotefile))
423        filesize = os.path.getsize (localfile)
424        def body_hook ():
425            """ body_hook docstring """
426            handle = open (localfile)
427            while True:
428                data = handle.read (1300)
429                if len (data) == 0:
430                    break
431                self.connection.send (data)
432            handle.close ()
433        self.__put__ (filesize, body_hook, remotefile)
434
435
436    def create_empty_file (self, remotefile):
437        """ create an empty file """
438        self.log ('touch_file %s' % (remotefile))
439        headers = {'Content-Length' : '0'}
440        response = self.request ('PUT', remotefile, None, headers)
441        if not response.status in (201, 204): # created, no content
442            self.__err (response, ('PUT', remotefile, None, headers, 0))
443        if self.get_file_size (remotefile) != 0:
444            raise DAVError (0, 'file upload error', str (remotefile))
445
446
447    def __get_file_setup (self, remotefile, check_size=True):
448        """ __get_file_setup docstring """
449        if check_size:
450            remotesize = self.get_file_size (remotefile)
451        response = self.request ('GET', remotefile, None, {}, False)
452        if response.status != 200:
453            self.__err (response, ('GET', remotefile, None, {}, 0))
454        try:
455            content_length = int (response.getheader ("Content-Length"))
456        except TypeError:
457            content_length = None
458        if check_size:
459            if content_length != remotesize:
460                raise DAVError (0, 'file DL size error', remotefile)
461        return (response, content_length)
462
463
464    def __get_file_read (self, writehandle, response, content_length):
465        """ __get_file_read docstring """
466        if content_length != None:
467            so_far_length = 0
468            while so_far_length < content_length:
469                data = response.read (content_length - so_far_length)
470                if len (data) == 0:
471                    raise DAVError (0, 'short file download')
472                so_far_length += len (data)
473                writehandle.write (data)
474            while len (response.read ()) > 0:
475                pass
476        else:
477            while True:
478                data = response.read ()
479                if (len (data) < 1):
480                    break
481                writehandle.write (data)
482
483
484    def get_file (self, remotefile, localfile, check_size=True):
485        """
486        Get a remote file from a webdav server.  Download to a local
487        tmp file, then move into place.  Sanity check file sizes as
488        we go.
489        """
490        self.log ('get_file %s -> %s' % (remotefile, localfile))
491        (response, content_length) = \
492                   self.__get_file_setup (remotefile, check_size)
493        localfile_tmp = self.__tmp_filename (localfile)
494        handle = open (localfile_tmp, 'w')
495        self.__get_file_read (handle, response, content_length)
496        handle.close ()
497        if check_size:
498            if content_length != os.path.getsize (localfile_tmp):
499                raise DAVError (0, 'file DL size error',
500                                remotefile+','+localfile)
501        os.rename (localfile_tmp, localfile)
502
503
504    def get_file_as_string (self, remotefile, check_size=True):
505        """
506        download a file from a webdav server and return it as a string.
507        """
508        self.log ('get_file_as_string %s' % remotefile)
509        (response, content_length) = \
510                   self.__get_file_setup (remotefile, check_size)
511        # (tmp_handle, tmp_filename) = tempfile.mkstemp ()
512        tmp_handle = os.tmpfile ()
513        self.__get_file_read (tmp_handle, response, content_length)
514        tmp_handle.seek (0)
515        ret = tmp_handle.read ()
516        tmp_handle.close ()
517        # os.unlink (tmp_filename)
518        return ret
519
520
521    def get_post_as_string (self, remotefile, body):
522        """
523        Do an http POST, send body, get response and return it.
524        """
525        self.log ('get_post_as_string %s' % remotefile)
526        # headers = {'Content-Type':'application/x-www-form-urlencoded'}
527        headers = {'Content-Type':'text/xml; charset="utf-8"'}
528        # b64body = urlsafe_b64encode (asset_url)
529        response = self.request ('POST', remotefile, body, headers, False)
530        if response.status != 200:
531            self.__err (response, ('POST', remotefile, body, headers, 0))
532        try:
533            content_length = int (response.getheader ('Content-Length'))
534        except TypeError:
535            content_length = None
536        tmp_handle = os.tmpfile ()
537        self.__get_file_read (tmp_handle, response, content_length)
538        tmp_handle.seek (0)
539        ret = tmp_handle.read ()
540        tmp_handle.close ()
541        return ret
542
543
544    def __destination_command (self, verb, remotesrc, dstdav, remotedst):
545        """
546        self and dstdav should point to the same http server.
547        """
548        if len (remotedst) > 0 and remotedst[ 0 ] == '/':
549            remotedst = remotedst[1:]
550        headers = {'Destination': 'http://%s:%d%s%s' % (dstdav.host,
551                                                        dstdav.port,
552                                                        dstdav.top_path,
553                                                        remotedst)}
554        response = self.request (verb, remotesrc, None, headers)
555        if response.status == 201:
556            return # created
557        if response.status == 204:
558            return # no content
559        self.__err (response, (verb, remotesrc, None, headers, 0))
560
561
562    def rename (self, remotesrc, remotedst):
563        """ rename a file on a webdav server """
564        self.log ('rename %s -> %s' % (remotesrc, remotedst))
565        self.__destination_command ('MOVE', remotesrc, self, remotedst)
566    def xrename (self, remotesrc, dstdav, remotedst):
567        """ rename a file on a webdav server """
568        self.log ('xrename %s -> %s' % (remotesrc, remotedst))
569        self.__destination_command ('MOVE', remotesrc, dstdav, remotedst)
570
571
572    def copy (self, remotesrc, remotedst):
573        """ copy a file on a webdav server """
574        self.log ('copy %s -> %s' % (remotesrc, remotedst))
575        self.__destination_command ('COPY', remotesrc, self, remotedst)
576    def xcopy (self, remotesrc, dstdav, remotedst):
577        """ copy a file on a webdav server """
578        self.log ('xcopy %s -> %s' % (remotesrc, remotedst))
579        self.__destination_command ('COPY', remotesrc, dstdav, remotedst)
580
581
def put_string(data, url):
    """
    Upload the string data to url.

    A WebDAV client is created for the root of url's server and the
    url's path is used as the remote file name.
    """
    parts = urlparse.urlsplit(url)
    scheme, netloc, remote_path = parts[0], parts[1], parts[2]
    server_root = '%s://%s/' % (scheme, netloc)
    WebDAV(server_root).put_string(data, remote_path)
589
590
def get_string(url, check_size=True):
    """
    Return the contents of url as a string.

    A WebDAV client is created for the root of url's server and the
    url's path is fetched from it; check_size is passed through to
    WebDAV.get_file_as_string.
    """
    parts = urlparse.urlsplit(url)
    scheme, netloc, remote_path = parts[0], parts[1], parts[2]
    server_root = '%s://%s/' % (scheme, netloc)
    return WebDAV(server_root).get_file_as_string(remote_path, check_size)