PageRenderTime 64ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/filestore.py

https://github.com/kramer314/sagecell
Python | 492 lines | 476 code | 3 blank | 13 comment | 0 complexity | beb31076ae370bcf0fd937e4440c00eb MD5 | raw file
  1. """
  2. These classes implement ways to store files in the server.
  3. """
  4. from util import log
  5. class FileStore(object):
  6. """
  7. An object that abstracts a filesystem. This is the base class for filestores.
  8. """
  9. def __init__(self):
  10. raise NotImplementedError
  11. def new_file(self, **kwargs):
  12. """
  13. Return a file handle for a new write-only file with the
  14. given properties. If the file already exists, it will
  15. overwritten.
  16. :arg \*\*kwargs: the properties of the new file (one should be
  17. ``filename="[filename]"``)
  18. :returns: an open file handle for the new file
  19. :rtype: file handle
  20. """
  21. raise NotImplementedError
  22. def delete_files(self, **kwargs):
  23. """
  24. Delete every file in the filestore whose properties match
  25. the keyword arguments.
  26. :arg \*\*kwargs: all files whose MongoDB properties match these
  27. will be deleted
  28. """
  29. raise NotImplementedError
  30. def get_file(self, **kwargs):
  31. """
  32. Return a read-only file handle for a given file
  33. with the properties given by the keyword arguments.
  34. If the file does not exist, return ``None``.
  35. :arg \*\*kwargs: the properties of the desired file
  36. :returns: the opened file, or ``None`` if no file exists
  37. :rtype: file handle
  38. """
  39. raise NotImplementedError
  40. def create_file(self, file_handle, **kwargs):
  41. """
  42. Copy an existing file into the filestore.
  43. :arg file file_handle: a file handle open for reading
  44. :arg \*\*kwargs: labels for the new file (one shoud be
  45. ``filename="[filename]"``)
  46. """
  47. raise NotImplementedError
  48. def copy_file(self, file_handle, **kwargs):
  49. """Copy a file from the filestore into another file.
  50. :arg file file_handle: a file handle open for writing
  51. :arg \*\*kwargs: labels to identify the file to copy
  52. """
  53. raise NotImplementedError
  54. def create_secret(self, session):
  55. """
  56. Generate a new :mod:`hmac` object and associate it
  57. with the session. Used only with "untrusted" database
  58. adaptors. (See :ref:`trusted`.)
  59. :arg str session: the ID of the new session
  60. """
  61. raise NotImplementedError
  62. def new_context(self):
  63. """
  64. Reconnect to the filestore. This function should be
  65. called before the first filestore access in each new process.
  66. """
  67. def new_context_copy(self):
  68. """
  69. Create a copy of this object for use in a single thread.
  70. :returns: a new filestore object
  71. :rtype: FileStore
  72. """
  73. try:
  74. from sagecell_config import mongo_config
  75. except ImportError:
  76. # we may not be able to import sagecell_config if we are untrusted
  77. mongo_config=None
  78. DEBUG = False
  79. def Debugger(func):
  80. if DEBUG:
  81. def decorated(*args, **kwargs):
  82. print "****************Entering ",func.func_name
  83. print " args ",args, kwargs
  84. #try:
  85. # print "filename: %s"%(args[0]._filename(**kwargs),)
  86. #except Exception as a:
  87. # print "Couldn't get filename", a
  88. ret = func(*args, **kwargs)
  89. print ret
  90. return ret
  91. return decorated
  92. else:
  93. return func
  94. from sqlalchemy import create_engine, Column, Integer, String
  95. from sqlalchemy.types import Binary
  96. from sqlalchemy.orm import sessionmaker
  97. from sqlalchemy.ext.declarative import declarative_base
  98. from StringIO import StringIO
  99. class FileStoreSQLAlchemy(FileStore):
  100. """
  101. A filestore in a SQLAlchemy database.
  102. :arg str fs_file: the SQLAlchemy URI for a database file
  103. """
  104. def __init__(self, fs_file=None):
  105. if fs_file is not None:
  106. engine = create_engine(fs_file)
  107. self.SQLSession = sessionmaker(bind=engine)
  108. FileStoreSQLAlchemy.Base.metadata.create_all(engine)
  109. self.new_context()
  110. @Debugger
  111. def new_file(self, session, filename, **kwargs):
  112. """
  113. See :meth:`FileStore.new_file`
  114. """
  115. self.delete_files(session, filename)
  116. log("FS Creating %s/%s"%(session, filename))
  117. return FileStoreSQLAlchemy.DBFileWriter(self, session, filename)
  118. @Debugger
  119. def delete_files(self, session=None, filename=None, **kwargs):
  120. """
  121. See :meth:`FileStore.new_file`
  122. """
  123. q = self.dbsession.query(FileStoreSQLAlchemy.StoredFile)
  124. if session is not None:
  125. q = q.filter_by(session=session)
  126. if filename is not None:
  127. q = q.filter_by(filename=filename)
  128. q.delete()
  129. self.dbsession.commit()
  130. @Debugger
  131. def get_file(self, session, filename, **kwargs):
  132. """
  133. See :meth:`FileStore.get_file`
  134. """
  135. return StringIO(self.dbsession.query(FileStoreSQLAlchemy.StoredFile.contents) \
  136. .filter_by(session=session, filename=filename).first().contents)
  137. @Debugger
  138. def create_file(self, file_handle, session, filename, **kwargs):
  139. """
  140. See :meth:`FileStore.create_file`
  141. """
  142. f = FileStoreSQLAlchemy.StoredFile(session=session, filename=filename)
  143. if type(file_handle) is FileStoreSQLAlchemy.DBFileWriter:
  144. contents = file_handle.getvalue()
  145. else:
  146. contents = file_handle.read()
  147. f.contents = contents
  148. self.dbsession.add(f)
  149. self.dbsession.commit()
  150. @Debugger
  151. def copy_file(self, file_handle, session, filename, **kwargs):
  152. """
  153. See :meth:`FileStore.copy_file`
  154. """
  155. self.dbsession.add(FileStoreSQLAlchemy.StoredFile(session=session,
  156. filename=filename, contents=file_handle.read()))
  157. self.dbsession.commit()
  158. @Debugger
  159. def new_context(self):
  160. """
  161. See :meth:`FileStore.new_context`
  162. """
  163. self.dbsession = self.SQLSession()
  164. @Debugger
  165. def new_context_copy(self):
  166. """
  167. See :meth:`FileStore.new_context_copy`
  168. """
  169. new = type(self)()
  170. new.SQLSession = self.SQLSession
  171. new.new_context()
  172. return new
  173. Base = declarative_base()
  174. class StoredFile(Base):
  175. """A file stored in the database"""
  176. __tablename__ = 'filestore'
  177. n = Column(Integer, primary_key=True)
  178. session = Column(String)
  179. filename = Column(String)
  180. contents = Column(Binary)
  181. class DBFileWriter(StringIO, object):
  182. """
  183. A file-like object that writes its contents to the database when it is
  184. closed.
  185. :arg FileStoreSQLAlchemy filestore: the filestore object to write to
  186. :arg str session: the ID of the session that is the source of this file
  187. :arg str filename: the name of the file
  188. """
  189. def __init__(self, filestore, session, filename):
  190. self.filestore = filestore
  191. self.session = session
  192. self.filename = filename
  193. super(type(self), self).__init__()
  194. def __enter__(self):
  195. return self
  196. def __exit__(self, *args):
  197. self.close()
  198. def close(self):
  199. self.filestore.create_file(self, self.session, self.filename)
  200. super(type(self), self).close()
  201. try:
  202. from gridfs import GridFS
  203. import pymongo
  204. except ImportError:
  205. pass
  206. class FileStoreMongo(FileStore):
  207. """
  208. Filestore database using GridFS (see :mod:`gridfs`)
  209. :arg pymongo.database.Database connection: MongoDB database object
  210. """
  211. def __init__(self, connection):
  212. self._conn=connection
  213. self.new_context()
  214. self._fs=GridFS(self.database)
  215. def _filename(self, **kwargs):
  216. return {'session': kwargs.get('session', kwargs.get('cell_id', 'SESSION NOT FOUND')), 'filename': kwargs['filename']}
  217. @Debugger
  218. def new_file(self, **kwargs):
  219. """
  220. See :meth:`FileStore.new_file`
  221. :rtype: :class:`gridfs.grid_file.GridIn`
  222. """
  223. self.delete_files(**kwargs)
  224. log("FS Creating %s"%self._filename(**kwargs))
  225. return self._fs.new_file(**self._filename(**kwargs))
  226. @Debugger
  227. def delete_files(self, **kwargs):
  228. """
  229. See :meth:`FileStore.delete_files`
  230. """
  231. while self._fs.exists(self._filename(**kwargs)):
  232. self._fs.delete(self._fs.get_last_version(**self._filename(**kwargs))._id)
  233. @Debugger
  234. def get_file(self, **kwargs):
  235. """
  236. See :meth:`FileStore.get_file`
  237. :rtype: :class:`gridfs.grid_file.GridOut`
  238. """
  239. if self._fs.exists(self._filename(**kwargs)):
  240. return self._fs.get(self._fs.get_last_version(**self._filename(**kwargs))._id)
  241. else:
  242. return None
  243. @Debugger
  244. def create_file(self, file_handle, **kwargs):
  245. """
  246. See :meth:`FileStore.create_file`
  247. """
  248. with self.new_file(**kwargs) as f:
  249. f.write(file_handle.read())
  250. @Debugger
  251. def copy_file(self, file_handle, **kwargs):
  252. """
  253. See :meth:`FileStore.copy_file`
  254. """
  255. file_handle.write(self.get_file(**kwargs).read())
  256. @Debugger
  257. def new_context(self):
  258. """
  259. See :meth:`FileStore.new_context`
  260. """
  261. self.database=pymongo.database.Database(self._conn, mongo_config['mongo_db'])
  262. uri=mongo_config['mongo_uri']
  263. if '@' in uri:
  264. # strip off optional mongodb:// part
  265. if uri.startswith('mongodb://'):
  266. uri=uri[len('mongodb://'):]
  267. result=self.database.authenticate(uri[:uri.index(':')],uri[uri.index(':')+1:uri.index('@')])
  268. if result==0:
  269. raise Exception("MongoDB authentication problem")
  270. @Debugger
  271. def new_context_copy(self):
  272. """
  273. See :meth:`FileStore.new_context_copy`
  274. """
  275. return type(self)(self._conn)
  276. valid_untrusted_methods=()
  277. from flask import safe_join
  278. import os
  279. class FileStoreFilesystem(FileStore):
  280. """
  281. Filestore using the file system
  282. The levels parameter controls how the session is split up to give
  283. subdirectories. For example, if ``levels=4``, then session
  284. ``0c490701-b1b0-40b8-88ea-70b61a580cf2`` files are stored in
  285. subdirectory :file:`0/c/4/9/0c490701-b1b0-40b8-88ea-70b61a580cf2`.
  286. This prevents having too many directories in the root directory.
  287. :arg str dir: A directory in which to store files
  288. :arg int levels: The number of levels to use for splitting up session directories
  289. """
  290. def __init__(self, dir, levels=0):
  291. self._dir = dir
  292. self._levels=levels
  293. def _filename(self, **kwargs):
  294. if 'session' in kwargs:
  295. session=kwargs['session']
  296. elif 'cell_id' in kwargs:
  297. session = kwargs['cell_id']
  298. else:
  299. session = "SESSION_NOT_FOUND"
  300. session_subdir = list(str(session)[:self._levels])+[str(session)]
  301. # Use Flask's safe_join to make sure we don't overwrite something crucial
  302. session_dir = safe_join(self._dir, os.path.join(*session_subdir))
  303. if not os.path.isdir(session_dir):
  304. os.makedirs(session_dir)
  305. return safe_join(session_dir, kwargs['filename'])
  306. @Debugger
  307. def new_file(self, **kwargs):
  308. """
  309. See :meth:`FileStore.new_file`
  310. """
  311. return open(self._filename(**kwargs), 'w')
  312. @Debugger
  313. def delete_files(self, session, filename):
  314. """
  315. See :meth:`FileStore.delete_files`
  316. """
  317. os.path.remove(self._filename(session=session, filename=filename))
  318. @Debugger
  319. def get_file(self, session, filename):
  320. """
  321. See :meth:`FileStore.get_file`
  322. """
  323. f=self._filename(session=session, filename=filename)
  324. if os.path.exists(f):
  325. return open(f, 'r')
  326. else:
  327. return None
  328. @Debugger
  329. def create_file(self, file_handle, **kwargs):
  330. """
  331. See :meth:`FileStore.create_file`
  332. """
  333. with self.new_file(**kwargs) as f:
  334. f.write(file_handle.read())
  335. @Debugger
  336. def copy_file(self, file_handle, **kwargs):
  337. """
  338. See :meth:`FileStore.copy_file`
  339. """
  340. file_handle.write(self.get_file(**kwargs).read())
  341. @Debugger
  342. def new_context(self):
  343. """
  344. Empty function
  345. """
  346. pass
  347. valid_untrusted_methods=()
  348. import zmq
  349. from db_zmq import db_method
  350. from uuid import uuid4
  351. from json import dumps
  352. from os import fstat
  353. import mmap
  354. class FileStoreZMQ(FileStoreMongo):
  355. u"""
  356. A connection to a filestore database over \xd8MQ.
  357. This can be used in the same way as a normal filestore,
  358. but without risk of compromising the database.
  359. :arg str address: the address the database should connect with
  360. """
  361. def __init__(self, address):
  362. self.address=address
  363. self._xreq=None
  364. @property
  365. def socket(self):
  366. """
  367. The ``socket`` property is automatically initialized the first
  368. time it is called. We do this since we shouldn't create a
  369. context in a parent process. Instead, we'll wait until we
  370. actually start using the db api to create a context. If you
  371. use the same class in a child process, you should first call
  372. the :meth:`new_context` method.
  373. """
  374. if self._xreq is None:
  375. self.new_context()
  376. return self._xreq
  377. def new_context(self):
  378. u"""
  379. Reconnect to \xd8MQ. This function should be
  380. called before the first database access in each new process.
  381. """
  382. self._context=zmq.Context()
  383. self._xreq=self._context.socket(zmq.XREQ)
  384. self._xreq.connect(self.address)
  385. log(u"ZMQ connecting to %s"%self.address)
  386. def create_file(self, file_handle, hmac, **kwargs):
  387. """
  388. See :meth:`FileStore.create_file`
  389. :arg hmac: an object to be updated with the contents
  390. of the message to be sent
  391. :type hmac: :mod:`hmac` object
  392. """
  393. # Use mmap if the filesize is larger than 1MiB;
  394. # otherwise just copy the string to memory before sending it
  395. if fstat(file_handle.fileno()).st_size>2**20:
  396. f=mmap.mmap(file_handle.fileno(),0,access=mmap.ACCESS_READ)
  397. else:
  398. f=file_handle.read()
  399. msg_str=dumps({'msg_type':'create_file',"header":str(uuid4()),
  400. 'content':kwargs})
  401. log("Sending: msg_str: %r, old_digest: %r"%(msg_str, hmac.digest()))
  402. hmac.update(msg_str)
  403. log("New digest: %r"%hmac.digest())
  404. message=[msg_str, hmac.digest(), f]
  405. self.socket.send_multipart(message,copy=False,track=True).wait()
  406. self.socket.recv()
  407. def copy_file(self, file_handle, hmac, **kwargs):
  408. """
  409. See :meth:`FileStore.copy_file`
  410. :arg hmac: an object to be updated with the contents
  411. of the message to be sent
  412. :type hmac: :mod:`hmac` object
  413. """
  414. msg_str=dumps({'msg_type':'copy_file','content':kwargs})
  415. hmac.update(msg_str)
  416. self.socket.send_multipart([msg_str, hmac.digest()])
  417. file_handle.write(self.socket.recv())
  418. create_secret=db_method('create_secret',['session'], True)
  419. new_file=db_method('new_file',['session', 'filename'], True)
  420. delete_files=db_method('delete_files',['session', 'filename'], True)
  421. get_file=db_method('get_file',['session', 'filename'], True)