/src/backy2/io/file.py

https://github.com/wamdam/backy2 · Python · 232 lines · 176 code · 41 blank · 15 comment · 47 complexity · dae89c673b13b65e55ebf99fb69b475a MD5 · raw file

  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. from backy2.logging import logger
  4. from backy2.io import IO as _IO
  5. from collections import namedtuple
  6. import os
  7. import queue
  8. import re
  9. import threading
  10. import time
# Status codes published by the reader/writer worker threads via
# reader_thread_status / writer_thread_status, and summarized by
# IO.thread_status().
STATUS_NOTHING = 0  # worker is idle / between jobs
STATUS_READING = 1
STATUS_WRITING = 2
STATUS_SEEKING = 3
STATUS_FADVISE = 4  # dropping the just-touched range from the page cache
  16. if hasattr(os, 'posix_fadvise'):
  17. posix_fadvise = os.posix_fadvise
  18. else: # pragma: no cover
  19. logger.warn('Running without `posix_fadvise`.')
  20. os.POSIX_FADV_RANDOM = None
  21. os.POSIX_FADV_SEQUENTIAL = None
  22. os.POSIX_FADV_WILLNEED = None
  23. os.POSIX_FADV_DONTNEED = None
  24. def posix_fadvise(*args, **kw):
  25. return
class IO(_IO):
    """File-backed IO: reads and writes block-sized chunks of a plain file
    using pools of reader/writer threads fed by queues."""
    mode = None  # 'r' after open_r(), 'w' after open_w()
    WRITE_QUEUE_LENGTH = 20  # extra queue slack on top of simultaneous_writes
    READ_QUEUE_LENGTH = 20  # extra queue slack on top of simultaneous_reads
  30. def __init__(self, config, block_size, hash_function):
  31. self.simultaneous_reads = config.getint('simultaneous_reads', 1)
  32. self.simultaneous_writes = config.getint('simultaneous_reads', 1)
  33. self.block_size = block_size
  34. self.hash_function = hash_function
  35. self._reader_threads = []
  36. self._writer_threads = []
  37. self.reader_thread_status = {}
  38. self.writer_thread_status = {}
  39. self._inqueue = queue.Queue() # infinite size for all the blocks
  40. self._outqueue = queue.Queue(self.simultaneous_reads + self.READ_QUEUE_LENGTH) # data of read blocks
  41. self._write_queue = queue.Queue(self.simultaneous_writes + self.WRITE_QUEUE_LENGTH) # blocks to be written
  42. def open_r(self, io_name):
  43. self.mode = 'r'
  44. _s = re.match('^file://(.+)$', io_name)
  45. if not _s:
  46. raise RuntimeError('Not a valid io name: {} . Need a file path, e.g. file:///somepath/file'.format(io_name))
  47. self.io_name = _s.groups()[0]
  48. for i in range(self.simultaneous_reads):
  49. _reader_thread = threading.Thread(target=self._reader, args=(i,))
  50. _reader_thread.daemon = True
  51. _reader_thread.start()
  52. self._reader_threads.append(_reader_thread)
  53. self.reader_thread_status[i] = STATUS_NOTHING
  54. def open_w(self, io_name, size=None, force=False):
  55. # parameter size is version's size.
  56. self.mode = 'w'
  57. _s = re.match('^file://(.+)$', io_name)
  58. if not _s:
  59. raise RuntimeError('Not a valid io name: {} . Need a file path, e.g. file:///somepath/file'.format(io_name))
  60. self.io_name = _s.groups()[0]
  61. if os.path.exists(self.io_name):
  62. if not force:
  63. logger.error('Target already exists: {}'.format(io_name))
  64. exit('Error opening restore target. You must force the restore.')
  65. else:
  66. if self.size() < size:
  67. logger.error('Target size is too small. Has {}b, need {}b.'.format(self.size(), size))
  68. exit('Error opening restore target.')
  69. else:
  70. # create the file
  71. with open(self.io_name, 'wb') as f:
  72. f.seek(size - 1)
  73. f.write(b'\0')
  74. for i in range(self.simultaneous_writes):
  75. _writer_thread = threading.Thread(target=self._writer, args=(i,))
  76. _writer_thread.daemon = True
  77. _writer_thread.start()
  78. self._writer_threads.append(_writer_thread)
  79. self.writer_thread_status[i] = STATUS_NOTHING
  80. def size(self):
  81. source_size = 0
  82. with open(self.io_name, 'rb') as source_file:
  83. #posix_fadvise(source_file.fileno(), 0, 0, os.POSIX_FADV_SEQUENTIAL)
  84. # determine source size
  85. source_file.seek(0, 2) # to the end
  86. source_size = source_file.tell()
  87. source_file.seek(0)
  88. return source_size
  89. def _writer(self, id_):
  90. """ self._write_queue contains a list of (Block, data) to be written.
  91. """
  92. with open(self.io_name, 'rb+') as _write_file:
  93. while True:
  94. entry = self._write_queue.get()
  95. if entry is None:
  96. logger.debug("IO writer {} finishing.".format(id_))
  97. self._write_queue.task_done()
  98. break
  99. block, data, callback = entry
  100. offset = block.id * self.block_size
  101. self.writer_thread_status[id_] = STATUS_SEEKING
  102. _write_file.seek(offset)
  103. self.writer_thread_status[id_] = STATUS_WRITING
  104. written = _write_file.write(data)
  105. posix_fadvise(_write_file.fileno(), offset, offset + written, os.POSIX_FADV_DONTNEED)
  106. self.writer_thread_status[id_] = STATUS_NOTHING
  107. assert written == len(data)
  108. if callback:
  109. callback()
  110. self._write_queue.task_done()
  111. def _reader(self, id_):
  112. """ self._inqueue contains block_ids to be read.
  113. self._outqueue contains (block_id, data, data_checksum)
  114. """
  115. with open(self.io_name, 'rb') as source_file:
  116. while True:
  117. entry = self._inqueue.get()
  118. if entry is None:
  119. logger.debug("IO {} finishing.".format(id_))
  120. self._outqueue.put(None) # also let the outqueue end
  121. self._inqueue.task_done()
  122. break
  123. block_id, read, metadata = entry
  124. if not read:
  125. self._outqueue.put((block_id, None, None, metadata))
  126. else:
  127. offset = block_id * self.block_size
  128. t1 = time.time()
  129. self.reader_thread_status[id_] = STATUS_SEEKING
  130. source_file.seek(offset)
  131. t2 = time.time()
  132. self.reader_thread_status[id_] = STATUS_READING
  133. data = source_file.read(self.block_size)
  134. t3 = time.time()
  135. # throw away cache
  136. self.reader_thread_status[id_] = STATUS_FADVISE
  137. posix_fadvise(source_file.fileno(), offset, offset + self.block_size, os.POSIX_FADV_DONTNEED)
  138. self.reader_thread_status[id_] = STATUS_NOTHING
  139. if not data:
  140. raise RuntimeError('EOF reached on source when there should be data.')
  141. data_checksum = self.hash_function(data).hexdigest()
  142. self._outqueue.put((block_id, data, data_checksum, metadata))
  143. self._inqueue.task_done()
  144. def read(self, block_id, sync=False, read=True, metadata=None):
  145. """ Adds a read job, passes through metadata.
  146. read False means the real data will not be read."""
  147. self._inqueue.put((block_id, read, metadata))
  148. if sync:
  149. rblock_id, data, data_checksum, metadata = self.get()
  150. if rblock_id != block_id:
  151. raise RuntimeError('Do not mix threaded reading with sync reading!')
  152. return data
  153. def get(self):
  154. d = self._outqueue.get()
  155. self._outqueue.task_done()
  156. return d
  157. def write(self, block, data, callback=None):
  158. """ Adds a write job"""
  159. self._write_queue.put((block, data, callback))
  160. def queue_status(self):
  161. return {
  162. 'rq_filled': self._outqueue.qsize() / self._outqueue.maxsize, # 0..1
  163. 'wq_filled': self._write_queue.qsize() / self._write_queue.maxsize,
  164. }
  165. def thread_status(self):
  166. return "IOR: N{} R{} S{} F{} IQ{} OQ{} IOW: N{} W{} S{} F{} QL{}".format(
  167. len([t for t in self.reader_thread_status.values() if t==STATUS_NOTHING]),
  168. len([t for t in self.reader_thread_status.values() if t==STATUS_READING]),
  169. len([t for t in self.reader_thread_status.values() if t==STATUS_SEEKING]),
  170. len([t for t in self.reader_thread_status.values() if t==STATUS_FADVISE]),
  171. self._inqueue.qsize(),
  172. self._outqueue.qsize(),
  173. len([t for t in self.writer_thread_status.values() if t==STATUS_NOTHING]),
  174. len([t for t in self.writer_thread_status.values() if t==STATUS_WRITING]),
  175. len([t for t in self.writer_thread_status.values() if t==STATUS_SEEKING]),
  176. len([t for t in self.writer_thread_status.values() if t==STATUS_FADVISE]),
  177. self._write_queue.qsize(),
  178. )
  179. def close(self):
  180. if self.mode == 'r':
  181. for _reader_thread in self._reader_threads:
  182. self._inqueue.put(None) # ends the threads
  183. for _reader_thread in self._reader_threads:
  184. _reader_thread.join()
  185. elif self.mode == 'w':
  186. t1 = time.time()
  187. for _writer_thread in self._writer_threads:
  188. self._write_queue.put(None) # ends the threads
  189. for _writer_thread in self._writer_threads:
  190. _writer_thread.join()
  191. t2 = time.time()