PageRenderTime 56ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/scrapy/utils/queue.py

https://github.com/noplay/scrapy
Python | 177 lines | 139 code | 34 blank | 4 comment | 19 complexity | 58258b71a2c20036d15b01e7ac3e463a MD5 | raw file
  1. from __future__ import with_statement
  2. import os
  3. import struct
  4. import glob
  5. from collections import deque
  6. from scrapy.utils.py26 import json
  class FifoMemoryQueue(object):
      """In-memory first-in, first-out queue backed by a ``deque``.

      Items enter at the left end of ``self.q`` and leave from the right
      end, so the oldest pushed item is always the next one popped.
      """

      def __init__(self):
          # The deque is the only state; subclasses change just the push end.
          self.q = deque()

      def push(self, obj):
          """Store ``obj`` at the left end of the underlying deque."""
          self.q.appendleft(obj)

      def pop(self):
          """Remove and return the item at the right end of the deque.

          Returns ``None`` when the queue holds no items.
          """
          try:
              return self.q.pop()
          except IndexError:
              # deque.pop() raises IndexError only when it is empty.
              return None

      def close(self):
          """Do nothing; there is no external resource to release."""
          return None

      def __len__(self):
          """Return the number of items currently queued."""
          return len(self.q)
  class LifoMemoryQueue(FifoMemoryQueue):
      """In-memory last-in, first-out queue.

      Only ``push`` is overridden: items are added at the right end of
      ``self.q``, which is the end the inherited ``pop`` removes from.
      """

      def push(self, obj):
          """Store ``obj`` at the right end of the underlying deque."""
          stack = self.q
          stack.append(obj)
  class FifoDiskQueue(object):
      """Persistent FIFO queue stored as chunk files inside a directory.

      Records are appended to the "head" chunk and consumed from the "tail"
      chunk. Each record is written as a big-endian unsigned 32-bit length
      header (``szhdr_format``) followed by the raw payload. Chunk files are
      named ``q%05d`` and the queue state is kept in ``info.json``:

      * ``chunksize`` -- number of records per chunk file
      * ``size``      -- number of records currently queued
      * ``head``      -- ``[chunk number, records written to that chunk]``
      * ``tail``      -- ``[chunk number, records read from it, byte offset]``
      """

      szhdr_format = ">L"
      szhdr_size = struct.calcsize(szhdr_format)

      def __init__(self, path, chunksize=100000):
          """Open the queue at ``path``, creating the directory if needed.

          ``chunksize`` is only used for a new queue; when ``info.json``
          already exists the stored chunk size takes precedence.
          """
          self.path = path
          if not os.path.exists(path):
              os.makedirs(path)
          self.info = self._loadinfo(chunksize)
          self.chunksize = self.info['chunksize']
          # Opening the head in append mode creates the chunk file, so the
          # tail open below succeeds when both point at the same chunk.
          self.headf = self._openchunk(self.info['head'][0], 'ab+')
          self.tailf = self._openchunk(self.info['tail'][0])
          self.tailf.seek(self.info['tail'][2])

      def push(self, string):
          """Append one record (a byte string) to the head of the queue."""
          hnum, hpos = self.info['head']
          hpos += 1
          szhdr = struct.pack(self.szhdr_format, len(string))
          # Header and payload go out in a single unbuffered write on the fd.
          os.write(self.headf.fileno(), szhdr + string)
          if hpos == self.chunksize:
              # Current chunk is full: start writing to the next one.
              hpos = 0
              hnum += 1
              self.headf.close()
              self.headf = self._openchunk(hnum, 'ab+')
          self.info['size'] += 1
          # Keep this a list (same shape json.load gives back) so pop() can
          # order-compare it with another list; list-vs-tuple ordering is a
          # TypeError on Python 3.
          self.info['head'] = [hnum, hpos]

      def _openchunk(self, number, mode='rb'):
          """Open chunk file ``number`` in ``mode`` (binary read by default).

          Chunks hold binary records, so they are never opened in text mode.
          """
          return open(os.path.join(self.path, 'q%05d' % number), mode)

      def pop(self):
          """Remove and return the oldest record, or ``None`` if none is left."""
          tnum, tcnt, toffset = self.info['tail']
          # The tail has caught up with the head: nothing left to read.
          if [tnum, tcnt] >= list(self.info['head']):
              return
          tfd = self.tailf.fileno()
          szhdr = os.read(tfd, self.szhdr_size)
          if not szhdr:
              return
          size, = struct.unpack(self.szhdr_format, szhdr)
          data = os.read(tfd, size)
          tcnt += 1
          toffset += self.szhdr_size + size
          if tcnt == self.chunksize and tnum <= self.info['head'][0]:
              # Tail chunk fully consumed: delete it and move to the next.
              tcnt = toffset = 0
              tnum += 1
              self.tailf.close()
              os.remove(self.tailf.name)
              self.tailf = self._openchunk(tnum)
          self.info['size'] -= 1
          self.info['tail'] = [tnum, tcnt, toffset]
          return data

      def close(self):
          """Close both chunk files and save state; remove files if empty."""
          self.headf.close()
          self.tailf.close()
          self._saveinfo(self.info)
          if len(self) == 0:
              self._cleanup()

      def __len__(self):
          """Return the number of records currently queued."""
          return self.info['size']

      def _loadinfo(self, chunksize):
          """Return the saved state from ``info.json``, or a fresh state."""
          infopath = self._infopath()
          if os.path.exists(infopath):
              with open(infopath) as f:
                  info = json.load(f)
          else:
              info = {
                  'chunksize': chunksize,
                  'size': 0,
                  'tail': [0, 0, 0],
                  'head': [0, 0],
              }
          return info

      def _saveinfo(self, info):
          """Write ``info`` to ``info.json`` as JSON."""
          with open(self._infopath(), 'w') as f:
              json.dump(info, f)

      def _infopath(self):
          """Return the path of the ``info.json`` state file."""
          return os.path.join(self.path, 'info.json')

      def _cleanup(self):
          """Delete all chunk files and ``info.json``; drop the dir if empty."""
          for x in glob.glob(os.path.join(self.path, 'q*')):
              os.remove(x)
          os.remove(os.path.join(self.path, 'info.json'))
          if not os.listdir(self.path):
              os.rmdir(self.path)
  class LifoDiskQueue(object):
      """Persistent LIFO queue kept in a single file.

      The file begins with a big-endian unsigned 32-bit record count
      (``SIZE_FORMAT``). Each record is stored as its payload followed by
      a length trailer in the same format, so the newest record can be
      located by reading backwards from the end of the file.
      """

      SIZE_FORMAT = ">L"
      SIZE_SIZE = struct.calcsize(SIZE_FORMAT)

      def __init__(self, path):
          """Open the queue file at ``path``, creating it when absent."""
          self.path = path
          if not os.path.exists(path):
              # New queue: write a zero record count as the file header.
              self.f = open(path, 'wb+')
              self.f.write(struct.pack(self.SIZE_FORMAT, 0))
              self.size = 0
          else:
              # Existing queue: read the stored count, then move to the end
              # of the file, where records are appended.
              self.f = open(path, 'rb+')
              header = self.f.read(self.SIZE_SIZE)
              (self.size,) = struct.unpack(self.SIZE_FORMAT, header)
              self.f.seek(0, os.SEEK_END)

      def push(self, string):
          """Append ``string`` followed by its packed length."""
          fileobj = self.f
          fileobj.write(string)
          trailer = struct.pack(self.SIZE_FORMAT, len(string))
          fileobj.write(trailer)
          self.size += 1

      def pop(self):
          """Remove and return the newest record, or ``None`` when empty."""
          if not self.size:
              return None
          fileobj = self.f
          width = self.SIZE_SIZE
          # The last ``width`` bytes of the file hold the record's length.
          fileobj.seek(-width, os.SEEK_END)
          (length,) = struct.unpack(self.SIZE_FORMAT, fileobj.read())
          fileobj.seek(-(length + width), os.SEEK_END)
          payload = fileobj.read(length)
          # Step back to where the payload began and cut the file there,
          # dropping both the payload and its length trailer.
          fileobj.seek(-length, os.SEEK_CUR)
          fileobj.truncate()
          self.size -= 1
          return payload

      def close(self):
          """Store the record count and close; delete the file if empty."""
          is_empty = not self.size
          if not is_empty:
              # Rewrite the header at offset 0 with the current count.
              self.f.seek(0)
              self.f.write(struct.pack(self.SIZE_FORMAT, self.size))
          self.f.close()
          if is_empty:
              os.remove(self.path)

      def __len__(self):
          """Return the number of records currently queued."""
          return self.size