
/pypy/jit/backend/llsupport/asmmemmgr.py

https://bitbucket.org/pypy/pypy/
import sys
from pypy.rlib.rarithmetic import intmask, r_uint, LONG_BIT
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib import rmmap
from pypy.rlib.debug import debug_start, debug_print, debug_stop
from pypy.rlib.debug import have_debug_prints
from pypy.rpython.lltypesystem import lltype, llmemory, rffi

class AsmMemoryManager(object):
    LARGE_ALLOC_SIZE = 1024 * 1024    # 1MB
    MIN_FRAGMENT = 64
    NUM_INDICES = 32    # good for all sizes between 64 bytes and ~490 KB
    _allocated = None

    def __init__(self, large_alloc_size=LARGE_ALLOC_SIZE,
                       min_fragment=MIN_FRAGMENT,
                       num_indices=NUM_INDICES):
        self.total_memory_allocated = r_uint(0)
        self.total_mallocs = r_uint(0)
        self.large_alloc_size = large_alloc_size
        self.min_fragment = min_fragment
        self.num_indices = num_indices
        self.free_blocks = {}        # map {start: stop}
        self.free_blocks_end = {}    # map {stop: start}
        self.blocks_by_size = [[] for i in range(self.num_indices)]

    def malloc(self, minsize, maxsize):
        """Allocate executable memory, between minsize and maxsize bytes,
        and return a pair (start, stop).  Does not perform any rounding
        of minsize and maxsize.
        """
        result = self._allocate_block(minsize)
        (start, stop) = result
        smaller_stop = start + maxsize
        if smaller_stop + self.min_fragment <= stop:
            self._add_free_block(smaller_stop, stop)
            stop = smaller_stop
            result = (start, stop)
        self.total_mallocs += r_uint(stop - start)
        return result    # pair (start, stop)
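
    # (Worked example for malloc(), under the default min_fragment of 64:
    # malloc(100, 200) hands back a pair spanning at least 100 bytes; if
    # the free block found reaches more than min_fragment bytes past 200,
    # the tail beyond 200 is split off and kept free, so the returned
    # size always stays below 200 + min_fragment.)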

    def free(self, start, stop):
        """Free a block (start, stop) returned by a previous malloc()."""
        self.total_mallocs -= r_uint(stop - start)
        self._add_free_block(start, stop)

    def open_malloc(self, minsize):
        """Allocate at least minsize bytes.  Returns (start, stop)."""
        result = self._allocate_block(minsize)
        (start, stop) = result
        self.total_mallocs += r_uint(stop - start)
        return result

    def open_free(self, middle, stop):
        """Used for freeing the end of an open-allocated block of memory."""
        if stop - middle >= self.min_fragment:
            self.total_mallocs -= r_uint(stop - middle)
            self._add_free_block(middle, stop)
            return True
        else:
            return False    # too small to record

    def _allocate_large_block(self, minsize):
        # Compute 'size' from 'minsize': it must be rounded up to
        # 'large_alloc_size'.  Additionally, we use the following line
        # to limit how many mmap() requests the OS will see in total:
        minsize = max(minsize, intmask(self.total_memory_allocated >> 4))
        size = minsize + self.large_alloc_size - 1
        size = (size // self.large_alloc_size) * self.large_alloc_size
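        # (Worked example, with the default 1MB large_alloc_size:
        # minsize=1 gives size=1MB, minsize=1MB stays at 1MB, and
        # minsize=1MB+1 gives 2MB.  Together with the '>> 4' above, every
        # new mmap() is at least 1/16th of everything allocated so far,
        # so the number of mmap() calls grows only logarithmically.)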
        data = rmmap.alloc(size)
        if not we_are_translated():
            if self._allocated is None:
                self._allocated = []
            self._allocated.append((data, size))
            if sys.maxint > 2147483647:
                # Hack to make sure that mcs are not within 32-bits of one
                # another for testing purposes
                rmmap.hint.pos += 0x80000000 - size
        self.total_memory_allocated += r_uint(size)
        data = rffi.cast(lltype.Signed, data)
        return self._add_free_block(data, data + size)

    def _get_index(self, length):
        i = 0
        while length > self.min_fragment:
            length = (length * 3) >> 2
            i += 1
            if i == self.num_indices - 1:
                break
        return i
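
    # (Worked example for _get_index, with min_fragment=64:
    # _get_index(64) == 0 and _get_index(100) == 2, since 100 -> 75 -> 56.
    # Each index covers lengths about 4/3 larger than the previous one,
    # and the loop saturates at index 31 somewhere around
    # 64 * (4/3)**31 bytes, which is the "~490 KB" mentioned next to
    # NUM_INDICES above.)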

    def _add_free_block(self, start, stop):
        # Merge with the block on the left
        if start in self.free_blocks_end:
            left_start = self.free_blocks_end[start]
            self._del_free_block(left_start, start)
            start = left_start
        # Merge with the block on the right
        if stop in self.free_blocks:
            right_stop = self.free_blocks[stop]
            self._del_free_block(stop, right_stop)
            stop = right_stop
        # Add it to the dicts
        self.free_blocks[start] = stop
        self.free_blocks_end[stop] = start
        i = self._get_index(stop - start)
        self.blocks_by_size[i].append(start)
        return start
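
    # (For example, after _add_free_block(0, 64) followed by
    # _add_free_block(64, 128), the two ranges coalesce into a single
    # free block {0: 128}: the second call finds start == 64 in
    # free_blocks_end and merges with its left neighbour.)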

    def _del_free_block(self, start, stop):
        del self.free_blocks[start]
        del self.free_blocks_end[stop]
        i = self._get_index(stop - start)
        self.blocks_by_size[i].remove(start)

    def _allocate_block(self, length):
        # First look in the group of index i0 if there is a block that is
        # big enough.  Following an idea found in the Linux malloc.c, we
        # prefer the oldest entries rather than the newest one, to let
        # them have enough time to coalesce into bigger blocks.  It makes
        # a big difference on the purely random test (30% of total usage).
        i0 = self._get_index(length)
        bbs = self.blocks_by_size[i0]
        for j in range(len(bbs)):
            start = bbs[j]
            stop = self.free_blocks[start]
            if start + length <= stop:
                del bbs[j]
                break    # found a block big enough
        else:
            # Then look in the larger groups
            i = i0 + 1
            while i < self.num_indices:
                if len(self.blocks_by_size[i]) > 0:
                    # any block found in a larger group is big enough
                    start = self.blocks_by_size[i].pop(0)
                    stop = self.free_blocks[start]
                    break
                i += 1
            else:
                # Exhausted the memory.  Allocate the resulting block.
                start = self._allocate_large_block(length)
                stop = self.free_blocks[start]
                i = self._get_index(stop - start)
                assert self.blocks_by_size[i][-1] == start
                self.blocks_by_size[i].pop()
        #
        del self.free_blocks[start]
        del self.free_blocks_end[stop]
        return (start, stop)
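
    # (For example, a request of length 200 maps to index i0 = 4.  A block
    # in bucket 4 can still be shorter than 200, since the buckets only
    # sort sizes to within a factor of about 4/3; hence the explicit size
    # check in the first loop, whereas any block found in buckets 5..31
    # is guaranteed to be big enough.)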

    def _delete(self):
        "NOT_RPYTHON"
        if self._allocated:
            for data, size in self._allocated:
                rmmap.free(data, size)
        self._allocated = None
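
# Usage sketch for AsmMemoryManager (illustrative only; it assumes a
# working pypy.rlib.rmmap underneath, since the memory comes from mmap()):
#
#     memmgr = AsmMemoryManager()
#     start, stop = memmgr.malloc(4096, 4096)    # executable memory
#     # ... copy machine code into the range [start, stop) ...
#     memmgr.free(start, stop)                   # return it to the pool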

class MachineDataBlockWrapper(object):
    def __init__(self, asmmemmgr, allblocks):
        self.asmmemmgr = asmmemmgr
        self.allblocks = allblocks
        self.rawstart = 0
        self.rawposition = 0
        self.rawstop = 0

    def done(self):
        if self.rawstart != 0:
            if self.asmmemmgr.open_free(self.rawposition, self.rawstop):
                self.rawstop = self.rawposition
            self.allblocks.append((self.rawstart, self.rawstop))
            self.rawstart = 0
            self.rawposition = 0
            self.rawstop = 0

    def _allocate_next_block(self, minsize):
        self.done()
        self.rawstart, self.rawstop = self.asmmemmgr.open_malloc(minsize)
        self.rawposition = self.rawstart

    def malloc_aligned(self, size, alignment):
        p = self.rawposition
        p = (p + alignment - 1) & (-alignment)
        if p + size > self.rawstop:
            self._allocate_next_block(size + alignment - 1)
            p = self.rawposition
            p = (p + alignment - 1) & (-alignment)
            assert p + size <= self.rawstop
        self.rawposition = p + size
        return p
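
# (Worked example for the rounding in malloc_aligned() above: with
# rawposition p = 13 and alignment = 8, (13 + 8 - 1) & -8 == 20 & -8 == 16,
# the next multiple of 8 at or above 13.)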

class BlockBuilderMixin(object):
    _mixin_ = True
    # A base class to generate assembler.  It is equivalent to just a list
    # of chars, but it is potentially more efficient for that usage.
    # It works by allocating the assembler SUBBLOCK_SIZE bytes at a time.
    # Ideally, this number should be a power of two that fits the GC's
    # most compact allocation scheme (which is so far 35 * WORD for
    # minimark.py).
    WORD = LONG_BIT // 8
    SUBBLOCK_SIZE = 32 * WORD
    SUBBLOCK_PTR = lltype.Ptr(lltype.GcForwardReference())
    SUBBLOCK = lltype.GcStruct('SUBBLOCK',
                   ('prev', SUBBLOCK_PTR),
                   ('data', lltype.FixedSizeArray(lltype.Char, SUBBLOCK_SIZE)))
    SUBBLOCK_PTR.TO.become(SUBBLOCK)

    gcroot_markers = None

    def __init__(self, translated=None):
        if translated is None:
            translated = we_are_translated()
        if translated:
            self.init_block_builder()
        else:
            self._become_a_plain_block_builder()

    def init_block_builder(self):
        self._cursubblock = lltype.nullptr(self.SUBBLOCK)
        self._baserelpos = -self.SUBBLOCK_SIZE
        self._make_new_subblock()

    def _make_new_subblock(self):
        nextsubblock = lltype.malloc(self.SUBBLOCK)
        nextsubblock.prev = self._cursubblock
        self._cursubblock = nextsubblock
        self._cursubindex = 0
        self._baserelpos += self.SUBBLOCK_SIZE
    _make_new_subblock._dont_inline_ = True

    def writechar(self, char):
        index = self._cursubindex
        if index == self.SUBBLOCK_SIZE:
            self._make_new_subblock()
            index = 0
        self._cursubblock.data[index] = char
        self._cursubindex = index + 1

    def overwrite(self, index, char):
        assert 0 <= index < self.get_relative_pos()
        block = self._cursubblock
        index -= self._baserelpos
        while index < 0:
            block = block.prev
            index += self.SUBBLOCK_SIZE
        block.data[index] = char

    def get_relative_pos(self):
        return self._baserelpos + self._cursubindex
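
    # (For example, on a 64-bit build SUBBLOCK_SIZE is 32 * 8 == 256:
    # after 300 writechar() calls there are two chained subblocks, the
    # older one full and the newer one holding 44 chars, and
    # get_relative_pos() returns 256 + 44 == 300.)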

    def copy_to_raw_memory(self, addr):
        # indirection for _become_a_plain_block_builder() and for subclasses
        self._copy_to_raw_memory(addr)

    def _copy_to_raw_memory(self, addr):
        block = self._cursubblock
        blocksize = self._cursubindex
        targetindex = self._baserelpos
        while targetindex >= 0:
            dst = rffi.cast(rffi.CCHARP, addr + targetindex)
            for j in range(blocksize):
                dst[j] = block.data[j]
            block = block.prev
            blocksize = self.SUBBLOCK_SIZE
            targetindex -= self.SUBBLOCK_SIZE
        assert not block

    def _dump(self, addr, logname, backend=None):
        debug_start(logname)
        if have_debug_prints():
            #
            if backend is not None:
                debug_print('BACKEND', backend)
            #
            from pypy.jit.backend.hlinfo import highleveljitinfo
            if highleveljitinfo.sys_executable:
                debug_print('SYS_EXECUTABLE', highleveljitinfo.sys_executable)
            #
            HEX = '0123456789ABCDEF'
            dump = []
            src = rffi.cast(rffi.CCHARP, addr)
            for p in range(self.get_relative_pos()):
                o = ord(src[p])
                dump.append(HEX[o >> 4])
                dump.append(HEX[o & 15])
            debug_print('CODE_DUMP',
                        '@%x' % addr,
                        '+0 ',    # backwards compatibility
                        ''.join(dump))
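            # (A resulting log line looks roughly like
            #  "CODE_DUMP @7f25a8000000 +0  9090C3", one hex pair per
            #  byte of machine code; the address here is hypothetical.)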
            #
        debug_stop(logname)

    def materialize(self, asmmemmgr, allblocks, gcrootmap=None):
        size = self.get_relative_pos()
        malloced = asmmemmgr.malloc(size, size)
        allblocks.append(malloced)
        rawstart = malloced[0]
        self.copy_to_raw_memory(rawstart)
        if self.gcroot_markers is not None:
            assert gcrootmap is not None
            for pos, mark in self.gcroot_markers:
                gcrootmap.put(rawstart + pos, mark)
        return rawstart

    def _become_a_plain_block_builder(self):
        # hack purely for speed of tests
        self._data = []
        self.writechar = self._data.append
        self.overwrite = self._data.__setitem__
        self.get_relative_pos = self._data.__len__
        def plain_copy_to_raw_memory(addr):
            dst = rffi.cast(rffi.CCHARP, addr)
            for i, c in enumerate(self._data):
                dst[i] = c
        self._copy_to_raw_memory = plain_copy_to_raw_memory

    def insert_gcroot_marker(self, mark):
        if self.gcroot_markers is None:
            self.gcroot_markers = []
        self.gcroot_markers.append((self.get_relative_pos(), mark))
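
# Illustrative usage sketch of BlockBuilderMixin (the helper name and the
# x86 byte values are assumptions made for the example, not part of the
# API): write code bytes, patch one, then materialize() the buffer into
# executable memory obtained from an AsmMemoryManager.
def _example_materialize(asmmemmgr):
    class _Builder(BlockBuilderMixin):
        pass
    mc = _Builder()
    for c in '\x90\x90\xc3':    # x86: NOP, NOP, RET
        mc.writechar(c)
    mc.overwrite(0, '\xcc')     # patch the first byte to INT3
    allblocks = []              # materialize() appends its (start, stop)
    rawstart = mc.materialize(asmmemmgr, allblocks)
    return rawstart             # address of the executable copy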