PageRenderTime 50ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/rpython/jit/metainterp/optimizeopt/test/test_schedule.py

https://bitbucket.org/pypy/pypy/
Python | 464 lines | 450 code | 11 blank | 3 comment | 7 complexity | b6f8049c9d64c874dcc02b05283b9600 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. import py
  2. from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
  3. from rpython.jit.metainterp.optimizeopt.util import equaloplists
  4. from rpython.jit.metainterp.optimizeopt.renamer import Renamer
  5. from rpython.jit.metainterp.optimizeopt.vector import (VecScheduleState,
  6. Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel,
  7. PackSet)
  8. from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
  9. from rpython.jit.metainterp.optimizeopt.schedule import Scheduler
  10. from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
  11. from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest)
  12. from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData,
  13. FakeJitDriverStaticData, FakePackSet)
  14. from rpython.jit.metainterp.resoperation import rop, ResOperation, VectorizationInfo
  15. from rpython.jit.tool.oparser import parse as opparse
  16. from rpython.jit.tool.oparser_model import get_model
  17. from rpython.jit.backend.detect_cpu import getcpuclass
# Backend CPU class returned by detect_cpu.getcpuclass().
CPU = getcpuclass()
# These tests exercise the vector scheduler, so skip them when the backend
# class reports no vector extension.
if not CPU.vector_extension:
    py.test.skip("this cpu %s has no implemented vector backend" % CPU)
class FakeVecScheduleState(VecScheduleState):
    """VecScheduleState stand-in that only initialises ``expanded_map``.

    The base-class constructor is not called, so a fresh instance carries
    nothing but the empty ``expanded_map`` dictionary set below.  It is used
    by ``test_expand`` to call ``expand`` / ``find_expanded`` directly.
    """
    def __init__(self):
        # presumably the only state expand()/find_expanded() need -- confirm
        # against VecScheduleState
        self.expanded_map = {}
  24. class SchedulerBaseTest(DependencyBaseTest):
  25. def setup_class(self):
  26. self.namespace = {
  27. 'double': self.floatarraydescr,
  28. 'float': self.float32arraydescr,
  29. 'long': self.arraydescr,
  30. 'int': self.int32arraydescr,
  31. 'short': self.int16arraydescr,
  32. 'char': self.chararraydescr,
  33. }
  34. def pack(self, loop, l, r, input_type=None, output_type=None):
  35. return Pack(loop.graph.nodes[l:r])
  36. def schedule(self, loop, packs, vec_reg_size=16,
  37. prepend_invariant=False, overwrite_funcs=None):
  38. cm = X86_CostModel(self.cpu, 0)
  39. cm.profitable = lambda: True
  40. pairs = []
  41. for pack in packs:
  42. for i in range(len(pack.operations)-1):
  43. o1 = pack.operations[i]
  44. o2 = pack.operations[i+1]
  45. pair = Pair(o1,o2)
  46. pairs.append(pair)
  47. packset = FakePackSet(pairs)
  48. state = VecScheduleState(loop.graph, packset, self.cpu, cm)
  49. for name, overwrite in (overwrite_funcs or {}).items():
  50. setattr(state, name, overwrite)
  51. renamer = Renamer()
  52. metainterp_sd = FakeMetaInterpStaticData(self.cpu)
  53. jitdriver_sd = FakeJitDriverStaticData()
  54. opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
  55. opt.packset = packset
  56. opt.combine_packset()
  57. opt.schedule(state)
  58. # works for now. might be the wrong class?
  59. # wrap label + operations + jump it in tree loop otherwise
  60. loop = state.graph.loop
  61. if prepend_invariant:
  62. loop.operations = loop.prefix + loop.operations
  63. return loop
  64. class Test(SchedulerBaseTest, LLtypeMixin):
  65. def test_next_must_not_loop_forever(self):
  66. scheduler = Scheduler()
  67. def delay(node, state):
  68. node.count += 1
  69. return True
  70. scheduler.delay = delay
  71. class State(object): pass
  72. class Node(object): emitted = False; pack = None; count = 0
  73. state = State()
  74. state.worklist = [Node(), Node(), Node(), Node(), Node()]
  75. assert scheduler.next(state) is None
  76. for node in state.worklist:
  77. assert node.count == 1
  78. # must return here, then the test passed
  79. def test_split_pack(self):
  80. loop1 = self.parse_trace("""
  81. f10 = raw_load_f(p0, i0, descr=double)
  82. f11 = raw_load_f(p0, i1, descr=double)
  83. f12 = raw_load_f(p0, i2, descr=double)
  84. """)
  85. ps = PackSet(16)
  86. ps.packs = [self.pack(loop1, 0, 3)]
  87. op1 = ps.packs[0].operations[0]
  88. op2 = ps.packs[0].operations[1]
  89. ps.split_overloaded_packs()
  90. assert len(ps.packs) == 1
  91. assert ps.packs[0].leftmost() is op1.getoperation()
  92. assert ps.packs[0].rightmost() is op2.getoperation()
  93. def test_schedule_split_load(self):
  94. loop1 = self.parse_trace("""
  95. f10 = raw_load_f(p0, i0, descr=float)
  96. f11 = raw_load_f(p0, i1, descr=float)
  97. f12 = raw_load_f(p0, i2, descr=float)
  98. f13 = raw_load_f(p0, i3, descr=float)
  99. f14 = raw_load_f(p0, i4, descr=float)
  100. f15 = raw_load_f(p0, i5, descr=float)
  101. """)
  102. pack1 = self.pack(loop1, 0, 6)
  103. loop2 = self.schedule(loop1, [pack1])
  104. loop3 = self.parse_trace("""
  105. v10[4xi32] = vec_raw_load_f(p0, i0, descr=float)
  106. f10 = raw_load_f(p0, i4, descr=float)
  107. f11 = raw_load_f(p0, i5, descr=float)
  108. """, False)
  109. self.assert_equal(loop2, loop3)
  110. def test_int_to_float(self):
  111. loop1 = self.parse_trace("""
  112. i10 = raw_load_i(p0, i0, descr=long)
  113. i11 = raw_load_i(p0, i1, descr=long)
  114. i12 = int_signext(i10, 4)
  115. i13 = int_signext(i11, 4)
  116. f10 = cast_int_to_float(i12)
  117. f11 = cast_int_to_float(i13)
  118. """)
  119. pack1 = self.pack(loop1, 0, 2)
  120. pack2 = self.pack(loop1, 2, 4)
  121. pack3 = self.pack(loop1, 4, 6)
  122. loop2 = self.schedule(loop1, [pack1, pack2, pack3])
  123. loop3 = self.parse_trace("""
  124. v10[2xi64] = vec_raw_load_i(p0, i0, descr=long)
  125. v20[2xi32] = vec_int_signext(v10[2xi64], 4)
  126. v30[2xf64] = vec_cast_int_to_float(v20[2xi32])
  127. """, False)
  128. self.assert_equal(loop2, loop3)
  129. def test_scalar_pack(self):
  130. loop1 = self.parse_trace("""
  131. i10 = int_add(i0, 73)
  132. i11 = int_add(i1, 73)
  133. """)
  134. pack1 = self.pack(loop1, 0, 2)
  135. loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
  136. loop3 = self.parse_trace("""
  137. v10[0xi64] = vec_i()
  138. v20[1xi64] = vec_pack_i(v10[2xi64], i0, 0, 1)
  139. v30[2xi64] = vec_pack_i(v20[2xi64], i1, 1, 1)
  140. v40[2xi64] = vec_expand_i(73)
  141. #
  142. v50[2xi64] = vec_int_add(v30[2xi64], v40[2xi64])
  143. """, False)
  144. self.assert_equal(loop2, loop3)
  145. loop1 = self.parse_trace("""
  146. f10 = float_add(f0, 73.0)
  147. f11 = float_add(f1, 73.0)
  148. """)
  149. pack1 = self.pack(loop1, 0, 2)
  150. loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
  151. loop3 = self.parse_trace("""
  152. v10[0xf64] = vec_f()
  153. v20[1xf64] = vec_pack_f(v10[2xf64], f0, 0, 1)
  154. v30[2xf64] = vec_pack_f(v20[2xf64], f1, 1, 1)
  155. v40[2xf64] = vec_expand_f(73.0)
  156. #
  157. v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
  158. """, False)
  159. self.assert_equal(loop2, loop3)
  160. def test_scalar_remember_expansion(self):
  161. loop1 = self.parse_trace("""
  162. f10 = float_add(f0, f5)
  163. f11 = float_add(f1, f5)
  164. f12 = float_add(f10, f5)
  165. f13 = float_add(f11, f5)
  166. """)
  167. pack1 = self.pack(loop1, 0, 2)
  168. pack2 = self.pack(loop1, 2, 4)
  169. loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
  170. loop3 = self.parse_trace("""
  171. v10[0xf64] = vec_f()
  172. v20[1xf64] = vec_pack_f(v10[2xf64], f0, 0, 1)
  173. v30[2xf64] = vec_pack_f(v20[2xf64], f1, 1, 1)
  174. v40[2xf64] = vec_expand_f(f5) # only expaned once
  175. #
  176. v50[2xf64] = vec_float_add(v30[2xf64], v40[2xf64])
  177. v60[2xf64] = vec_float_add(v50[2xf64], v40[2xf64])
  178. """, False)
  179. self.assert_equal(loop2, loop3)
  180. def find_input_arg(self, name, loop):
  181. for arg in loop.inputargs:
  182. if str(arg).startswith(name):
  183. return arg
  184. raise Exception("could not find %s in args %s" % (name, loop.inputargs))
  185. def test_signext_int32(self):
  186. loop1 = self.parse_trace("""
  187. i10 = int_signext(i1, 4)
  188. i11 = int_signext(i1, 4)
  189. """, additional_args=['v10[2xi64]'])
  190. pack1 = self.pack(loop1, 0, 2)
  191. var = loop1.inputargs[-1]
  192. vi = VectorizationInfo(None)
  193. vi.datatype = 'i'
  194. vi.bytesize = 8
  195. vi.count = 2
  196. vi.signed = True
  197. var.set_forwarded(vi)
  198. loop2 = self.schedule(loop1, [pack1], prepend_invariant=True,
  199. overwrite_funcs = {
  200. 'getvector_of_box': lambda v: (0, var),
  201. })
  202. loop3 = self.parse_trace("""
  203. v11[2xi32] = vec_int_signext(v10[2xi64], 4)
  204. """, False, additional_args=['v10[2xi64]'])
  205. self.assert_equal(loop2, loop3)
  206. def test_cast_float_to_int(self):
  207. loop1 = self.parse_trace("""
  208. f10 = raw_load_f(p0, i1, descr=double)
  209. f11 = raw_load_f(p0, i2, descr=double)
  210. f12 = raw_load_f(p0, i3, descr=double)
  211. f13 = raw_load_f(p0, i4, descr=double)
  212. f14 = raw_load_f(p0, i5, descr=double)
  213. f15 = raw_load_f(p0, i6, descr=double)
  214. f16 = raw_load_f(p0, i7, descr=double)
  215. f17 = raw_load_f(p0, i8, descr=double)
  216. #
  217. i10 = cast_float_to_int(f10)
  218. i11 = cast_float_to_int(f11)
  219. i12 = cast_float_to_int(f12)
  220. i13 = cast_float_to_int(f13)
  221. i14 = cast_float_to_int(f14)
  222. i15 = cast_float_to_int(f15)
  223. i16 = cast_float_to_int(f16)
  224. i17 = cast_float_to_int(f17)
  225. #
  226. i18 = int_signext(i10, 2)
  227. i19 = int_signext(i11, 2)
  228. i20 = int_signext(i12, 2)
  229. i21 = int_signext(i13, 2)
  230. i22 = int_signext(i14, 2)
  231. i23 = int_signext(i15, 2)
  232. i24 = int_signext(i16, 2)
  233. i25 = int_signext(i17, 2)
  234. #
  235. raw_store(p1, i1, i18, descr=short)
  236. raw_store(p1, i2, i19, descr=short)
  237. raw_store(p1, i3, i20, descr=short)
  238. raw_store(p1, i4, i21, descr=short)
  239. raw_store(p1, i5, i22, descr=short)
  240. raw_store(p1, i6, i23, descr=short)
  241. raw_store(p1, i7, i24, descr=short)
  242. raw_store(p1, i8, i25, descr=short)
  243. """)
  244. pack1 = self.pack(loop1, 0, 8)
  245. pack2 = self.pack(loop1, 8, 16)
  246. pack3 = self.pack(loop1, 16, 24)
  247. pack4 = self.pack(loop1, 24, 32)
  248. def void(b,c):
  249. pass
  250. loop2 = self.schedule(loop1, [pack1,pack2,pack3,pack4],
  251. overwrite_funcs={
  252. '_prevent_signext': void
  253. })
  254. loop3 = self.parse_trace("""
  255. v10[2xf64] = vec_raw_load_f(p0, i1, descr=double)
  256. v11[2xf64] = vec_raw_load_f(p0, i3, descr=double)
  257. v12[2xf64] = vec_raw_load_f(p0, i5, descr=double)
  258. v13[2xf64] = vec_raw_load_f(p0, i7, descr=double)
  259. v14[2xi32] = vec_cast_float_to_int(v10[2xf64])
  260. v15[2xi32] = vec_cast_float_to_int(v11[2xf64])
  261. v16[2xi32] = vec_cast_float_to_int(v12[2xf64])
  262. v17[2xi32] = vec_cast_float_to_int(v13[2xf64])
  263. v22[4xi32] = vec_pack_i(v14[2xi32], v15[2xi32], 2, 2)
  264. v18[4xi16] = vec_int_signext(v22[4xi32],2)
  265. v23[6xi16] = vec_pack_i(v16[2xi32], v17[2xi32], 2, 2)
  266. v20[4xi16] = vec_int_signext(v23[4xi32],2)
  267. v24[8xi16] = vec_pack_i(v18[4xi16], v20[4xi16], 4, 4)
  268. vec_raw_store(p1, i1, v24[8xi16], descr=short)
  269. """, False)
  270. self.assert_equal(loop2, loop3)
  271. def test_cast_float_to_single_float(self):
  272. loop1 = self.parse_trace("""
  273. f10 = raw_load_f(p0, i1, descr=double)
  274. f11 = raw_load_f(p0, i2, descr=double)
  275. f12 = raw_load_f(p0, i3, descr=double)
  276. f13 = raw_load_f(p0, i4, descr=double)
  277. #
  278. i10 = cast_float_to_singlefloat(f10)
  279. i11 = cast_float_to_singlefloat(f11)
  280. i12 = cast_float_to_singlefloat(f12)
  281. i13 = cast_float_to_singlefloat(f13)
  282. #
  283. raw_store(p1, i1, i10, descr=float)
  284. raw_store(p1, i2, i11, descr=float)
  285. raw_store(p1, i3, i12, descr=float)
  286. raw_store(p1, i4, i13, descr=float)
  287. """)
  288. pack1 = self.pack(loop1, 0, 4)
  289. pack2 = self.pack(loop1, 4, 8)
  290. pack3 = self.pack(loop1, 8, 12)
  291. loop2 = self.schedule(loop1, [pack1,pack2,pack3])
  292. loop3 = self.parse_trace("""
  293. v44[2xf64] = vec_raw_load_f(p0, i1, descr=double)
  294. v45[2xf64] = vec_raw_load_f(p0, i3, descr=double)
  295. v46[2xi32] = vec_cast_float_to_singlefloat(v44[2xf64])
  296. v47[2xi32] = vec_cast_float_to_singlefloat(v45[2xf64])
  297. v41[4xi32] = vec_pack_i(v46[2xi32], v47[2xi32], 2, 2)
  298. vec_raw_store(p1, i1, v41[4xi32], descr=float)
  299. """, False)
  300. self.assert_equal(loop2, loop3)
  301. def test_all(self):
  302. loop1 = self.parse_trace("""
  303. i10 = raw_load_i(p0, i1, descr=long)
  304. i11 = raw_load_i(p0, i2, descr=long)
  305. #
  306. i12 = int_and(i10, 255)
  307. i13 = int_and(i11, 255)
  308. #
  309. guard_true(i12) []
  310. guard_true(i13) []
  311. """)
  312. pack1 = self.pack(loop1, 0, 2)
  313. pack2 = self.pack(loop1, 2, 4)
  314. pack3 = self.pack(loop1, 4, 6)
  315. loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True)
  316. loop3 = self.parse_trace("""
  317. v9[2xi64] = vec_expand_i(255)
  318. v10[2xi64] = vec_raw_load_i(p0, i1, descr=long)
  319. v11[2xi64] = vec_int_and(v10[2xi64], v9[2xi64])
  320. vec_guard_true(v11[2xi64]) []
  321. """, False)
  322. self.assert_equal(loop2, loop3)
  323. def test_split_load_store(self):
  324. loop1 = self.parse_trace("""
  325. i10 = raw_load_i(p0, i1, descr=float)
  326. i11 = raw_load_i(p0, i2, descr=float)
  327. i12 = raw_load_i(p0, i3, descr=float)
  328. i13 = raw_load_i(p0, i4, descr=float)
  329. raw_store(p0, i3, i10, descr=float)
  330. raw_store(p0, i4, i11, descr=float)
  331. """)
  332. pack1 = self.pack(loop1, 0, 4)
  333. pack2 = self.pack(loop1, 4, 6)
  334. loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
  335. loop3 = self.parse_trace("""
  336. v1[4xi32] = vec_raw_load_i(p0, i1, descr=float)
  337. i10 = vec_unpack_i(v1[4xi32], 0, 1)
  338. raw_store(p0, i3, i10, descr=float)
  339. i11 = vec_unpack_i(v1[4xi32], 1, 1)
  340. raw_store(p0, i4, i11, descr=float)
  341. """, False)
  342. # unfortunate ui32 is the type for float32... the unsigned u is for
  343. # the tests
  344. self.assert_equal(loop2, loop3)
  345. def test_split_arith(self):
  346. loop1 = self.parse_trace("""
  347. i10 = int_and(255, i1)
  348. i11 = int_and(255, i1)
  349. """)
  350. pack1 = self.pack(loop1, 0, 2)
  351. loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
  352. loop3 = self.parse_trace("""
  353. v1[2xi64] = vec_expand_i(255)
  354. v2[2xi64] = vec_expand_i(i1)
  355. v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
  356. """, False)
  357. self.assert_equal(loop2, loop3)
  358. def test_split_arith(self):
  359. loop1 = self.parse_trace("""
  360. i10 = int_and(255, i1)
  361. i11 = int_and(255, i1)
  362. """)
  363. pack1 = self.pack(loop1, 0, 2)
  364. loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
  365. loop3 = self.parse_trace("""
  366. v1[2xi64] = vec_expand_i(255)
  367. v2[2xi64] = vec_expand_i(i1)
  368. v3[2xi64] = vec_int_and(v1[2xi64], v2[2xi64])
  369. """, False)
  370. self.assert_equal(loop2, loop3)
  371. def test_no_vec_impl(self):
  372. loop1 = self.parse_trace("""
  373. i10 = int_and(255, i1)
  374. i11 = int_and(255, i2)
  375. i12 = call_pure_i(321, i10)
  376. i13 = call_pure_i(321, i11)
  377. i14 = int_and(i1, i12)
  378. i15 = int_and(i2, i13)
  379. """)
  380. pack1 = self.pack(loop1, 0, 2)
  381. pack4 = self.pack(loop1, 4, 6)
  382. loop2 = self.schedule(loop1, [pack1,pack4], prepend_invariant=True)
  383. loop3 = self.parse_trace("""
  384. v1[2xi64] = vec_expand_i(255)
  385. v2[0xi64] = vec_i()
  386. v3[1xi64] = vec_pack_i(v2[2xi64], i1, 0, 1)
  387. v4[2xi64] = vec_pack_i(v3[2xi64], i2, 1, 1)
  388. v5[2xi64] = vec_int_and(v1[2xi64], v4[2xi64])
  389. i10 = vec_unpack_i(v5[2xi64], 0, 1)
  390. i12 = call_pure_i(321, i10)
  391. i11 = vec_unpack_i(v5[2xi64], 1, 1)
  392. i13 = call_pure_i(321, i11)
  393. v6[0xi64] = vec_i()
  394. v7[1xi64] = vec_pack_i(v6[2xi64], i12, 0, 1)
  395. v8[2xi64] = vec_pack_i(v7[2xi64], i13, 1, 1)
  396. v9[2xi64] = vec_int_and(v4[2xi64], v8[i64])
  397. """, False)
  398. self.assert_equal(loop2, loop3)
  399. def test_split_cast(self):
  400. trace = self.parse_trace("""
  401. f10 = cast_int_to_float(i1)
  402. f11 = cast_int_to_float(i2)
  403. f12 = cast_int_to_float(i3)
  404. f13 = cast_int_to_float(i4)
  405. """)
  406. pack = self.pack(trace, 0, 4)
  407. packs = []
  408. pack.split(packs, 16)
  409. packs.append(pack)
  410. assert len(packs) == 2
  411. def test_combine_packset_nearly_empty_pack(self):
  412. trace = self.parse_trace("""
  413. i10 = int_add(i1, i1)
  414. i11 = int_add(i2, i2)
  415. i12 = int_add(i3, i3)
  416. """)
  417. pack = self.pack(trace, 0, 2)
  418. packset = FakePackSet([pack])
  419. packset.split_overloaded_packs()
  420. assert len(packset.packs) == 1
  421. def test_expand(self):
  422. state = FakeVecScheduleState()
  423. assert state.find_expanded([]) == None
  424. state.expand(['a'], 'a')
  425. assert state.find_expanded(['a']) == 'a'
  426. state.expand(['a','b','c'], 'abc')
  427. assert state.find_expanded(['a','b','c']) == 'abc'
  428. state.expand(['a','d','c'], 'adc')
  429. assert state.find_expanded(['a','b','c']) == 'abc'
  430. assert state.find_expanded(['a','d','c']) == 'adc'
  431. assert state.find_expanded(['d','d','c']) == None
  432. state.expand(['d','d','c'], 'ddc')
  433. assert state.find_expanded(['d','d','c']) == 'ddc'