PageRenderTime 63ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py

https://bitbucket.org/pypy/pypy/
Python | 1367 lines | 1345 code | 19 blank | 3 comment | 9 complexity | 70fa6b533b21c29894f0650102494057 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. import py
  2. import pytest
  3. from rpython.rlib.objectmodel import instantiate
  4. from rpython.jit.metainterp.optimizeopt.test.test_util import (LLtypeMixin,
  5. FakeMetaInterpStaticData, convert_old_style_to_targets,
  6. FakeWarmState)
  7. from rpython.jit.metainterp.optimizeopt.test.test_dependency import DependencyBaseTest
  8. from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
  9. from rpython.jit.metainterp.optimizeopt import optimize_trace
  10. import rpython.jit.metainterp.optimizeopt.optimizer as optimizeopt
  11. import rpython.jit.metainterp.optimizeopt.virtualize as virtualize
  12. from rpython.jit.metainterp.optimizeopt.dependency import DependencyGraph
  13. from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer,
  14. MemoryRef, isomorphic, Pair, NotAVectorizeableLoop, VectorLoop,
  15. NotAProfitableLoop, GuardStrengthenOpt, CostModel, X86_CostModel,
  16. PackSet, optimize_vector)
  17. from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler,
  18. SchedulerState, VecScheduleState, Pack)
  19. from rpython.jit.metainterp.optimizeopt.optimizer import BasicLoopInfo
  20. from rpython.jit.metainterp.optimize import InvalidLoop
  21. from rpython.jit.metainterp import compile
  22. from rpython.jit.metainterp.resoperation import rop, ResOperation
  23. from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
  24. from rpython.jit.backend.llsupport.descr import ArrayDescr
  25. from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
  26. from rpython.jit.tool.oparser import OpParser, convert_loop_to_trace
  27. from rpython.jit.backend.detect_cpu import getcpuclass
  28. CPU = getcpuclass()
  29. if not CPU.vector_extension:
  30. py.test.skip("this cpu %s has no implemented vector backend" % CPU)
  31. class FakeJitDriverStaticData(object):
  32. vec=True
  33. class FakePackSet(PackSet):
  34. def __init__(self, packs):
  35. self.packs = packs
  36. self.vec_reg_size = 16
  37. class FakeLoopInfo(LoopVersionInfo):
  38. def __init__(self, loop):
  39. self.target_token = loop.label.getdescr()
  40. self.label_op = loop.label
  41. self.insert_index = -1
  42. self.versions = []
  43. self.leads_to = {}
  44. self.descrs = []
  45. class FakeCostModel(CostModel):
  46. def __init__(self, cpu):
  47. CostModel.__init__(self, cpu, 16)
  48. def record_cast_int(self): pass
  49. def record_pack_savings(self, pack, times): pass
  50. def record_vector_pack(self, box, index, count): pass
  51. def record_vector_unpack(self, box, index, count): pass
  52. def unpack_cost(self, op, index, count): pass
  53. def savings_for_pack(self, pack, times): pass
  54. def profitable(self):
  55. return True
  56. def index_of_first(opnum, operations, pass_by=0):
  57. for i,op in enumerate(operations):
  58. if op.getopnum() == opnum:
  59. if pass_by == 0:
  60. return i
  61. else:
  62. pass_by -= 1
  63. return -1
  64. def find_first_index(loop, opnum, pass_by=0):
  65. """ return the first index of the operation having the same opnum or -1 """
  66. return index_of_first(opnum, loop.operations, pass_by)
  67. ARCH_VEC_REG_SIZE = 16
  68. class FakeWarmState(object):
  69. vec_all = False
  70. vec_cost = 0
  71. class VecTestHelper(DependencyBaseTest):
  72. enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap"
  73. jitdriver_sd = FakeJitDriverStaticData()
  74. def assert_vectorize(self, loop, expected_loop, call_pure_results=None):
  75. jump = ResOperation(rop.JUMP, loop.jump.getarglist(), loop.jump.getdescr())
  76. metainterp_sd = FakeMetaInterpStaticData(self.cpu)
  77. warmstate = FakeWarmState()
  78. loop.operations += [loop.jump]
  79. loop_info = BasicLoopInfo(loop.jump.getarglist(), None, jump)
  80. loop_info.label_op = ResOperation(rop.LABEL, loop.jump.getarglist(), loop.jump.getdescr())
  81. optimize_vector(None, metainterp_sd, self.jitdriver_sd, warmstate,
  82. loop_info, loop.operations)
  83. loop.operations = loop.operations[:-1]
  84. #loop.label = state[0].label_op
  85. #loop.operations = state[1]
  86. self.assert_equal(loop, expected_loop)
  87. def vectoroptimizer(self, loop):
  88. metainterp_sd = FakeMetaInterpStaticData(self.cpu)
  89. jitdriver_sd = FakeJitDriverStaticData()
  90. opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
  91. opt.orig_label_args = loop.label.getarglist()[:]
  92. return opt
  93. def earlyexit(self, loop):
  94. opt = self.vectoroptimizer(loop)
  95. graph = opt.analyse_index_calculations(loop)
  96. state = SchedulerState(graph)
  97. opt.schedule(state)
  98. return graph.loop
  99. def vectoroptimizer_unrolled(self, loop, unroll_factor = -1):
  100. opt = self.vectoroptimizer(loop)
  101. opt.linear_find_smallest_type(loop)
  102. loop.setup_vectorization()
  103. if unroll_factor == -1 and opt.smallest_type_bytes == 0:
  104. raise NotAVectorizeableLoop()
  105. if unroll_factor == -1:
  106. unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
  107. print ""
  108. print "unroll factor: ", unroll_factor, opt.smallest_type_bytes
  109. self.show_dot_graph(DependencyGraph(loop), "original_" + self.test_name)
  110. graph = opt.analyse_index_calculations(loop)
  111. if graph is not None:
  112. cycle = graph.cycles()
  113. if cycle is not None:
  114. print "CYCLE found %s" % cycle
  115. self.show_dot_graph(graph, "early_exit_" + self.test_name)
  116. assert cycle is None
  117. state = SchedulerState(graph)
  118. opt.schedule(state)
  119. opt.unroll_loop_iterations(loop, unroll_factor)
  120. self.debug_print_operations(loop)
  121. graph = DependencyGraph(loop)
  122. self.last_graph = graph # legacy for test_dependency
  123. self.show_dot_graph(graph, self.test_name)
  124. def gmr(i):
  125. return graph.memory_refs[graph.nodes[i]]
  126. graph.getmemref = gmr
  127. return opt, graph
  128. def init_packset(self, loop, unroll_factor = -1):
  129. opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
  130. opt.find_adjacent_memory_refs(graph)
  131. return opt, graph
  132. def extend_packset(self, loop, unroll_factor = -1):
  133. opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
  134. opt.find_adjacent_memory_refs(graph)
  135. opt.extend_packset()
  136. return opt, graph
  137. def combine_packset(self, loop, unroll_factor = -1):
  138. opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
  139. opt.find_adjacent_memory_refs(graph)
  140. opt.extend_packset()
  141. opt.combine_packset()
  142. return opt, graph
  143. def schedule(self, loop, unroll_factor = -1, with_guard_opt=False):
  144. info = FakeLoopInfo(loop)
  145. info.snapshot(loop)
  146. opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
  147. opt.find_adjacent_memory_refs(graph)
  148. opt.extend_packset()
  149. opt.combine_packset()
  150. costmodel = FakeCostModel(self.cpu)
  151. state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
  152. opt.schedule(state)
  153. if with_guard_opt:
  154. gso = GuardStrengthenOpt(graph.index_vars)
  155. gso.propagate_all_forward(info, loop)
  156. return opt
  157. def vectorize(self, loop, unroll_factor = -1):
  158. info = FakeLoopInfo(loop)
  159. info.snapshot(loop)
  160. opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
  161. opt.find_adjacent_memory_refs(graph)
  162. opt.extend_packset()
  163. opt.combine_packset()
  164. costmodel = X86_CostModel(self.cpu, 0)
  165. state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
  166. opt.schedule(state)
  167. if not costmodel.profitable():
  168. raise NotAProfitableLoop()
  169. gso = GuardStrengthenOpt(graph.index_vars)
  170. gso.propagate_all_forward(info, loop)
  171. oplist = loop.operations
  172. loop.operations = loop.prefix[:]
  173. if loop.prefix_label:
  174. loop.operations += [loop.prefix_label]
  175. loop.operations += oplist
  176. return opt
  177. def assert_unroll_loop_equals(self, loop, expected_loop, \
  178. unroll_factor = -1):
  179. self.vectoroptimizer_unrolled(loop, unroll_factor)
  180. self.assert_equal(loop, expected_loop)
  181. def assert_pack(self, pack, indices):
  182. assert len(pack.operations) == len(indices)
  183. for op,i in zip(pack.operations, indices):
  184. assert op.opidx == i
  185. def assert_has_pack_with(self, packset, opindices):
  186. for pack in packset.packs:
  187. for op,i in zip(pack.operations, opindices):
  188. if op.opidx != i:
  189. break
  190. else:
  191. # found a pack that points to the specified operations
  192. break
  193. else:
  194. pytest.fail("could not find a packset that points to %s" % str(opindices))
  195. def assert_packset_empty(self, packset, instr_count, exceptions):
  196. for a,b in exceptions:
  197. self.assert_packset_contains_pair(packset, a, b)
  198. import itertools
  199. combintations = set(itertools.product(range(instr_count),
  200. range(instr_count)))
  201. combintations -= set(exceptions)
  202. for a,b in combintations:
  203. self.assert_packset_not_contains_pair(packset, a, b)
  204. def assert_packset_not_contains_pair(self, packset, x, y):
  205. for pack in packset.packs:
  206. if pack.leftmost(node=True).opidx == x and \
  207. pack.rightmost(node=True).opidx == y:
  208. pytest.fail("must not find packset with indices {x},{y}" \
  209. .format(x=x,y=y))
  210. def assert_packset_contains_pair(self, packset, x, y):
  211. for pack in packset.packs:
  212. if isinstance(pack, Pair):
  213. if pack.leftmost(node=True).opidx == x and \
  214. pack.rightmost(node=True).opidx == y:
  215. break
  216. else:
  217. pytest.fail("can't find a pack set for indices {x},{y}" \
  218. .format(x=x,y=y))
  219. def assert_has_memory_ref_at(self, graph, idx):
  220. idx -= 1 # label is not in the nodes
  221. node = graph.nodes[idx]
  222. assert node in graph.memory_refs, \
  223. "operation %s at pos %d has no memory ref!" % \
  224. (node.getoperation(), node.getindex())
  225. class FakeInput(object):
  226. def __init__(self, type='f', datatype='f', size=8, signed=False):
  227. self.type = type
  228. self.datatype = datatype
  229. self.bytesize = size
  230. self.signed = signed
  231. def arg(type='f', size=8, signed=False, datatype='f'):
  232. return FakeInput(type, datatype, size, signed)
  233. class BaseTestVectorize(VecTestHelper):
  234. def test_opcount_filling_store(self):
  235. descr = ArrayDescr(0,8, None, 'F', concrete_type='f')
  236. pack = Pack([Node(ResOperation(rop.RAW_STORE, [0,0,arg('f',4)], descr), 0),
  237. Node(ResOperation(rop.RAW_STORE, [0,0,arg('f',4)], descr), 0),
  238. ])
  239. assert pack.opcount_filling_vector_register(16) == 2
  240. def test_opcount_filling_guard(self):
  241. descr = ArrayDescr(0,4, None, 'S')
  242. vec = ResOperation(rop.VEC_RAW_LOAD_I, ['a','i'], descr=descr)
  243. vec.count = 4
  244. pack = Pack([Node(ResOperation(rop.GUARD_TRUE, [vec]), 0),
  245. Node(ResOperation(rop.GUARD_TRUE, [vec]), 1),
  246. Node(ResOperation(rop.GUARD_TRUE, [vec]), 2),
  247. Node(ResOperation(rop.GUARD_TRUE, [vec]), 3),
  248. Node(ResOperation(rop.GUARD_TRUE, [vec]), 4),
  249. Node(ResOperation(rop.GUARD_TRUE, [vec]), 5),
  250. ])
  251. assert pack.pack_load(16) == 24-16
  252. assert pack.pack_load(8) == 24-8
  253. assert pack.pack_load(32) == 24-32
  254. assert pack.opcount_filling_vector_register(16) == 4
  255. ops, newops = pack.slice_operations(16)
  256. assert len(ops) == 4
  257. assert len(newops) == 2
  258. assert pack.opcount_filling_vector_register(8) == 2
  259. ops, newops = pack.slice_operations(8)
  260. assert len(ops) == 2
  261. assert len(newops) == 4
  262. def test_move_guard_first(self):
  263. trace = self.parse_trace("""
  264. i10 = int_add(i0, i1)
  265. #
  266. i11 = int_add(i0, i1)
  267. guard_true(i11) []
  268. """)
  269. add = trace.operations[1]
  270. guard = trace.operations[2]
  271. trace = self.earlyexit(trace)
  272. assert trace.operations[0] is add
  273. assert trace.operations[1] is guard
  274. def test_vectorize_guard(self):
  275. trace = self.parse_loop("""
  276. [p0,p1,i0]
  277. i100 = getarrayitem_raw_i(p0,i0,descr=int16arraydescr)
  278. i10 = getarrayitem_raw_i(p0,i0,descr=int32arraydescr)
  279. i20 = int_is_true(i10)
  280. guard_true(i20) [i20]
  281. i1 = int_add(i0, 1)
  282. jump(p0,p1,i1)
  283. """)
  284. self.vectorize(trace)
  285. self.debug_print_operations(trace)
  286. self.ensure_operations([
  287. 'v10[4xi32] = vec_getarrayitem_raw_i(p0,i0,descr=int32arraydescr)',
  288. 'v11[4xi32] = vec_int_is_true(v10[4xi32])',
  289. 'i100 = vec_unpack_i(v11[4xi32], 0, 1)',
  290. 'vec_guard_true(v11[4xi32]) [i100]',
  291. ], trace)
  292. def test_vectorize_skip(self):
  293. ops = """
  294. [p0,i0]
  295. i1 = int_add(i0,1)
  296. i2 = int_le(i1, 10)
  297. guard_true(i2) []
  298. jump(p0,i1)
  299. """
  300. self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
  301. def test_unroll_empty_stays_empty(self):
  302. """ has no operations in this trace, thus it stays empty
  303. after unrolling it 2 times """
  304. ops = """
  305. []
  306. jump()
  307. """
  308. self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(ops), 2)
  309. def test_vectorize_empty_with_early_exit(self):
  310. ops = """
  311. []
  312. jump()
  313. """
  314. try:
  315. self.schedule(self.parse_loop(ops),1)
  316. py.test.fail("empty loop with no memory references is not vectorizable")
  317. except NotAVectorizeableLoop:
  318. pass
  319. def test_unroll_empty_stays_empty_parameter(self):
  320. """ same as test_unroll_empty_stays_empty but with a parameter """
  321. ops = """
  322. [i0]
  323. jump(i0)
  324. """
  325. self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(ops), 2)
  326. def test_vect_pointer_fails(self):
  327. """ it currently rejects pointer arrays """
  328. ops = """
  329. [p0,i0]
  330. getarrayitem_gc_r(p0,i0,descr=arraydescr2)
  331. jump(p0,i0)
  332. """
  333. self.assert_vectorize(self.parse_loop(ops), self.parse_loop(ops))
  334. def test_load_primitive_python_list(self):
  335. """ it currently rejects pointer arrays """
  336. ops = """
  337. [p0,i0]
  338. i2 = getarrayitem_gc_i(p0,i0,descr=arraydescr)
  339. i1 = int_add(i0,1)
  340. i3 = getarrayitem_gc_i(p0,i1,descr=arraydescr)
  341. i4 = int_add(i1,1)
  342. jump(p0,i4)
  343. """
  344. opt = """
  345. [p0,i0]
  346. i1 = int_add(i0,1)
  347. i2 = int_add(i0,2)
  348. v3[2xi64] = vec_getarrayitem_gc_i(p0,i0,descr=arraydescr)
  349. jump(p0,i2)
  350. """
  351. loop = self.parse_loop(ops)
  352. vopt = self.vectorize(loop,0)
  353. self.assert_equal(loop, self.parse_loop(opt))
  354. def test_vect_unroll_char(self):
  355. """ a 16 byte vector register can hold 16 bytes thus
  356. it is unrolled 16 times. (it is the smallest type in the trace) """
  357. ops = """
  358. [p0,i0]
  359. raw_load_i(p0,i0,descr=chararraydescr)
  360. jump(p0,i0)
  361. """
  362. opt_ops = """
  363. [p0,i0]
  364. {}
  365. jump(p0,i0)
  366. """.format(('\n' + ' ' *8).join(['raw_load_i(p0,i0,descr=chararraydescr)'] * 16))
  367. self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops))
  368. def test_unroll_vector_addition(self):
  369. """ a more complex trace doing vector addition (smallest type is float
  370. 8 byte) """
  371. ops = """
  372. [p0,p1,p2,i0]
  373. i1 = raw_load_i(p1, i0, descr=floatarraydescr)
  374. i2 = raw_load_i(p2, i0, descr=floatarraydescr)
  375. i3 = int_add(i1,i2)
  376. raw_store(p0, i0, i3, descr=floatarraydescr)
  377. i4 = int_add(i0, 1)
  378. i5 = int_le(i4, 10)
  379. guard_true(i5) []
  380. jump(p0,p1,p2,i4)
  381. """
  382. opt_ops = """
  383. [p0,p1,p2,i0]
  384. i4 = int_add(i0, 1)
  385. i5 = int_le(i4, 10)
  386. guard_true(i5) [p0,p1,p2,i0]
  387. i1 = raw_load_i(p1, i0, descr=floatarraydescr)
  388. i2 = raw_load_i(p2, i0, descr=floatarraydescr)
  389. i3 = int_add(i1,i2)
  390. raw_store(p0, i0, i3, descr=floatarraydescr)
  391. i9 = int_add(i4, 1)
  392. i10 = int_le(i9, 10)
  393. guard_true(i10) [p0,p1,p2,i4]
  394. i6 = raw_load_i(p1, i4, descr=floatarraydescr)
  395. i7 = raw_load_i(p2, i4, descr=floatarraydescr)
  396. i8 = int_add(i6,i7)
  397. raw_store(p0, i4, i8, descr=floatarraydescr)
  398. jump(p0,p1,p2,i9)
  399. """
  400. self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 1)
  401. def test_estimate_unroll_factor_smallest_byte_zero(self):
  402. ops = """
  403. [p0,i0]
  404. raw_load_i(p0,i0,descr=arraydescr)
  405. jump(p0,i0)
  406. """
  407. vopt = self.vectoroptimizer(self.parse_loop(ops))
  408. assert 0 == vopt.smallest_type_bytes
  409. assert 0 == vopt.get_unroll_count(ARCH_VEC_REG_SIZE)
  410. def test_array_operation_indices_not_unrolled(self):
  411. ops = """
  412. [p0,i0]
  413. raw_load_i(p0,i0,descr=arraydescr)
  414. jump(p0,i0)
  415. """
  416. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  417. assert len(graph.memory_refs) == 1
  418. self.assert_has_memory_ref_at(graph, 1)
  419. def test_array_operation_indices_unrolled_1(self):
  420. ops = """
  421. [p0,i0]
  422. raw_load_i(p0,i0,descr=chararraydescr)
  423. jump(p0,i0)
  424. """
  425. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
  426. assert len(graph.memory_refs) == 2
  427. self.assert_has_memory_ref_at(graph, 1)
  428. self.assert_has_memory_ref_at(graph, 2)
  429. def test_array_operation_indices_unrolled_2(self):
  430. ops = """
  431. [p0,i0,i1]
  432. i3 = raw_load_i(p0,i0,descr=chararraydescr)
  433. i4 = raw_load_i(p0,i1,descr=chararraydescr)
  434. jump(p0,i3,i4)
  435. """
  436. loop = self.parse_loop(ops)
  437. vopt, graph = self.vectoroptimizer_unrolled(loop,0)
  438. assert len(graph.memory_refs) == 2
  439. self.assert_has_memory_ref_at(graph, 1)
  440. self.assert_has_memory_ref_at(graph, 2)
  441. #
  442. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),1)
  443. assert len(graph.memory_refs) == 4
  444. for i in [1,2,3,4]:
  445. self.assert_has_memory_ref_at(graph, i)
  446. #
  447. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),3)
  448. assert len(graph.memory_refs) == 8
  449. for i in [1,2,3,4,5,6,7,8]:
  450. self.assert_has_memory_ref_at(graph, i)
  451. def test_array_memory_ref_adjacent_1(self):
  452. ops = """
  453. [p0,i0]
  454. i3 = raw_load_i(p0,i0,descr=chararraydescr)
  455. i1 = int_add(i0,1)
  456. jump(p0,i1)
  457. """
  458. loop = self.parse_loop(ops)
  459. vopt, graph = self.vectoroptimizer_unrolled(loop,1)
  460. vopt.find_adjacent_memory_refs(graph)
  461. assert len(graph.memory_refs) == 2
  462. mref1 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I))
  463. mref3 = graph.getmemref(find_first_index(loop, rop.RAW_LOAD_I,1))
  464. assert isinstance(mref1, MemoryRef)
  465. assert isinstance(mref3, MemoryRef)
  466. assert mref1.is_adjacent_to(mref3)
  467. assert mref3.is_adjacent_to(mref1)
  468. def test_array_memory_ref_1(self):
  469. ops = """
  470. [p0,i0]
  471. i3 = raw_load_i(p0,i0,descr=chararraydescr)
  472. jump(p0,i0)
  473. """
  474. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  475. vopt.find_adjacent_memory_refs(graph)
  476. mref1 = graph.getmemref(0)
  477. assert isinstance(mref1, MemoryRef)
  478. assert mref1.index_var.coefficient_mul == 1
  479. assert mref1.index_var.constant == 0
  480. def test_array_memory_ref_2(self):
  481. ops = """
  482. [p0,i0]
  483. i1 = int_add(i0,1)
  484. i3 = raw_load_i(p0,i1,descr=chararraydescr)
  485. jump(p0,i1)
  486. """
  487. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  488. vopt.find_adjacent_memory_refs(graph)
  489. mref1 = graph.getmemref(1)
  490. assert isinstance(mref1, MemoryRef)
  491. assert mref1.index_var.coefficient_mul == 1
  492. assert mref1.index_var.constant == 1
  493. def test_array_memory_ref_sub_index(self):
  494. ops = """
  495. [p0,i0]
  496. i1 = int_sub(i0,1)
  497. i3 = raw_load_i(p0,i1,descr=chararraydescr)
  498. jump(p0,i1)
  499. """
  500. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  501. vopt.find_adjacent_memory_refs(graph)
  502. mref1 = graph.getmemref(1)
  503. assert isinstance(mref1, MemoryRef)
  504. assert mref1.index_var.coefficient_mul == 1
  505. assert mref1.index_var.constant == -1
  506. def test_array_memory_ref_add_mul_index(self):
  507. ops = """
  508. [p0,i0]
  509. i1 = int_add(i0,1)
  510. i2 = int_mul(i1,3)
  511. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  512. jump(p0,i1)
  513. """
  514. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  515. vopt.find_adjacent_memory_refs(graph)
  516. mref1 = graph.getmemref(2)
  517. assert isinstance(mref1, MemoryRef)
  518. assert mref1.index_var.coefficient_mul == 3
  519. assert mref1.index_var.constant == 3
  520. def test_array_memory_ref_add_mul_index_interleaved(self):
  521. ops = """
  522. [p0,i0]
  523. i1 = int_add(i0,1)
  524. i2 = int_mul(i1,3)
  525. i3 = int_add(i2,5)
  526. i4 = int_mul(i3,6)
  527. i5 = raw_load_i(p0,i4,descr=chararraydescr)
  528. jump(p0,i4)
  529. """
  530. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  531. vopt.find_adjacent_memory_refs(graph)
  532. mref1 = graph.getmemref(4)
  533. assert isinstance(mref1, MemoryRef)
  534. assert mref1.index_var.coefficient_mul == 18
  535. assert mref1.index_var.constant == 48
  536. ops = """
  537. [p0,i0]
  538. i1 = int_add(i0,1)
  539. i2 = int_mul(i1,3)
  540. i3 = int_add(i2,5)
  541. i4 = int_mul(i3,6)
  542. i5 = int_add(i4,30)
  543. i6 = int_mul(i5,57)
  544. i7 = raw_load_i(p0,i6,descr=chararraydescr)
  545. jump(p0,i6)
  546. """
  547. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  548. vopt.find_adjacent_memory_refs(graph)
  549. mref1 = graph.getmemref(6)
  550. assert isinstance(mref1, MemoryRef)
  551. assert mref1.index_var.coefficient_mul == 1026
  552. assert mref1.index_var.coefficient_div == 1
  553. assert mref1.index_var.constant == 57*(30) + 57*6*(5) + 57*6*3*(1)
  554. def test_array_memory_ref_sub_mul_index_interleaved(self):
  555. ops = """
  556. [p0,i0]
  557. i1 = int_add(i0,1)
  558. i2 = int_mul(i1,3)
  559. i3 = int_sub(i2,3)
  560. i4 = int_mul(i3,2)
  561. i5 = raw_load_i(p0,i4,descr=chararraydescr)
  562. jump(p0,i4)
  563. """
  564. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  565. vopt.find_adjacent_memory_refs(graph)
  566. mref1 = graph.getmemref(4)
  567. assert isinstance(mref1, MemoryRef)
  568. assert mref1.index_var.coefficient_mul == 6
  569. assert mref1.index_var.coefficient_div == 1
  570. assert mref1.index_var.constant == 0
  571. def test_array_memory_ref_not_adjacent_1(self):
  572. ops = """
  573. [p0,i0,i4]
  574. i3 = raw_load_i(p0,i0,descr=chararraydescr)
  575. i1 = int_add(i0,1)
  576. i5 = raw_load_i(p0,i4,descr=chararraydescr)
  577. i6 = int_add(i4,1)
  578. jump(p0,i1,i6)
  579. """
  580. loop = self.parse_loop(ops)
  581. vopt, graph = self.vectoroptimizer_unrolled(loop,1)
  582. vopt.find_adjacent_memory_refs(graph)
  583. f = lambda x: find_first_index(loop, rop.RAW_LOAD_I, x)
  584. indices = [f(0),f(1),f(2),f(3)]
  585. for i in indices:
  586. self.assert_has_memory_ref_at(graph, i+1)
  587. assert len(graph.memory_refs) == 4
  588. mref1, mref3, mref5, mref7 = [graph.getmemref(i) for i in indices]
  589. assert isinstance(mref1, MemoryRef)
  590. assert isinstance(mref3, MemoryRef)
  591. assert isinstance(mref5, MemoryRef)
  592. assert isinstance(mref7, MemoryRef)
  593. self.assert_memory_ref_adjacent(mref1, mref5)
  594. self.assert_memory_ref_not_adjacent(mref1, mref3)
  595. self.assert_memory_ref_not_adjacent(mref1, mref7)
  596. self.assert_memory_ref_adjacent(mref3, mref7)
  597. assert mref1.is_adjacent_after(mref5)
  598. def test_array_memory_ref_div(self):
  599. py.test.skip("XXX rewrite or kill this test for the new divisions")
  600. ops = """
  601. [p0,i0]
  602. i1 = int_floordiv(i0,2)
  603. i2 = int_floordiv(i1,8)
  604. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  605. jump(p0,i2)
  606. """
  607. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  608. vopt.find_adjacent_memory_refs(graph)
  609. mref = graph.getmemref(2)
  610. assert mref.index_var.coefficient_div == 16
  611. ops = """
  612. [p0,i0]
  613. i1 = int_add(i0,8)
  614. i2 = uint_floordiv(i1,2)
  615. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  616. jump(p0,i2)
  617. """
  618. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  619. vopt.find_adjacent_memory_refs(graph)
  620. mref = graph.getmemref(2)
  621. assert mref.index_var.coefficient_div == 2
  622. assert mref.index_var.constant == 4
  623. ops = """
  624. [p0,i0]
  625. i1 = int_add(i0,8)
  626. i2 = int_floordiv(i1,2)
  627. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  628. i4 = int_add(i0,4)
  629. i5 = int_mul(i4,2)
  630. i6 = raw_load_i(p0,i5,descr=chararraydescr)
  631. jump(p0,i2)
  632. """
  633. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  634. vopt.find_adjacent_memory_refs(graph)
  635. mref = graph.getmemref(2)
  636. mref2 = graph.getmemref(5)
  637. self.assert_memory_ref_not_adjacent(mref, mref2)
  638. assert mref != mref2
  639. def test_array_memory_ref_diff_calc_but_equal(self):
  640. ops = """
  641. [p0,i0]
  642. i1 = int_add(i0,4)
  643. i2 = int_mul(i1,2)
  644. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  645. i4 = int_add(i0,2)
  646. i5 = int_mul(i4,2)
  647. i6 = int_add(i5,4)
  648. i7 = raw_load_i(p0,i6,descr=chararraydescr)
  649. jump(p0,i2)
  650. """
  651. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  652. vopt.find_adjacent_memory_refs(graph)
  653. mref = graph.getmemref(2)
  654. mref2 = graph.getmemref(6)
  655. self.assert_memory_ref_not_adjacent(mref, mref2)
  656. assert mref == mref2
  657. def test_array_memory_ref_diff_not_equal(self):
  658. ops = """
  659. [p0,i0]
  660. i1 = int_add(i0,4)
  661. i2 = int_sub(i1,3) # XXX used to be "divide by 2", not sure about it
  662. i3 = raw_load_i(p0,i2,descr=chararraydescr)
  663. i4 = int_add(i0,2)
  664. i5 = int_mul(i4,2)
  665. i6 = int_add(i5,4)
  666. i7 = raw_load_i(p0,i6,descr=chararraydescr)
  667. jump(p0,i2)
  668. """
  669. vopt, graph = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
  670. vopt.find_adjacent_memory_refs(graph)
  671. mref = graph.getmemref(2)
  672. mref2 = graph.getmemref(6)
  673. self.assert_memory_ref_not_adjacent(mref, mref2)
  674. assert mref != mref2
  675. def test_packset_init_simple(self):
  676. ops = """
  677. [p0,i0]
  678. i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr)
  679. i1 = int_add(i0, 1)
  680. i2 = int_le(i1, 16)
  681. guard_true(i2) [p0, i0]
  682. jump(p0,i1)
  683. """
  684. loop = self.parse_loop(ops)
  685. vopt, graph = self.init_packset(loop,1)
  686. self.assert_independent(graph, 4,8)
  687. assert vopt.packset is not None
  688. assert len(graph.memory_refs) == 2
  689. assert len(vopt.packset.packs) == 1
  690. def test_packset_init_raw_load_not_adjacent_and_adjacent(self):
  691. ops = """
  692. [p0,i0]
  693. i3 = raw_load_i(p0, i0, descr=chararraydescr)
  694. jump(p0,i0)
  695. """
  696. loop = self.parse_loop(ops)
  697. vopt, graph = self.init_packset(loop,3)
  698. assert len(graph.memory_refs) == 4
  699. assert len(vopt.packset.packs) == 0
  700. ops = """
  701. [p0,i0]
  702. i2 = int_add(i0,1)
  703. raw_load_i(p0, i2, descr=chararraydescr)
  704. jump(p0,i2)
  705. """
  706. loop = self.parse_loop(ops)
  707. vopt, graph = self.init_packset(loop,3)
  708. assert len(graph.memory_refs) == 4
  709. assert len(vopt.packset.packs) == 3
  710. for i in range(3):
  711. x = (i+1)*2
  712. y = x + 2
  713. self.assert_independent(graph, x,y)
  714. self.assert_packset_contains_pair(vopt.packset, x,y)
  715. def test_packset_init_2(self):
  716. ops = """
  717. [p0,i0]
  718. i1 = int_add(i0, 1)
  719. i2 = int_le(i1, 16)
  720. guard_true(i2) [p0, i0]
  721. i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
  722. jump(p0,i1)
  723. """
  724. loop = self.parse_loop(ops)
  725. vopt, graph = self.init_packset(loop,15)
  726. assert len(graph.memory_refs) == 16
  727. assert len(vopt.packset.packs) == 15
  728. # assure that memory refs are not adjacent for all
  729. for i in range(15):
  730. for j in range(15):
  731. try:
  732. if i-4 == j or i+4 == j:
  733. mref1 = graph.getmemref(i)
  734. mref2 = graph.getmemref(j)
  735. assert mref1.is_adjacent_to(mref2)
  736. else:
  737. mref1 = graph.getmemref(i)
  738. mref2 = graph.getmemref(j)
  739. assert not mref1.is_adjacent_to(mref2)
  740. except KeyError:
  741. pass
  742. for i in range(15):
  743. x = (i+1)*4
  744. y = x + 4
  745. self.assert_independent(graph, x,y)
  746. self.assert_packset_contains_pair(vopt.packset, x, y)
  747. def test_isomorphic_operations(self):
  748. ops_src = """
  749. [p1,p0,i0]
  750. i3 = getarrayitem_raw_i(p0, i0, descr=chararraydescr)
  751. i1 = int_add(i0, 1)
  752. i2 = int_le(i1, 16)
  753. i4 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
  754. f5 = getarrayitem_raw_f(p1, i1, descr=floatarraydescr)
  755. f6 = getarrayitem_raw_f(p0, i1, descr=floatarraydescr)
  756. guard_true(i2) [p0, i0]
  757. jump(p1,p0,i1)
  758. """
  759. loop = self.parse_loop(ops_src)
  760. ops = loop.operations
  761. assert isomorphic(ops[0], ops[3])
  762. assert not isomorphic(ops[0], ops[1])
  763. assert not isomorphic(ops[0], ops[5])
  764. def test_packset_extend_simple(self):
  765. ops = """
  766. [p0,i0]
  767. i1 = int_add(i0, 1)
  768. i2 = int_le(i1, 16)
  769. guard_true(i2) [p0, i0]
  770. i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
  771. i4 = int_add(i3, 1)
  772. jump(p0,i1)
  773. """
  774. loop = self.parse_loop(ops)
  775. vopt, graph = self.extend_packset(loop,1)
  776. assert len(graph.memory_refs) == 2
  777. self.assert_independent(graph, 5,10)
  778. assert len(vopt.packset.packs) == 2
  779. self.assert_packset_empty(vopt.packset,
  780. len(loop.operations),
  781. [(5,10), (4,9)])
  782. def test_packset_extend_load_modify_store(self):
  783. ops = """
  784. [p0,i0]
  785. i1 = int_add(i0, 1)
  786. i2 = int_le(i1, 16)
  787. guard_true(i2) [p0, i0]
  788. i3 = getarrayitem_raw_i(p0, i1, descr=chararraydescr)
  789. i4 = int_mul(i3, 2)
  790. setarrayitem_raw(p0, i1, i4, descr=chararraydescr)
  791. jump(p0,i1)
  792. """
  793. loop = self.parse_loop(ops)
  794. vopt, graph = self.extend_packset(loop,1)
  795. assert len(graph.memory_refs) == 4
  796. self.assert_independent(graph, 4,10)
  797. self.assert_independent(graph, 5,11)
  798. self.assert_independent(graph, 6,12)
  799. assert len(vopt.packset.packs) == 3
  800. self.assert_packset_empty(vopt.packset, len(loop.operations),
  801. [(6,12), (5,11), (4,10)])
  802. @pytest.mark.parametrize("descr,packs,packidx",
  803. [('char', 0, []),
  804. ('float', 2, [(0,(1,3)),(1,(5,7))]),
  805. ('', 2, [(0,(1,3)),(1,(5,7))]),
  806. ('float32', 1, [(0,(1,3,5,7))]),
  807. ])
  808. def test_packset_combine_simple(self,descr,packs,packidx):
  809. suffix = '_i'
  810. if 'float' in descr:
  811. suffix = '_f'
  812. ops = """
  813. [p0,i0]
  814. i3 = getarrayitem_raw{suffix}(p0, i0, descr={descr}arraydescr)
  815. i1 = int_add(i0,1)
  816. jump(p0,i1)
  817. """.format(descr=descr,suffix=suffix)
  818. loop = self.parse_loop(ops)
  819. vopt, graph = self.combine_packset(loop,3)
  820. assert len(graph.memory_refs) == 4
  821. assert len(vopt.packset.packs) == packs
  822. for i,t in packidx:
  823. self.assert_pack(vopt.packset.packs[i], t)
  824. @pytest.mark.parametrize("descr,stride,packs,suffix",
  825. [('char',1,0,'_i'),('float',8,4,'_f'),('',8,4,'_i'),('float32',4,2,'_i')])
  826. def test_packset_combine_2_loads_in_trace(self, descr, stride, packs, suffix):
  827. ops = """
  828. [p0,i0]
  829. i3 = raw_load{suffix}(p0, i0, descr={type}arraydescr)
  830. i1 = int_add(i0,{stride})
  831. i4 = raw_load{suffix}(p0, i1, descr={type}arraydescr)
  832. i2 = int_add(i1,{stride})
  833. jump(p0,i2)
  834. """.format(type=descr,stride=stride,suffix=suffix)
  835. loop = self.parse_loop(ops)
  836. vopt, graph = self.combine_packset(loop,3)
  837. assert len(graph.memory_refs) == 8
  838. assert len(vopt.packset.packs) == packs
  839. def test_packset_combine_no_candidates_packset_empty(self):
  840. ops = """
  841. []
  842. jump()
  843. """
  844. try:
  845. self.combine_packset(self.parse_loop(ops),15)
  846. pytest.fail("combine should raise an exception if no pack "
  847. "statements are present")
  848. except NotAVectorizeableLoop:
  849. pass
  850. ops = """
  851. [p0,i0]
  852. f3 = getarrayitem_raw_f(p0, i0, descr=floatarraydescr)
  853. jump(p0,i0)
  854. """
  855. loop = self.parse_loop(ops)
  856. try:
  857. self.combine_packset(loop,15)
  858. pytest.fail("combine should raise an exception if no pack "
  859. "statements are present")
  860. except NotAVectorizeableLoop:
  861. pass
  862. @pytest.mark.parametrize("op,descr,stride",
  863. [('int_add','char',1),
  864. ('int_sub','char',1),
  865. ('int_mul','char',1),
  866. ('float_add','float',8),
  867. ('float_sub','float',8),
  868. ('float_mul','float',8),
  869. ('float_add','float32',4),
  870. ('float_sub','float32',4),
  871. ('float_mul','float32',4),
  872. ('int_add','',8),
  873. ('int_sub','',8),
  874. ('int_mul','',8),
  875. ])
  876. def test_packset_vector_operation(self, op, descr, stride):
  877. suffix = '_i'
  878. if 'float' in descr:
  879. suffix = '_f'
  880. ops = """
  881. [p0,p1,p2,i0]
  882. i1 = int_add(i0, {stride})
  883. i10 = int_le(i1, 128)
  884. guard_true(i10) []
  885. i2 = raw_load{suffix}(p0, i0, descr={descr}arraydescr)
  886. i3 = raw_load{suffix}(p1, i0, descr={descr}arraydescr)
  887. i4 = {op}(i2,i3)
  888. raw_store(p2, i0, i4, descr={descr}arraydescr)
  889. jump(p0,p1,p2,i1)
  890. """.format(op=op,descr=descr,stride=stride,suffix=suffix)
  891. loop = self.parse_loop(ops)
  892. vopt, graph = self.combine_packset(loop,3)
  893. assert len(graph.memory_refs) == 12
  894. if stride == 8:
  895. assert len(vopt.packset.packs) == 8
  896. else:
  897. if descr != 'char':
  898. assert len(vopt.packset.packs) == 4
  899. if descr == 'char':
  900. return
  901. for opindices in [(4,11,18,25),(5,12,19,26),
  902. (6,13,20,27),(4,11,18,25)]:
  903. self.assert_has_pack_with(vopt.packset, opindices)
  904. @pytest.mark.parametrize('op,descr,stride',
  905. [('float_add','float',8),
  906. ('float_sub','float',8),
  907. ('float_mul','float',8),
  908. ('int_add','',8),
  909. ('int_sub','',8),
  910. ])
  911. def test_schedule_vector_operation(self, op, descr, stride):
  912. suffix = '_i'
  913. if 'float' in descr:
  914. suffix = '_f'
  915. ops = """
  916. [p0,p1,p2,i0] # 0
  917. i10 = int_le(i0, 128) # 1, 8, 15, 22
  918. guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
  919. i2 = getarrayitem_raw{suffix}(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
  920. i3 = getarrayitem_raw{suffix}(p1, i0, descr={descr}arraydescr) # 4, 11, 18, 25
  921. i4 = {op}(i2,i3) # 5, 12, 19, 26
  922. setarrayitem_raw(p2, i0, i4, descr={descr}arraydescr) # 6, 13, 20, 27
  923. i1 = int_add(i0, {stride}) # 7, 14, 21, 28
  924. jump(p0,p1,p2,i1) # 29
  925. """.format(op=op,descr=descr,stride=1,suffix=suffix)
  926. vops = """
  927. [p0,p1,p2,i0]
  928. i10 = int_le(i0, 128)
  929. guard_true(i10) [p0,p1,p2,i0]
  930. i1 = int_add(i0, {stride})
  931. i11 = int_le(i1, 128)
  932. guard_true(i11) [p0,p1,p2,i1]
  933. i12 = int_add(i1, {stride})
  934. v1 = vec_getarrayitem_raw{suffix}(p0, i0, descr={descr}arraydescr)
  935. v2 = vec_getarrayitem_raw{suffix}(p1, i0, descr={descr}arraydescr)
  936. v3 = {op}(v1,v2)
  937. vec_setarrayitem_raw(p2, i0, v3, descr={descr}arraydescr)
  938. jump(p0,p1,p2,i12)
  939. """.format(op='vec_'+op,descr=descr,stride=1,suffix=suffix)
  940. loop = self.parse_loop(ops)
  941. vopt = self.schedule(loop, 1)
  942. self.assert_equal(loop, self.parse_loop(vops))
  943. def test_vschedule_trace_1(self):
  944. ops = """
  945. [i0, i1, i2, i3, i4]
  946. i6 = int_mul(i0, 8)
  947. i7 = raw_load_i(i2, i6, descr=arraydescr)
  948. i8 = raw_load_i(i3, i6, descr=arraydescr)
  949. i9 = int_add(i7, i8)
  950. raw_store(i4, i6, i9, descr=arraydescr)
  951. i11 = int_add(i0, 1)
  952. i12 = int_lt(i11, i1)
  953. guard_true(i12) [i4, i3, i2, i1, i11]
  954. jump(i11, i1, i2, i3, i4)
  955. """
  956. opt="""
  957. [i0, i1, i2, i3, i4]
  958. i11 = int_add(i0, 1)
  959. i12 = int_lt(i11, i1)
  960. guard_true(i12) [i0,i1,i2,i3,i4]
  961. i6 = int_mul(i0, 8)
  962. i13 = int_add(i11, 1)
  963. i18 = int_lt(i13, i1)
  964. guard_true(i18) [i11,i1,i2,i3,i4]
  965. i14 = int_mul(i11, 8)
  966. v19[2xi64] = vec_raw_load_i(i2, i6, descr=arraydescr)
  967. v20[2xi64] = vec_raw_load_i(i3, i6, descr=arraydescr)
  968. v21[2xi64] = vec_int_add(v19, v20)
  969. vec_raw_store(i4, i6, v21, descr=arraydescr)
  970. jump(i13, i1, i2, i3, i4)
  971. """
  972. loop = self.parse_loop(ops)
  973. vopt = self.schedule(loop,1)
  974. self.assert_equal(loop, self.parse_loop(opt))
  975. def test_collapse_index_guard_1(self):
  976. ops = """
  977. [p0,i0]
  978. i1 = getarrayitem_raw_i(p0, i0, descr=chararraydescr)
  979. i2 = int_add(i0, 1)
  980. i3 = int_lt(i2, 102)
  981. guard_true(i3) [p0,i0]
  982. jump(p0,i2)
  983. """
  984. dead_code = '\n '.join([
  985. "i{t1} = int_add(i0,{i})\n i{s} = int_lt(i{t1}, 102)".format(
  986. i=i+2, t1=i+201, t=i+200, s=i+20)
  987. for i in range(0,14)])
  988. opt="""
  989. [p0,i0]
  990. i200 = int_add(i0, 1)
  991. i400 = int_lt(i200, 102)
  992. i2 = int_add(i0, 16)
  993. i3 = int_lt(i2, 102)
  994. guard_true(i3) [p0,i0]
  995. {dead_code}
  996. i500 = int_add(i0, 16)
  997. i501 = int_lt(i500, 102)
  998. v10[16xi8] = vec_getarrayitem_raw_i(p0, i0, descr=chararraydescr)
  999. jump(p0,i2)
  1000. """.format(dead_code=dead_code)
  1001. loop = self.parse_loop(ops)
  1002. vopt = self.schedule(loop,15,with_guard_opt=True)
  1003. self.assert_equal(loop, self.parse_loop(opt))
  1004. def test_too_small_vector(self):
  1005. ops = """
  1006. [p0,i0]
  1007. i1 = getarrayitem_raw_i(p0, 0, descr=chararraydescr) # constant index
  1008. i2 = getarrayitem_raw_i(p0, 1, descr=chararraydescr) # constant index
  1009. i4 = int_add(i1, i2)
  1010. i3 = int_add(i0,1)
  1011. i5 = int_lt(i3, 10)
  1012. guard_true(i5) [p0, i0]
  1013. jump(p0,i1)
  1014. """
  1015. try:
  1016. self.vectorize(self.parse_loop(ops))
  1017. py.test.fail("loop is not vectorizable")
  1018. except NotAVectorizeableLoop:
  1019. pass
  1020. def test_constant_expansion(self):
  1021. ops = """
  1022. [p0,i0]
  1023. i1 = getarrayitem_raw_i(p0, i0, descr=arraydescr)
  1024. i4 = int_sub(i1, 42)
  1025. i3 = int_add(i0,1)
  1026. i5 = int_lt(i3, 10)
  1027. guard_true(i5) [p0, i0]
  1028. jump(p0,i3)
  1029. """
  1030. opt="""
  1031. [p0,i0]
  1032. v3[2xf64] = vec_expand_i(42)
  1033. label(p0,i0,v3[2xf64])
  1034. i20 = int_add(i0, 1)
  1035. i30 = int_lt(i20, 10)
  1036. i2 = int_add(i0, 2)
  1037. i3 = int_lt(i2, 10)
  1038. guard_true(i3) [p0,i0]
  1039. i4 = int_add(i0, 2)
  1040. i5 = int_lt(i4, 10)
  1041. v1[2xf64] = vec_getarrayitem_raw_i(p0, i0, descr=arraydescr)
  1042. v2[2xf64] = vec_int_sub(v1[2xf64], v3[2xf64])
  1043. jump(p0,i2,v3[2xf64])
  1044. """
  1045. loop = self.parse_loop(ops)
  1046. vopt = self.vectorize(loop,1)
  1047. self.assert_equal(loop, self.parse_loop(opt))
  1048. def test_variable_expansion(self):
  1049. ops = """
  1050. [p0,i0,f3]
  1051. f1 = getarrayitem_raw_f(p0, i0, descr=floatarraydescr)
  1052. f4 = float_add(f1, f3)
  1053. i3 = int_add(i0,1)
  1054. i5 = int_lt(i3, 10)
  1055. guard_true(i5) [p0, i0]
  1056. jump(p0,i3,f3)
  1057. """
  1058. opt="""
  1059. [p0,i0,f3]
  1060. v3[2xf64] = vec_expand_f(f3)
  1061. label(p0,i0,f3,v3[2xf64])
  1062. i20 = int_add(i0, 1)
  1063. i30 = int_lt(i20, 10)
  1064. i2 = int_add(i0, 2)
  1065. i3 = int_lt(i2, 10)
  1066. guard_true(i3) [p0,i0,f3]
  1067. i4 = int_add(i0, 2)
  1068. i5 = int_lt(i4, 10)
  1069. v1[2xf64] = vec_getarrayitem_raw_f(p0, i0, descr=floatarraydescr)
  1070. v2[2xf64] = vec_float_add(v1[2xf64], v3[2xf64])
  1071. jump(p0,i2,f3,v3[2xf64])
  1072. """
  1073. loop = self.parse_loop(ops)
  1074. vopt = self.vectorize(loop,1)
  1075. self.assert_equal(loop, self.parse_loop(opt))
  1076. def test_accumulate_basic(self):
  1077. trace = """
  1078. [p0, i0, f0]
  1079. f1 = raw_load_f(p0, i0, descr=floatarraydescr)
  1080. f2 = float_add(f0, f1)
  1081. i1 = int_add(i0, 8)
  1082. i2 = int_lt(i1, 100)
  1083. guard_true(i2) [p0, i0, f2]
  1084. jump(p0, i1, f2)
  1085. """
  1086. trace_opt = """
  1087. [p0, i0, f0]
  1088. v6[0xf64] = vec_f()
  1089. v7[2xf64] = vec_int_xor(v6[0xf64], v6[0xf64])
  1090. v2[2xf64] = vec_pack_f(v7[2xf64], f0, 0, 1)
  1091. label(p0, i0, v2[2xf64])
  1092. i100 = int_add(i0, 8)
  1093. i200 = int_lt(i100, 100)
  1094. i1 = int_add(i0, 16)
  1095. i2 = int_lt(i1, 100)
  1096. guard_true(i2) [p0, i0, v2[2xf64]]
  1097. i10 = int_add(i0, 16)
  1098. i20 = int_lt(i10, 100)
  1099. v1[2xf64] = vec_raw_load_f(p0, i0, descr=floatarraydescr)
  1100. v3[2xf64] = vec_float_add(v2[2xf64], v1[2xf64])
  1101. jump(p0, i1, v3[2xf64])
  1102. """
  1103. loop = self.parse_loop(trace)
  1104. opt = self.vectorize(loop)
  1105. self.assert_equal(loop, self.parse_loop(trace_opt))
  1106. def test_element_f45_in_guard_failargs(self):
  1107. trace = self.parse_loop("""
  1108. [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
  1109. f45 = raw_load_f(i21, i44, descr=floatarraydescr)
  1110. guard_not_invalidated() [p38, p12, p9, p14, f45, p39, i37, i44, f35, i40, p42, i43, None, i28, p36, i41]
  1111. i46 = int_add(i44, 8)
  1112. f47 = raw_load_f(i4, i41, descr=floatarraydescr)
  1113. i48 = int_add(i41, 8)
  1114. f49 = float_add(f45, f47)
  1115. raw_store(i0, i37, f49, descr=floatarraydescr)
  1116. i50 = int_add(i28, 1)
  1117. i51 = int_add(i37, 8)
  1118. i52 = int_ge(i50, i18)
  1119. guard_false(i52) [p38, p12, p9, p14, i48, i46, f47, i51, i50, f45, p39, None, None, None, i40, p42, i43, None, None, p36, None]
  1120. jump(p36, i50, p9, i51, p14, f45, p12, p38, f47, p39, i40, i48, p42, i43, i46, i21, i4, i0, i18)
  1121. """)
  1122. trace_opt = self.parse_loop("""
  1123. [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
  1124. guard_not_invalidated() [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
  1125. i50 = int_add(i28, 1)
  1126. i20 = int_ge(i50, i18)
  1127. i54 = int_add(i28, 2)
  1128. i638 = int_ge(i54, i18)
  1129. guard_false(i638) [p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43, i44, i21, i4, i0, i18]
  1130. i12 = int_add(i44, 8)
  1131. i56 = int_add(i41, 8)
  1132. i46 = int_add(i37, 8)
  1133. i47 = int_add(i28, 2)
  1134. i52 = int_ge(i47, i18)
  1135. i55 = int_add(i44, 16)
  1136. i629 = int_add(i41, 16)
  1137. i637 = int_add(i37, 16)
  1138. v61[2xf64] = vec_raw_load_f(i21, i44, descr=floatarraydescr)
  1139. v62[2xf64] = vec_raw_load_f(i4, i41, descr=floatarraydescr)
  1140. v63[2xf64] = vec_float_add(v61, v62)
  1141. vec_raw_store(i0, i37, v63, descr=floatarraydescr)
  1142. f100 = vec_unpack_f(v61, 1, 1)
  1143. f101 = vec_unpack_f(v62, 1, 1)
  1144. jump(p36, i637, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, i43, i55, i21, i4, i0, i18)
  1145. """)
  1146. vopt = self.vectorize(trace)
  1147. self.assert_equal(trace, trace_opt)
  1148. def test_shrink_vector_size(self):
  1149. ops = """
  1150. [p0,p1,i1]
  1151. f1 = getarrayitem_raw_f(p0, i1, descr=floatarraydescr)
  1152. i2 = cast_float_to_singlefloat(f1)
  1153. setarrayitem_raw(p1, i1, i2, descr=float32arraydescr)
  1154. i3 = int_add(i1, 1)
  1155. i4 = int_ge(i3, 36)
  1156. guard_false(i4) []
  1157. jump(p0, p1, i3)
  1158. """
  1159. opt = """
  1160. [p0, p1, i1]
  1161. i3 = int_add(i1, 1)
  1162. i4 = int_ge(i3, 36)
  1163. i50 = int_add(i1, 4)
  1164. i51 = int_ge(i50, 36)
  1165. guard_false(i51) [p0, p1, i1]
  1166. i5 = int_add(i1, 2)
  1167. i8 = int_ge(i5, 36)
  1168. i6 = int_add(i1, 3)
  1169. i11 = int_ge(i6, 36)
  1170. i7 = int_add(i1, 4)
  1171. i14 = int_ge(i7, 36)
  1172. v17 = vec_getarrayitem_raw_f(p0, i1, descr=floatarraydescr)
  1173. v19 = vec_cast_float_to_singlefloat(v17)
  1174. v18 = vec_getarrayitem_raw_f(p0, i5, descr=floatarraydescr)
  1175. v20 = vec_cast_float_to_singlefloat(v18)
  1176. v21 = vec_pack_i(v19, v20, 2, 2)
  1177. vec_setarrayitem_raw(p1, i1, v21, descr=float32arraydescr)
  1178. jump(p0, p1, i50)
  1179. """
  1180. loop = self.parse_loop(ops)
  1181. vopt = self.vectorize(loop)
  1182. self.assert_equal(loop, self.parse_loop(opt))
  1183. def test_castup_arith_castdown(self):
  1184. trace = self.parse_loop("""
  1185. [p0,p1,p2,i0,i4]
  1186. i10 = raw_load_i(p0, i0, descr=float32arraydescr)
  1187. i1 = int_add(i0, 4)
  1188. i11 = raw_load_i(p1, i1, descr=float32arraydescr)
  1189. f1 = cast_singlefloat_to_float(i10)
  1190. f2 = cast_singlefloat_to_float(i11)
  1191. f3 = float_add(f1, f2)
  1192. i12 = cast_float_to_singlefloat(f3)
  1193. raw_store(p2, i4, i12, descr=float32arraydescr)
  1194. i5 = int_add(i4, 4)
  1195. i186 = int_lt(i5, 100)
  1196. guard_true(i186) []
  1197. jump(p0,p1,p2,i1,i5)
  1198. """)
  1199. trace_opt = self.parse_loop("""
  1200. [p0, p1, p2, i0, i4]
  1201. i5 = int_add(i4, 4)
  1202. i186 = int_lt(i5, 100)
  1203. i500 = int_add(i4, 16)
  1204. i501 = int_lt(i500, 100)
  1205. guard_true(i501) [p0, p1, p2, i0, i4]
  1206. i189 = int_add(i0, 4)
  1207. i187 = int_add(i4, 8)
  1208. i188 = int_lt(i187, 100)
  1209. i207 = int_add(i0, 8)
  1210. i196 = int_add(i4, 12)
  1211. i197 = int_lt(i196, 100)
  1212. i205 = int_add(i0, 12)
  1213. i400 = int_add(i4, 16)
  1214. i401= int_lt(i400, 100)
  1215. i402 = int_add(i0, 16)
  1216. v228[4xi32] = vec_raw_load_i(p0, i0, descr=float32arraydescr)
  1217. v229[2xf64] = vec_cast_singlefloat_to_float(v228)
  1218. v230 = vec_unpack_i(v228, 2, 2)
  1219. v231 = vec_cast_singlefloat_to_float(v230)
  1220. v232 = vec_raw_load_i(p1, i189, descr=float32arraydescr)
  1221. v233 = vec_cast_singlefloat_to_float(v232)
  1222. v236 = vec_float_add(v229, v233)
  1223. v238 = vec_cast_float_to_singlefloat(v236)
  1224. v234 = vec_unpack_i(v232, 2, 2)
  1225. v235 = vec_cast_singlefloat_to_float(v234)
  1226. v237 = vec_float_add(v231, v235)
  1227. v239 = vec_cast_float_to_singlefloat(v237)
  1228. v240 = vec_pack_i(v238, v239, 2, 2)
  1229. vec_raw_store(p2, i4, v240, descr=float32arraydescr)
  1230. jump(p0, p1, p2, i207, i500)
  1231. """)
  1232. vopt = self.vectorize(trace)
  1233. self.assert_equal(trace, trace_opt)
  1234. def test_sum_int16_prevent(self):
  1235. trace = self.parse_loop("""
  1236. [i0, p1, i2, p3, i4, i5, i6]
  1237. i7 = raw_load_i(i5, i4, descr=int16arraydescr)
  1238. i8 = int_add(i0, i7)
  1239. i10 = int_add(i2, 1)
  1240. i12 = int_add(i4, 2)
  1241. i13 = int_ge(i10, i6)
  1242. guard_false(i13, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7fe5a1848150>) [p3, i10, i8, i12, None, p1, None, None]
  1243. jump(i8, p1, i10, p3, i12, i5, i6)
  1244. """)
  1245. try:
  1246. vopt = self.vectorize(trace)
  1247. py.test.fail()
  1248. except NotAVectorizeableLoop:
  1249. pass
    def test_pass(self):
        """Schedule a trace mixing float arithmetic, guards and a ``call_f``.

        Within the lines visible here nothing is asserted on the result:
        the call to ``self.schedule`` only has to complete.
        """
        trace = self.parse_loop("""
        [p0,i0]
        f0 = raw_load_f(p0, i0, descr=floatarraydescr)
        f1 = float_mul(f0, 0.0)
        i2 = float_eq(f1, f1)
        guard_true(i2) [p0, i0]
        f2 = call_f(0, f0)
        f21 = float_mul(f2, 0.0)
        i3 = float_eq(f21, f21)
        guard_true(i3) [p0, i0]
        raw_store(p0, i0, f21, descr=floatarraydescr)
        i4 = int_add(i0, 8)
        jump(p0, i4)
        """)
        # NOTE(review): the result is not used in the visible part of the test
        vopt = self.schedule(trace)

Large files are truncated, but you can click here to view the full file