PageRenderTime 51ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/metainterp/test/test_vector.py

https://bitbucket.org/pypy/pypy/
Python | 548 lines | 507 code | 34 blank | 7 comment | 45 complexity | 6e2302fb6987979909a1c38397ddcee3 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. import py
  2. from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats
  3. from rpython.jit.metainterp.test.support import LLJitMixin
  4. from rpython.jit.codewriter.policy import StopAtXPolicy
  5. from rpython.jit.metainterp.resoperation import rop
  6. from rpython.jit.metainterp import history
  7. from rpython.rlib.jit import JitDriver, hint, set_param
  8. from rpython.rlib.objectmodel import compute_hash
  9. from rpython.rlib import rfloat
  10. from rpython.rtyper.lltypesystem import lltype, rffi
  11. from rpython.rlib.rarithmetic import r_uint, intmask
  12. from rpython.rlib.rawstorage import (alloc_raw_storage, raw_storage_setitem,
  13. free_raw_storage, raw_storage_getitem)
  14. from rpython.rlib.objectmodel import specialize, is_annotation_constant
  15. from rpython.jit.backend.detect_cpu import getcpuclass
# Backend CPU class as selected by detect_cpu.getcpuclass().
CPU = getcpuclass()
# The tests in this module need a vector backend; skip the module when the
# selected CPU class reports no vector extension.
if not CPU.vector_extension:
    py.test.skip("this cpu %s has no implemented vector backend" % CPU)
  19. @specialize.argtype(0,1)
  20. def malloc(T,n):
  21. return lltype.malloc(T, n, flavor='raw', zero=True)
def free(mem):
    """Release ``mem`` through ``lltype.free`` using the raw flavour."""
    lltype.free(mem, flavor='raw')
  24. class VectorizeTests:
  25. enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
  26. def setup_method(self, method):
  27. print "RUNNING", method.__name__
  28. def meta_interp(self, f, args, policy=None, vec=True, vec_all=False):
  29. return ll_meta_interp(f, args, enable_opts=self.enable_opts,
  30. policy=policy,
  31. CPUClass=self.CPUClass,
  32. type_system=self.type_system,
  33. vec=vec, vec_all=vec_all)
  34. @py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50])
  35. def test_vectorize_simple_load_arith_store_int_add_index(self,i):
  36. myjitdriver = JitDriver(greens = [],
  37. reds = 'auto',
  38. vectorize=True)
  39. def f(d):
  40. bc = d*rffi.sizeof(rffi.SIGNED)
  41. va = alloc_raw_storage(bc, zero=True)
  42. vb = alloc_raw_storage(bc, zero=True)
  43. vc = alloc_raw_storage(bc, zero=True)
  44. x = 1
  45. for i in range(d):
  46. j = i*rffi.sizeof(rffi.SIGNED)
  47. raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,i))
  48. raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,i))
  49. i = 0
  50. while i < bc:
  51. myjitdriver.jit_merge_point()
  52. a = raw_storage_getitem(rffi.SIGNED,va,i)
  53. b = raw_storage_getitem(rffi.SIGNED,vb,i)
  54. c = a+b
  55. raw_storage_setitem(vc, i, rffi.cast(rffi.SIGNED,c))
  56. i += 1*rffi.sizeof(rffi.SIGNED)
  57. res = 0
  58. for i in range(d):
  59. res += raw_storage_getitem(rffi.SIGNED,vc,i*rffi.sizeof(rffi.SIGNED))
  60. free_raw_storage(va)
  61. free_raw_storage(vb)
  62. free_raw_storage(vc)
  63. return res
  64. res = self.meta_interp(f, [i])
  65. assert res == f(i)
  66. @py.test.mark.parametrize('i',[1,2,3,8,17,128,130,131,142,143])
  67. def test_vectorize_array_get_set(self,i):
  68. myjitdriver = JitDriver(greens = [],
  69. reds = 'auto',
  70. vectorize=True)
  71. T = lltype.Array(rffi.INT, hints={'nolength': True})
  72. def f(d):
  73. i = 0
  74. va = lltype.malloc(T, d, flavor='raw', zero=True)
  75. vb = lltype.malloc(T, d, flavor='raw', zero=True)
  76. vc = lltype.malloc(T, d, flavor='raw', zero=True)
  77. for j in range(d):
  78. va[j] = rffi.r_int(j)
  79. vb[j] = rffi.r_int(j)
  80. while i < d:
  81. myjitdriver.jit_merge_point()
  82. a = va[i]
  83. b = vb[i]
  84. ec = intmask(a)+intmask(b)
  85. vc[i] = rffi.r_int(ec)
  86. i += 1
  87. res = 0
  88. for j in range(d):
  89. res += intmask(vc[j])
  90. lltype.free(va, flavor='raw')
  91. lltype.free(vb, flavor='raw')
  92. lltype.free(vc, flavor='raw')
  93. return res
  94. res = self.meta_interp(f, [i])
  95. assert res == f(i)
  96. @py.test.mark.parametrize('i',[1,2,3,4,9])
  97. def test_vector_register_too_small_vector(self, i):
  98. myjitdriver = JitDriver(greens = [],
  99. reds = 'auto',
  100. vectorize=True)
  101. T = lltype.Array(rffi.SHORT, hints={'nolength': True})
  102. def g(d, va, vb):
  103. i = 0
  104. while i < d:
  105. myjitdriver.jit_merge_point()
  106. a = va[i]
  107. b = vb[i]
  108. ec = intmask(a) + intmask(b)
  109. va[i] = rffi.r_short(ec)
  110. i += 1
  111. def f(d):
  112. i = 0
  113. va = lltype.malloc(T, d+100, flavor='raw', zero=True)
  114. vb = lltype.malloc(T, d+100, flavor='raw', zero=True)
  115. for j in range(d+100):
  116. va[j] = rffi.r_short(1)
  117. vb[j] = rffi.r_short(2)
  118. g(d+100, va, vb)
  119. g(d, va, vb) # this iteration might not fit into the vector register
  120. res = intmask(va[d])
  121. lltype.free(va, flavor='raw')
  122. lltype.free(vb, flavor='raw')
  123. return res
  124. res = self.meta_interp(f, [i])
  125. assert res == f(i) == 3
  126. def test_vectorize_max(self):
  127. myjitdriver = JitDriver(greens = [],
  128. reds = 'auto',
  129. vectorize=True)
  130. def fmax(v1, v2):
  131. return v1 if v1 >= v2 or rfloat.isnan(v2) else v2
  132. T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  133. def f(d):
  134. i = 0
  135. va = lltype.malloc(T, d, flavor='raw', zero=True)
  136. for j in range(d):
  137. va[j] = float(j)
  138. va[13] = 128.0
  139. m = -128.0
  140. while i < d:
  141. myjitdriver.jit_merge_point()
  142. a = va[i]
  143. m = fmax(a, m)
  144. i += 1
  145. lltype.free(va, flavor='raw')
  146. return m
  147. res = self.meta_interp(f, [30])
  148. assert res == f(30) == 128
  149. @py.test.mark.parametrize('type,func,init,insert,at,count,breaks',
  150. # all
  151. [(rffi.DOUBLE, lambda x: not bool(x), 1.0, None, -1,32, False),
  152. (rffi.DOUBLE, lambda x: x == 0.0, 1.0, None, -1,33, False),
  153. (rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.0, 33,34, True),
  154. (rffi.DOUBLE, lambda x: x == 0.0, 1.0, 0.1, 4,34, False),
  155. (lltype.Signed, lambda x: not bool(x), 1, None, -1,32, False),
  156. (lltype.Signed, lambda x: not bool(x), 1, 0, 14,32, True),
  157. (lltype.Signed, lambda x: not bool(x), 1, 0, 15,31, True),
  158. (lltype.Signed, lambda x: not bool(x), 1, 0, 4,30, True),
  159. (lltype.Signed, lambda x: x == 0, 1, None, -1,33, False),
  160. (lltype.Signed, lambda x: x == 0, 1, 0, 33,34, True),
  161. # any
  162. (rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, 33,35, True),
  163. (rffi.DOUBLE, lambda x: x != 0.0, 0.0, 1.0, -1,36, False),
  164. (rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, 33,37, True),
  165. (rffi.DOUBLE, lambda x: bool(x), 0.0, 1.0, -1,38, False),
  166. (lltype.Signed, lambda x: x != 0, 0, 1, 33,35, True),
  167. (lltype.Signed, lambda x: x != 0, 0, 1, -1,36, False),
  168. (lltype.Signed, lambda x: bool(x), 0, 1, 33,37, True),
  169. (lltype.Signed, lambda x: bool(x), 0, 1, -1,38, False),
  170. (rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), 33,35, True),
  171. (rffi.INT, lambda x: intmask(x) != 0, rffi.r_int(0), rffi.r_int(1), -1,36, False),
  172. (rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), 33,37, True),
  173. (rffi.INT, lambda x: bool(intmask(x)), rffi.r_int(0), rffi.r_int(1), -1,38, False),
  174. ])
  175. def test_bool_reduction(self, type, func, init, insert, at, count, breaks):
  176. myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
  177. T = lltype.Array(type, hints={'nolength': True})
  178. def f(d):
  179. va = lltype.malloc(T, d, flavor='raw', zero=True)
  180. for i in range(d): va[i] = init
  181. if at != -1:
  182. va[at] = insert
  183. i = 0 ; nobreak = False
  184. while i < d:
  185. myjitdriver.jit_merge_point()
  186. b = func(va[i])
  187. if b:
  188. assert b
  189. break
  190. i += 1
  191. else:
  192. nobreak = True
  193. lltype.free(va, flavor='raw')
  194. return not nobreak
  195. res = self.meta_interp(f, [count])
  196. assert res == f(count) == breaks
  197. def test_sum(self):
  198. myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
  199. T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  200. def f(d):
  201. va = lltype.malloc(T, d, flavor='raw', zero=True)
  202. for j in range(d):
  203. va[j] = float(j)
  204. i = 0
  205. accum = 0
  206. while i < d:
  207. myjitdriver.jit_merge_point()
  208. accum += va[i]
  209. i += 1
  210. lltype.free(va, flavor='raw')
  211. return accum
  212. res = self.meta_interp(f, [60])
  213. assert res == f(60) == sum(range(60))
  214. def test_constant_expand(self):
  215. myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
  216. T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  217. def f(d):
  218. va = lltype.malloc(T, d, flavor='raw', zero=True)
  219. i = 0
  220. while i < d:
  221. myjitdriver.jit_merge_point()
  222. va[i] = va[i] + 34.5
  223. i += 1
  224. val = va[0]
  225. lltype.free(va, flavor='raw')
  226. return val
  227. res = self.meta_interp(f, [60])
  228. assert res == f(60) == 34.5
  229. def test_constant_expand_vec_all(self):
  230. myjitdriver = JitDriver(greens = [], reds = 'auto')
  231. T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  232. def f(d):
  233. va = lltype.malloc(T, d, flavor='raw', zero=True)
  234. i = 0
  235. while i < d:
  236. myjitdriver.jit_merge_point()
  237. if not (i < d):
  238. raise IndexError
  239. va[i] = va[i] + 34.5
  240. i += 1
  241. val = va[0]
  242. lltype.free(va, flavor='raw')
  243. return val
  244. res = self.meta_interp(f, [60], vec_all=True)
  245. assert res == f(60) == 34.5
  246. def test_variable_expand(self):
  247. myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
  248. T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  249. def f(d,variable):
  250. va = lltype.malloc(T, d, flavor='raw', zero=True)
  251. i = 0
  252. while i < d:
  253. myjitdriver.jit_merge_point()
  254. va[i] = va[i] + variable
  255. i += 1
  256. val = va[0]
  257. lltype.free(va, flavor='raw')
  258. return val
  259. res = self.meta_interp(f, [60,58.4547])
  260. assert res == f(60,58.4547) == 58.4547
  261. @py.test.mark.parametrize('vec,vec_all',[(False,True),(True,False),(True,True),(False,False)])
  262. def test_accum(self, vec, vec_all):
  263. myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=vec)
  264. T = lltype.Array(rffi.DOUBLE)
  265. def f(d, value):
  266. va = lltype.malloc(T, d, flavor='raw', zero=True)
  267. for i in range(d):
  268. va[i] = value
  269. r = 0
  270. i = 0
  271. k = d + 2
  272. # in this case a guard k <= d is inserted which fails right away!
  273. while i < d:
  274. myjitdriver.jit_merge_point()
  275. if not(i < k):
  276. k -= 1
  277. r += va[i]
  278. i += 1
  279. lltype.free(va, flavor='raw')
  280. return r
  281. res = self.meta_interp(f, [60,0.5], vec=vec, vec_all=vec_all)
  282. assert res == f(60,0.5) == 60*0.5
  283. @py.test.mark.parametrize('i',[15])
  284. def test_array_bounds_check_elimination(self,i):
  285. myjitdriver = JitDriver(greens = [],
  286. reds = 'auto',
  287. vectorize=True)
  288. T = lltype.Array(rffi.INT, hints={'nolength': True})
  289. def f(d):
  290. va = lltype.malloc(T, d, flavor='raw', zero=True)
  291. vb = lltype.malloc(T, d, flavor='raw', zero=True)
  292. for j in range(d):
  293. va[j] = rffi.r_int(j)
  294. vb[j] = rffi.r_int(j)
  295. i = 0
  296. while i < d:
  297. myjitdriver.jit_merge_point()
  298. if i < 0:
  299. raise IndexError
  300. if i >= d:
  301. raise IndexError
  302. a = va[i]
  303. if i < 0:
  304. raise IndexError
  305. if i >= d:
  306. raise IndexError
  307. b = vb[i]
  308. ec = intmask(a)+intmask(b)
  309. if i < 0:
  310. raise IndexError
  311. if i >= d:
  312. raise IndexError
  313. va[i] = rffi.r_int(ec)
  314. i += 1
  315. lltype.free(va, flavor='raw')
  316. lltype.free(vb, flavor='raw')
  317. return 0
  318. res = self.meta_interp(f, [i])
  319. assert res == f(i)
  320. @py.test.mark.parametrize('i,v1,v2',[(25,2.5,0.3),(25,2.5,0.3)])
  321. def test_list_vectorize(self,i,v1,v2):
  322. myjitdriver = JitDriver(greens = [],
  323. reds = 'auto')
  324. class ListF(object):
  325. def __init__(self, size, init):
  326. self.list = [init] * size
  327. def __getitem__(self, key):
  328. return self.list[key]
  329. def __setitem__(self, key, value):
  330. self.list[key] = value
  331. def f(d, v1, v2):
  332. a = ListF(d, v1)
  333. b = ListF(d, v2)
  334. i = 0
  335. while i < d:
  336. myjitdriver.jit_merge_point()
  337. a[i] = a[i] + b[i]
  338. i += 1
  339. s = 0
  340. for i in range(d):
  341. s += a[i]
  342. return s
  343. res = self.meta_interp(f, [i,v1,v2], vec_all=True)
  344. # sum helps to generate the rounding error of floating points
  345. # return 69.999 ... instead of 70, (v1+v2)*i == 70.0
  346. assert res == f(i,v1,v2) == sum([v1+v2]*i)
  347. @py.test.mark.parametrize('size',[12])
  348. def test_body_multiple_accesses(self, size):
  349. myjitdriver = JitDriver(greens = [], reds = 'auto')
  350. T = lltype.Array(rffi.CHAR, hints={'nolength': True})
  351. def f(size):
  352. vector_a = malloc(T, size)
  353. vector_b = malloc(T, size)
  354. i = 0
  355. while i < size:
  356. myjitdriver.jit_merge_point()
  357. # should unroll and group them correctly
  358. c1 = vector_a[i]
  359. c2 = vector_a[i+1]
  360. c3 = vector_a[i+2]
  361. #
  362. vector_b[i] = c1
  363. vector_b[i+1] = c2
  364. vector_b[i+2] = c3
  365. i += 3
  366. free(vector_a)
  367. free(vector_b)
  368. return 0
  369. res = self.meta_interp(f, [size], vec_all=True)
  370. assert res == f(size)
  371. def test_max_byte(self):
  372. myjitdriver = JitDriver(greens = [], reds = 'auto')
  373. T = lltype.Array(rffi.SIGNEDCHAR, hints={'nolength': True})
  374. def f(size):
  375. vector_a = malloc(T, size)
  376. for i in range(size):
  377. vector_a[i] = rffi.r_signedchar(1)
  378. for i in range(size/2,size):
  379. vector_a[i] = rffi.r_signedchar(i)
  380. i = 0
  381. max = -127
  382. while i < size:
  383. myjitdriver.jit_merge_point()
  384. a = intmask(vector_a[i])
  385. a = a & 255
  386. if a > max:
  387. max = a
  388. i += 1
  389. free(vector_a)
  390. return max
  391. res = self.meta_interp(f, [128], vec_all=True)
  392. assert res == f(128)
  393. def combinations(types, operators):
  394. import itertools
  395. size = 22
  396. class Typ(object):
  397. def __init__(self, type, storecast, loadcast):
  398. self.type = type
  399. self.storecast = storecast
  400. self.loadcast = loadcast
  401. def __repr__(self):
  402. return self.type.replace(".","_")
  403. sizes = [22]
  404. for t1, t2, t3, op, size in itertools.product(types, types, types, operators, sizes):
  405. yield (size, Typ(*t1), Typ(*t2), Typ(*t3), op[0], op[1])
    # Each entry is (array item type, cast applied when storing, cast applied
    # when loading), given as source text that is pasted into the template.
    types = [('rffi.DOUBLE', 'float', 'float'),
             ('rffi.SIGNED', 'int', 'int'),
             ('rffi.FLOAT', 'rffi.r_singlefloat', 'float'),
            ]
    # Each entry is (name used in the generated test name, operator text).
    operators = [('add', '+'),
                ]
    # Build and exec one test_binary_operations_<name> function per
    # combination; the template is filled in with str.format, which is why
    # literal braces inside it are doubled.
    for size, typ1, typ2, typ3, opname, op in combinations(types, operators):
        _source = """
        def test_binary_operations_{name}(self):
            myjitdriver = JitDriver(greens = [], reds = 'auto')
            T1 = lltype.Array({type_a}, hints={{'nolength': True}})
            T2 = lltype.Array({type_b}, hints={{'nolength': True}})
            T3 = lltype.Array({type_c}, hints={{'nolength': True}})
            def f(size):
                vector_a = lltype.malloc(T1, size, flavor='raw')
                vector_b = lltype.malloc(T2, size, flavor='raw')
                vector_c = lltype.malloc(T3, size, flavor='raw')
                for i in range(size):
                    vector_a[i] = {type_a_storecast}(i+1)
                for i in range(size):
                    vector_b[i] = {type_b_storecast}(i+1)
                for i in range(size):
                    vector_c[i] = {type_c_storecast}(i+1)
                i = 0
                while i < size:
                    myjitdriver.jit_merge_point()
                    a = {type_a_loadcast}(vector_a[i])
                    b = {type_b_loadcast}(vector_b[i])
                    c = (a {op} b)
                    vector_c[i] = {type_c_storecast}(c)
                    i += 1
                lltype.free(vector_a, flavor='raw')
                lltype.free(vector_b, flavor='raw')
                c = {type_c_loadcast}(0.0)
                for i in range(size):
                    c += {type_c_loadcast}(vector_c[i])
                lltype.free(vector_c, flavor='raw')
                return c
            res = self.meta_interp(f, [{size}], vec_all=True)
            assert res == f({size})
        """
        env = {
          'type_a': typ1.type,
          'type_b': typ2.type,
          'type_c': typ3.type,
          'type_a_loadcast': typ1.loadcast,
          'type_b_loadcast': typ2.loadcast,
          'type_c_loadcast': typ3.loadcast,
          'type_a_storecast': typ1.storecast,
          'type_b_storecast': typ2.storecast,
          'type_c_storecast': typ3.storecast,
          'size': size,
          # str() of a Typ falls back to its __repr__
          'name': str(typ1) + '__' + str(typ2) + '__' + str(typ3) + \
                  '__' + str(size) + '__' + opname,
          'op': op,
        }
        formatted = _source.format(**env)
        # exec without an explicit namespace: the generated function is
        # defined in the scope this loop runs in.
        exec py.code.Source(formatted).compile()
  464. def test_binary_operations_aa(self):
  465. myjitdriver = JitDriver(greens = [], reds = 'auto')
  466. T1 = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
  467. T3 = lltype.Array(rffi.SIGNED, hints={'nolength': True})
  468. def f(size):
  469. vector_a = lltype.malloc(T1, size, flavor='raw', zero=True)
  470. vector_b = lltype.malloc(T1, size, flavor='raw', zero=True)
  471. vector_c = lltype.malloc(T3, size, flavor='raw', zero=True)
  472. i = 0
  473. while i < size:
  474. myjitdriver.jit_merge_point()
  475. a = (vector_a[i])
  476. b = (vector_b[i])
  477. c = (a + b)
  478. vector_c[i] = int(c)
  479. i += 1
  480. free(vector_a)
  481. free(vector_b)
  482. #c = 0.0
  483. #for i in range(size):
  484. # c += vector_c[i]
  485. lltype.free(vector_c, flavor='raw')
  486. return 0
  487. res = self.meta_interp(f, [22], vec_all=True)
  488. assert res == f(22)
  489. def test_guard_test_location_assert(self):
  490. myjitdriver = JitDriver(greens = [], reds = 'auto')
  491. T1 = lltype.Array(rffi.SIGNED, hints={'nolength': True})
  492. def f(size):
  493. vector_a = lltype.malloc(T1, size, flavor='raw', zero=True)
  494. for i in range(size):
  495. vector_a[i] = 0
  496. i = 0
  497. breaks = 0
  498. while i < size:
  499. myjitdriver.jit_merge_point()
  500. a = vector_a[i]
  501. if a:
  502. breaks = 1
  503. break
  504. del a
  505. i += 1
  506. lltype.free(vector_a, flavor='raw')
  507. return breaks
  508. res = self.meta_interp(f, [22], vec_all=True, vec_guard_ratio=5)
  509. assert res == f(22)
  510. class TestLLtype(LLJitMixin, VectorizeTests):
  511. pass