/media/libvpx/vp8/common/x86/subpixel_mmx.asm

http://github.com/zpao/v8monkey · Assembly · 727 lines · 466 code · 181 blank · 80 comment · 0 complexity · c5007295dc4543de92fdd3602b3e30ec MD5 · raw file

  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  11. %define BLOCK_HEIGHT_WIDTH 4
  12. %define vp8_filter_weight 128
  13. %define VP8_FILTER_SHIFT 7
  ; VP8_FILTER_SHIFT is the arithmetic right-shift count (psraw) applied to every
  ; rounded filter sum in this file; 1 << 7 == 128, the value of vp8_filter_weight.
  ; BLOCK_HEIGHT_WIDTH and vp8_filter_weight are not referenced again in this file.
  14. ;void vp8_filter_block1d_h6_mmx
  15. ;(
  16. ; unsigned char *src_ptr,
  17. ; unsigned short *output_ptr,
  18. ; unsigned int src_pixels_per_line,
  19. ; unsigned int pixel_step,
  20. ; unsigned int output_height,
  21. ; unsigned int output_width,
  22. ; short * vp8_filter
  23. ;)
  ;
  ; Horizontal 6-tap filter pass over 8-bit pixels.
  ; Notation: pN is the byte at src_ptr+N on the current row; "tap k" is the
  ; group of four 16-bit words at vp8_filter + 16*k bytes (k = 0..5).
  ; Each loop iteration reads bytes p-2..p6, computes for four adjacent outputs
  ;   sum = tap0*p[-2] + tap1*p[-1] + tap2*p[0] + tap3*p[1] + tap4*p[2] + tap5*p[3]
  ; (shifted by one pixel per output), adds the constant at rd, shifts right by
  ; VP8_FILTER_SHIFT, clamps to 0..255 and stores the four results as 16-bit
  ; words at output_ptr.
  ; Per iteration src_ptr advances by src_pixels_per_line bytes and output_ptr
  ; by output_width bytes. The loop is bottom-tested on output_height, so a
  ; height of 0 is not handled. pixel_step (arg 3) is never read.
  ; SHADOW_ARGS_TO_STACK, GET_GOT, arg(), GLOBAL() and sym() come from
  ; x86_abi_support.asm, which is not shown here.
  24. global sym(vp8_filter_block1d_h6_mmx)
  25. sym(vp8_filter_block1d_h6_mmx):
  26. push rbp
  27. mov rbp, rsp
  28. SHADOW_ARGS_TO_STACK 7
  29. GET_GOT rbx
  30. push rsi
  31. push rdi
  32. ; end prolog
  33. mov rdx, arg(6) ;vp8_filter
  34. movq mm1, [rdx + 16] ; mm1 = tap 1; taps 1 and 4 (the "negative taps" of the original note) are accumulated first
  35. movq mm2, [rdx + 32] ; mm2 = tap 2
  36. movq mm6, [rdx + 48] ; mm6 = tap 3
  37. movq mm7, [rdx + 64] ; mm7 = tap 4
  38. mov rdi, arg(1) ;output_ptr
  39. mov rsi, arg(0) ;src_ptr
  40. movsxd rcx, dword ptr arg(4) ;output_height = loop counter
  41. movsxd rax, dword ptr arg(5) ;output_width ; added to rdi once per row, i.e. used as the destination step in bytes
  42. pxor mm0, mm0 ; mm0 = 00000000
  43. nextrow:
  44. movq mm3, [rsi-2] ; mm3 = p-2..p5
  45. movq mm4, mm3 ; mm4 = p-2..p5
  46. psrlq mm3, 8 ; mm3 = p-1..p5
  47. punpcklbw mm3, mm0 ; mm3 = p-1..p2 widened to words
  48. pmullw mm3, mm1 ; mm3 *= tap 1
  49. movq mm5, mm4 ; mm5 = p-2..p5
  50. punpckhbw mm4, mm0 ; mm4 = p2..p5 widened to words
  51. pmullw mm4, mm7 ; mm4 *= tap 4
  52. paddsw mm3, mm4 ; mm3 += mm4 (signed saturating add)
  53. movq mm4, mm5 ; mm4 = p-2..p5;
  54. psrlq mm5, 16 ; mm5 = p0..p5;
  55. punpcklbw mm5, mm0 ; mm5 = p0..p3
  56. pmullw mm5, mm2 ; mm5 *= tap 2
  57. paddsw mm3, mm5 ; mm3 += mm5
  58. movq mm5, mm4 ; mm5 = p-2..p5
  59. psrlq mm4, 24 ; mm4 = p1..p5
  60. punpcklbw mm4, mm0 ; mm4 = p1..p4
  61. pmullw mm4, mm6 ; mm4 *= tap 3
  62. paddsw mm3, mm4 ; mm3 += mm4
  63. ; do outer positive taps (tap 5 at [rdx+80], tap 0 at [rdx]; both read from memory each row)
  64. movd mm4, [rsi+3] ; mm4 = p3..p6 (4 bytes)
  65. punpcklbw mm4, mm0 ; mm4 = p3..p6 widened to words
  66. pmullw mm4, [rdx+80] ; mm4 *= tap 5
  67. paddsw mm3, mm4 ; mm3 += mm4
  68. punpcklbw mm5, mm0 ; mm5 = p-2..p1
  69. pmullw mm5, [rdx] ; mm5 *= tap 0
  70. paddsw mm3, mm5 ; mm3 += mm5
  71. paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
  72. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  73. packuswb mm3, mm0 ; pack and unpack to saturate: clamp each word to 0..255
  74. punpcklbw mm3, mm0 ; widen the four clamped bytes back to words
  75. movq [rdi], mm3 ; store the results in the destination
  76. %if ABI_IS_32BIT
  77. add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line
  78. add rdi, rax; next output row
  79. %else
  80. movsxd r8, dword ptr arg(2) ;src_pixels_per_line
  81. add rdi, rax; next output row
  82. add rsi, r8 ; next line
  83. %endif
  84. dec rcx ; decrement count
  85. jnz nextrow ; next row
  86. ; begin epilog
  87. pop rdi
  88. pop rsi
  89. RESTORE_GOT
  90. UNSHADOW_ARGS
  91. pop rbp
  92. ret
  93. ;void vp8_filter_block1dc_v6_mmx
  94. ;(
  95. ; short *src_ptr,
  96. ; unsigned char *output_ptr,
  97. ; int output_pitch,
  98. ; unsigned int pixels_per_line,
  99. ; unsigned int pixel_step,
  100. ; unsigned int output_height,
  101. ; unsigned int output_width,
  102. ; short * vp8_filter
  103. ;)
  ;
  ; Vertical 6-tap filter pass over 16-bit samples, producing 8-bit output.
  ; Notation: "row r" is the four words at src_ptr + r*pixels_per_line, where
  ; pixels_per_line is applied directly as a byte offset; "tap k" is the group
  ; of four words at vp8_filter + 16*k bytes (k = 0..5).
  ; Each loop iteration computes
  ;   sum = tap0*row(-2) + tap1*row(-1) + tap2*row(0) + tap3*row(1) + tap4*row(2) + tap5*row(3)
  ; for four columns, adds the constant at rd, shifts right by VP8_FILTER_SHIFT,
  ; clamps to 0..255 and stores four bytes at output_ptr.
  ; Per iteration the source moves down one row and output_ptr advances by
  ; output_pitch bytes. The loop is bottom-tested on output_height, so a height
  ; of 0 is not handled. pixel_step (arg 4) and output_width (arg 6) are never read.
  104. global sym(vp8_filter_block1dc_v6_mmx)
  105. sym(vp8_filter_block1dc_v6_mmx):
  106. push rbp
  107. mov rbp, rsp
  108. SHADOW_ARGS_TO_STACK 8
  109. GET_GOT rbx
  110. push rsi
  111. push rdi
  112. ; end prolog
  113. movq mm5, [GLOBAL(rd)] ; mm5 = round value, loaded before rbx is overwritten below
  114. push rbx ; rbx is reused as the filter pointer; restored before the epilog
  115. mov rbx, arg(7) ;vp8_filter
  116. movq mm1, [rbx + 16] ; mm1 = tap 1; taps 1 and 4 (the "negative taps" of the original note) are accumulated first
  117. movq mm2, [rbx + 32] ; mm2 = tap 2
  118. movq mm6, [rbx + 48] ; mm6 = tap 3
  119. movq mm7, [rbx + 64] ; mm7 = tap 4
  120. movsxd rdx, dword ptr arg(3) ;pixels_per_line
  121. mov rdi, arg(1) ;output_ptr
  122. mov rsi, arg(0) ;src_ptr
  123. sub rsi, rdx ; rsi = src_ptr - 1*pixels_per_line
  124. sub rsi, rdx ; rsi = src_ptr - 2*pixels_per_line = row -2
  125. movsxd rcx, DWORD PTR arg(5) ;output_height = loop counter
  126. movsxd rax, DWORD PTR arg(2) ;output_pitch ; byte step added to rdi per row
  127. pxor mm0, mm0 ; mm0 = 00000000
  128. nextrow_cv:
  129. movq mm3, [rsi+rdx] ; mm3 = p0..p3 = row -1
  130. pmullw mm3, mm1 ; mm3 *= tap 1
  131. movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
  132. pmullw mm4, mm7 ; mm4 *= tap 4
  133. paddsw mm3, mm4 ; mm3 += mm4 (signed saturating add)
  134. movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
  135. pmullw mm4, mm2 ; mm4 *= tap 2
  136. paddsw mm3, mm4 ; mm3 += mm4
  137. movq mm4, [rsi] ; mm4 = p0..p3 = row -2
  138. pmullw mm4, [rbx] ; mm4 *= tap 0
  139. paddsw mm3, mm4 ; mm3 += mm4
  140. add rsi, rdx ; move source forward 1 line to avoid 3 * pitch (this is also the per-iteration source advance)
  141. movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
  142. pmullw mm4, mm6 ; mm4 *= tap 3
  143. paddsw mm3, mm4 ; mm3 += mm4
  144. movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
  145. pmullw mm4, [rbx +80] ; mm4 *= tap 5
  146. paddsw mm3, mm4 ; mm3 += mm4
  147. paddsw mm3, mm5 ; mm3 += round value
  148. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  149. packuswb mm3, mm0 ; pack and saturate: clamp each word to 0..255, results in the low 4 bytes
  150. movd [rdi],mm3 ; store the results in the destination
  151. ; Each subsequent iteration re-reads 5 of the 6 source rows loaded here. Since the
  152. ; recon block should be in cache this shouldn't cost much. It's obviously
  153. ; avoidable!!!
  154. lea rdi, [rdi+rax] ; next output row
  155. dec rcx ; decrement count
  156. jnz nextrow_cv ; next row
  157. pop rbx
  158. ; begin epilog
  159. pop rdi
  160. pop rsi
  161. RESTORE_GOT
  162. UNSHADOW_ARGS
  163. pop rbp
  164. ret
  165. ;void vp8_bilinear_predict8x8_mmx
  166. ;(
  167. ; unsigned char *src_ptr,
  168. ; int src_pixels_per_line,
  169. ; int xoffset,
  170. ; int yoffset,
  171. ; unsigned char *dst_ptr,
  172. ; int dst_pitch
  173. ;)
  ;
  ; Two-pass bilinear prediction producing 8 rows of 8 bytes at dst_ptr.
  ; xoffset and yoffset each select a 32-byte entry of vp8_bilinear_filters_mmx
  ; (two groups of weights, at +0 and +16 bytes).
  ; Horizontal pass, per source row: h[i] = (s[i]*H0 + s[i+1]*H1 + rd) >> 7.
  ; Vertical pass, per output row:   out[i] = (h_prev[i]*V0 + h_cur[i]*V1 + rd) >> 7.
  ; The first source row is filtered before the loop; every iteration filters
  ; the next row, blends it with the previous one and keeps it (packed to
  ; bytes in mm7) for the following iteration. In total 9 source rows of
  ; 9 bytes are read. The loop ends when rdi equals dst_ptr + 8*dst_pitch.
  174. global sym(vp8_bilinear_predict8x8_mmx)
  175. sym(vp8_bilinear_predict8x8_mmx):
  176. push rbp
  177. mov rbp, rsp
  178. SHADOW_ARGS_TO_STACK 6
  179. GET_GOT rbx
  180. push rsi
  181. push rdi
  182. ; end prolog
  183. ;const short *HFilter = bilinear_filters_mmx[xoffset];
  184. ;const short *VFilter = bilinear_filters_mmx[yoffset];
  185. movsxd rax, dword ptr arg(2) ;xoffset
  186. mov rdi, arg(4) ;dst_ptr
  187. shl rax, 5 ; offset * 32 (bytes per filter entry)
  188. lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
  189. add rax, rcx ; HFilter
  190. mov rsi, arg(0) ;src_ptr
  191. movsxd rdx, dword ptr arg(5) ;dst_pitch
  192. movq mm1, [rax] ; mm1 = H0, first horizontal weight
  193. movq mm2, [rax+16] ; mm2 = H1, second horizontal weight
  194. movsxd rax, dword ptr arg(3) ;yoffset
  195. pxor mm0, mm0 ; mm0 = 0, used for byte -> word unpacking
  196. shl rax, 5 ; offset*32
  197. add rax, rcx ; VFilter
  198. lea rcx, [rdi+rdx*8] ; rcx = dst_ptr + 8*dst_pitch, the loop end address
  199. movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
  200. ; get the first horizontal line done
  201. movq mm3, [rsi] ; mm3 = source bytes s0..s7
  202. movq mm4, mm3 ; make a copy of current line
  203. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  204. punpckhbw mm4, mm0 ; mm4 = s4..s7 as words
  205. pmullw mm3, mm1 ; s0..s3 * H0
  206. pmullw mm4, mm1 ; s4..s7 * H0
  207. movq mm5, [rsi+1] ; mm5 = source bytes s1..s8
  208. movq mm6, mm5 ; copy
  209. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  210. punpckhbw mm6, mm0 ; mm6 = s5..s8 as words
  211. pmullw mm5, mm2 ; s1..s4 * H1
  212. pmullw mm6, mm2 ; s5..s8 * H1
  213. paddw mm3, mm5 ; low half horizontal sum
  214. paddw mm4, mm6 ; high half horizontal sum
  215. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  216. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  217. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  218. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  219. movq mm7, mm3 ;
  220. packuswb mm7, mm4 ; mm7 = first filtered row, 8 packed bytes
  221. add rsi, rdx ; next line
  222. next_row_8x8:
  223. movq mm3, [rsi] ; mm3 = source bytes s0..s7 of the current row
  224. movq mm4, mm3 ; make a copy of current line
  225. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  226. punpckhbw mm4, mm0 ; mm4 = s4..s7 as words
  227. pmullw mm3, mm1 ; s0..s3 * H0
  228. pmullw mm4, mm1 ; s4..s7 * H0
  229. movq mm5, [rsi+1] ; mm5 = source bytes s1..s8
  230. movq mm6, mm5 ; copy
  231. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  232. punpckhbw mm6, mm0 ; mm6 = s5..s8 as words
  233. pmullw mm5, mm2 ; s1..s4 * H1
  234. pmullw mm6, mm2 ; s5..s8 * H1
  235. paddw mm3, mm5 ; low half horizontal sum
  236. paddw mm4, mm6 ; high half horizontal sum
  237. movq mm5, mm7 ; mm7 = previous filtered row (packed bytes)
  238. movq mm6, mm7 ;
  239. punpcklbw mm5, mm0 ; previous row, low 4 as words
  240. punpckhbw mm6, mm0 ; previous row, high 4 as words
  241. pmullw mm5, [rax] ; previous row * V0
  242. pmullw mm6, [rax] ;
  243. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  244. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  245. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  246. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  247. movq mm7, mm3 ;
  248. packuswb mm7, mm4 ; mm7 = current filtered row, kept for the next iteration
  249. pmullw mm3, [rax+16] ; current row * V1
  250. pmullw mm4, [rax+16] ;
  251. paddw mm3, mm5 ; vertical sum, low half
  252. paddw mm4, mm6 ; vertical sum, high half
  253. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  254. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  255. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  256. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  257. packuswb mm3, mm4 ; clamp to 0..255 and pack to 8 bytes
  258. movq [rdi], mm3 ; store the results in the destination
  259. %if ABI_IS_32BIT
  260. add rsi, rdx ; next line
  261. add rdi, dword ptr arg(5) ;dst_pitch
  262. %else
  263. movsxd r8, dword ptr arg(5) ;dst_pitch
  264. add rsi, rdx ; next line
  265. add rdi, r8 ;dst_pitch
  266. %endif
  267. cmp rdi, rcx ; reached dst_ptr + 8*dst_pitch?
  268. jne next_row_8x8
  269. ; begin epilog
  270. pop rdi
  271. pop rsi
  272. RESTORE_GOT
  273. UNSHADOW_ARGS
  274. pop rbp
  275. ret
  276. ;void vp8_bilinear_predict8x4_mmx
  277. ;(
  278. ; unsigned char *src_ptr,
  279. ; int src_pixels_per_line,
  280. ; int xoffset,
  281. ; int yoffset,
  282. ; unsigned char *dst_ptr,
  283. ; int dst_pitch
  284. ;)
  ;
  ; Two-pass bilinear prediction producing 4 rows of 8 bytes at dst_ptr.
  ; xoffset and yoffset each select a 32-byte entry of vp8_bilinear_filters_mmx
  ; (two groups of weights, at +0 and +16 bytes).
  ; Horizontal pass, per source row: h[i] = (s[i]*H0 + s[i+1]*H1 + rd) >> 7.
  ; Vertical pass, per output row:   out[i] = (h_prev[i]*V0 + h_cur[i]*V1 + rd) >> 7.
  ; The first source row is filtered before the loop; every iteration filters
  ; the next row, blends it with the previous one and keeps it (packed to
  ; bytes in mm7) for the following iteration. In total 5 source rows of
  ; 9 bytes are read. The loop ends when rdi equals dst_ptr + 4*dst_pitch.
  285. global sym(vp8_bilinear_predict8x4_mmx)
  286. sym(vp8_bilinear_predict8x4_mmx):
  287. push rbp
  288. mov rbp, rsp
  289. SHADOW_ARGS_TO_STACK 6
  290. GET_GOT rbx
  291. push rsi
  292. push rdi
  293. ; end prolog
  294. ;const short *HFilter = bilinear_filters_mmx[xoffset];
  295. ;const short *VFilter = bilinear_filters_mmx[yoffset];
  296. movsxd rax, dword ptr arg(2) ;xoffset
  297. mov rdi, arg(4) ;dst_ptr
  298. lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
  299. shl rax, 5 ; xoffset * 32 (bytes per filter entry)
  300. mov rsi, arg(0) ;src_ptr
  301. add rax, rcx ; HFilter
  302. movsxd rdx, dword ptr arg(5) ;dst_pitch
  303. movq mm1, [rax] ; mm1 = H0, first horizontal weight
  304. movq mm2, [rax+16] ; mm2 = H1, second horizontal weight
  305. movsxd rax, dword ptr arg(3) ;yoffset
  306. pxor mm0, mm0 ; mm0 = 0, used for byte -> word unpacking
  307. shl rax, 5 ; yoffset * 32
  308. add rax, rcx ; VFilter
  309. lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4*dst_pitch, the loop end address
  310. movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
  311. ; get the first horizontal line done
  312. movq mm3, [rsi] ; mm3 = source bytes s0..s7
  313. movq mm4, mm3 ; make a copy of current line
  314. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  315. punpckhbw mm4, mm0 ; mm4 = s4..s7 as words
  316. pmullw mm3, mm1 ; s0..s3 * H0
  317. pmullw mm4, mm1 ; s4..s7 * H0
  318. movq mm5, [rsi+1] ; mm5 = source bytes s1..s8
  319. movq mm6, mm5 ; copy
  320. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  321. punpckhbw mm6, mm0 ; mm6 = s5..s8 as words
  322. pmullw mm5, mm2 ; s1..s4 * H1
  323. pmullw mm6, mm2 ; s5..s8 * H1
  324. paddw mm3, mm5 ; low half horizontal sum
  325. paddw mm4, mm6 ; high half horizontal sum
  326. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  327. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  328. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  329. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  330. movq mm7, mm3 ;
  331. packuswb mm7, mm4 ; mm7 = first filtered row, 8 packed bytes
  332. add rsi, rdx ; next line
  333. next_row_8x4:
  334. movq mm3, [rsi] ; mm3 = source bytes s0..s7 of the current row
  335. movq mm4, mm3 ; make a copy of current line
  336. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  337. punpckhbw mm4, mm0 ; mm4 = s4..s7 as words
  338. pmullw mm3, mm1 ; s0..s3 * H0
  339. pmullw mm4, mm1 ; s4..s7 * H0
  340. movq mm5, [rsi+1] ; mm5 = source bytes s1..s8
  341. movq mm6, mm5 ; copy
  342. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  343. punpckhbw mm6, mm0 ; mm6 = s5..s8 as words
  344. pmullw mm5, mm2 ; s1..s4 * H1
  345. pmullw mm6, mm2 ; s5..s8 * H1
  346. paddw mm3, mm5 ; low half horizontal sum
  347. paddw mm4, mm6 ; high half horizontal sum
  348. movq mm5, mm7 ; mm7 = previous filtered row (packed bytes)
  349. movq mm6, mm7 ;
  350. punpcklbw mm5, mm0 ; previous row, low 4 as words
  351. punpckhbw mm6, mm0 ; previous row, high 4 as words
  352. pmullw mm5, [rax] ; previous row * V0
  353. pmullw mm6, [rax] ;
  354. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  355. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  356. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  357. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  358. movq mm7, mm3 ;
  359. packuswb mm7, mm4 ; mm7 = current filtered row, kept for the next iteration
  360. pmullw mm3, [rax+16] ; current row * V1
  361. pmullw mm4, [rax+16] ;
  362. paddw mm3, mm5 ; vertical sum, low half
  363. paddw mm4, mm6 ; vertical sum, high half
  364. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  365. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  366. paddw mm4, [GLOBAL(rd)] ; mm4 += round value
  367. psraw mm4, VP8_FILTER_SHIFT ; mm4 /= 128
  368. packuswb mm3, mm4 ; clamp to 0..255 and pack to 8 bytes
  369. movq [rdi], mm3 ; store the results in the destination
  370. %if ABI_IS_32BIT
  371. add rsi, rdx ; next line
  372. add rdi, dword ptr arg(5) ;dst_pitch
  373. %else
  374. movsxd r8, dword ptr arg(5) ;dst_pitch
  375. add rsi, rdx ; next line
  376. add rdi, r8 ; rdi += dst_pitch
  377. %endif
  378. cmp rdi, rcx ; reached dst_ptr + 4*dst_pitch?
  379. jne next_row_8x4
  380. ; begin epilog
  381. pop rdi
  382. pop rsi
  383. RESTORE_GOT
  384. UNSHADOW_ARGS
  385. pop rbp
  386. ret
  387. ;void vp8_bilinear_predict4x4_mmx
  388. ;(
  389. ; unsigned char *src_ptr,
  390. ; int src_pixels_per_line,
  391. ; int xoffset,
  392. ; int yoffset,
  393. ; unsigned char *dst_ptr,
  394. ; int dst_pitch
  395. ;)
  ;
  ; Two-pass bilinear prediction producing 4 rows of 4 bytes at dst_ptr.
  ; xoffset and yoffset each select a 32-byte entry of vp8_bilinear_filters_mmx
  ; (two groups of weights, at +0 and +16 bytes).
  ; Horizontal pass, per source row: h[i] = (s[i]*H0 + s[i+1]*H1 + rd) >> 7.
  ; Vertical pass, per output row:   out[i] = (h_prev[i]*V0 + h_cur[i]*V1 + rd) >> 7.
  ; The first source row is filtered before the loop; every iteration filters
  ; the next row, blends it with the previous one and keeps it (packed to
  ; bytes in mm7) for the following iteration. In total 5 source rows of
  ; 5 bytes are read. The loop ends when rdi equals dst_ptr + 4*dst_pitch.
  396. global sym(vp8_bilinear_predict4x4_mmx)
  397. sym(vp8_bilinear_predict4x4_mmx):
  398. push rbp
  399. mov rbp, rsp
  400. SHADOW_ARGS_TO_STACK 6
  401. GET_GOT rbx
  402. push rsi
  403. push rdi
  404. ; end prolog
  405. ;const short *HFilter = bilinear_filters_mmx[xoffset];
  406. ;const short *VFilter = bilinear_filters_mmx[yoffset];
  407. movsxd rax, dword ptr arg(2) ;xoffset
  408. mov rdi, arg(4) ;dst_ptr
  409. lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
  410. shl rax, 5 ; xoffset * 32 (bytes per filter entry)
  411. add rax, rcx ; HFilter
  412. mov rsi, arg(0) ;src_ptr
  413. movsxd rdx, dword ptr arg(5) ;dst_pitch
  414. movq mm1, [rax] ; mm1 = H0, first horizontal weight
  415. movq mm2, [rax+16] ; mm2 = H1, second horizontal weight
  416. movsxd rax, dword ptr arg(3) ;yoffset
  417. pxor mm0, mm0 ; mm0 = 0, used for byte -> word unpacking
  418. shl rax, 5 ; yoffset * 32
  419. add rax, rcx ; VFilter
  420. lea rcx, [rdi+rdx*4] ; rcx = dst_ptr + 4*dst_pitch, the loop end address
  421. movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
  422. ; get the first horizontal line done
  423. movd mm3, [rsi] ; mm3 = source bytes s0..s3
  424. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  425. pmullw mm3, mm1 ; s0..s3 * H0
  426. movd mm5, [rsi+1] ; mm5 = source bytes s1..s4
  427. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  428. pmullw mm5, mm2 ; s1..s4 * H1
  429. paddw mm3, mm5 ; horizontal sum
  430. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  431. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  432. movq mm7, mm3 ;
  433. packuswb mm7, mm0 ; mm7 = first filtered row, 4 packed bytes in the low half
  434. add rsi, rdx ; next line
  435. next_row_4x4:
  436. movd mm3, [rsi] ; mm3 = source bytes s0..s3 of the current row
  437. punpcklbw mm3, mm0 ; mm3 = s0..s3 as words
  438. pmullw mm3, mm1 ; s0..s3 * H0
  439. movd mm5, [rsi+1] ; mm5 = source bytes s1..s4
  440. punpcklbw mm5, mm0 ; mm5 = s1..s4 as words
  441. pmullw mm5, mm2 ; s1..s4 * H1
  442. paddw mm3, mm5 ; horizontal sum
  443. movq mm5, mm7 ; mm7 = previous filtered row (packed bytes)
  444. punpcklbw mm5, mm0 ; previous row as words
  445. pmullw mm5, [rax] ; previous row * V0
  446. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  447. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  448. movq mm7, mm3 ;
  449. packuswb mm7, mm0 ; mm7 = current filtered row, kept for the next iteration
  450. pmullw mm3, [rax+16] ; current row * V1
  451. paddw mm3, mm5 ; vertical sum
  452. paddw mm3, [GLOBAL(rd)] ; mm3 += round value
  453. psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
  454. packuswb mm3, mm0 ; clamp to 0..255; results in the low 4 bytes
  455. movd [rdi], mm3 ; store the results in the destination
  456. %if ABI_IS_32BIT
  457. add rsi, rdx ; next line
  458. add rdi, dword ptr arg(5) ;dst_pitch
  459. %else
  460. movsxd r8, dword ptr arg(5) ;dst_pitch
  461. add rsi, rdx ; next line
  462. add rdi, r8 ; rdi += dst_pitch
  463. %endif
  464. cmp rdi, rcx ; reached dst_ptr + 4*dst_pitch?
  465. jne next_row_4x4
  466. ; begin epilog
  467. pop rdi
  468. pop rsi
  469. RESTORE_GOT
  470. UNSHADOW_ARGS
  471. pop rbp
  472. ret
  473. SECTION_RODATA
  ; Constant tables. Every weight is replicated 8 times as 16-bit words, so each
  ; weight occupies 16 bytes; the MMX code in this file loads 8 bytes of it at a time.
  474. align 16
  475. rd:
  476. times 4 dw 0x40 ; rounding constant: 64 = half of 128, added before the >> VP8_FILTER_SHIFT
  477. align 16
  478. global HIDDEN_DATA(sym(vp8_six_tap_mmx))
  ; Six-tap filter table: 8 filters x 6 taps x 16 bytes (96 bytes per filter).
  ; The taps of every filter sum to 128. The 16-byte tap spacing matches the
  ; [filter + 16*k] addressing used by the two 6-tap routines in this file.
  479. sym(vp8_six_tap_mmx):
  480. times 8 dw 0 ; filter 0: 0, 0, 128, 0, 0, 0
  481. times 8 dw 0
  482. times 8 dw 128
  483. times 8 dw 0
  484. times 8 dw 0
  485. times 8 dw 0
  486. times 8 dw 0 ; filter 1: 0, -6, 123, 12, -1, 0
  487. times 8 dw -6
  488. times 8 dw 123
  489. times 8 dw 12
  490. times 8 dw -1
  491. times 8 dw 0
  492. times 8 dw 2 ; filter 2: 2, -11, 108, 36, -8, 1
  493. times 8 dw -11
  494. times 8 dw 108
  495. times 8 dw 36
  496. times 8 dw -8
  497. times 8 dw 1
  498. times 8 dw 0 ; filter 3: 0, -9, 93, 50, -6, 0
  499. times 8 dw -9
  500. times 8 dw 93
  501. times 8 dw 50
  502. times 8 dw -6
  503. times 8 dw 0
  504. times 8 dw 3 ; filter 4: 3, -16, 77, 77, -16, 3
  505. times 8 dw -16
  506. times 8 dw 77
  507. times 8 dw 77
  508. times 8 dw -16
  509. times 8 dw 3
  510. times 8 dw 0 ; filter 5: 0, -6, 50, 93, -9, 0
  511. times 8 dw -6
  512. times 8 dw 50
  513. times 8 dw 93
  514. times 8 dw -9
  515. times 8 dw 0
  516. times 8 dw 1 ; filter 6: 1, -8, 36, 108, -11, 2
  517. times 8 dw -8
  518. times 8 dw 36
  519. times 8 dw 108
  520. times 8 dw -11
  521. times 8 dw 2
  522. times 8 dw 0 ; filter 7: 0, -1, 12, 123, -6, 0
  523. times 8 dw -1
  524. times 8 dw 12
  525. times 8 dw 123
  526. times 8 dw -6
  527. times 8 dw 0
  528. align 16
  529. global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
  ; Bilinear filter table: 8 entries x 2 weights x 16 bytes (32 bytes per entry,
  ; hence the "shl rax, 5" indexing in the bilinear routines). Each pair sums to 128.
  530. sym(vp8_bilinear_filters_mmx):
  531. times 8 dw 128 ; entry 0: 128, 0
  532. times 8 dw 0
  533. times 8 dw 112 ; entry 1: 112, 16
  534. times 8 dw 16
  535. times 8 dw 96 ; entry 2: 96, 32
  536. times 8 dw 32
  537. times 8 dw 80 ; entry 3: 80, 48
  538. times 8 dw 48
  539. times 8 dw 64 ; entry 4: 64, 64
  540. times 8 dw 64
  541. times 8 dw 48 ; entry 5: 48, 80
  542. times 8 dw 80
  543. times 8 dw 32 ; entry 6: 32, 96
  544. times 8 dw 96
  545. times 8 dw 16 ; entry 7: 16, 112
  546. times 8 dw 112