/packages/hermes/src/i386/x86p_16.inc

https://github.com/slibre/freepascal · Pascal · 1143 lines · 794 code · 250 blank · 99 comment · 0 complexity · 6bff2aa0fda3b458f53d67b9c9ab3748 MD5 · raw file

  1. {
  2. x86 format converters for HERMES
  3. Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
  4. Routines adjusted for Hermes by Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
  5. Used with permission.
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2.1 of the License, or (at your option) any later version
  10. with the following modification:
  11. As a special exception, the copyright holders of this library give you
  12. permission to link this library with independent modules to produce an
  13. executable, regardless of the license terms of these independent modules,and
  14. to copy and distribute the resulting executable under terms of your choice,
  15. provided that you also meet, for each linked independent module, the terms
  16. and conditions of the license of that module. An independent module is a
  17. module which is not derived from or based on this library. If you modify
  18. this library, you may extend this exception to your version of the library,
  19. but you are not obligated to do so. If you do not wish to do so, delete this
  20. exception statement from your version.
  21. This library is distributed in the hope that it will be useful,
  22. but WITHOUT ANY WARRANTY; without even the implied warranty of
  23. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  24. Lesser General Public License for more details.
  25. You should have received a copy of the GNU Lesser General Public
  26. License along with this library; if not, write to the Free Software
  27. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  28. }
  procedure ConvertX86p16_32RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to 32-bit pixels with the lookup table
    ConvertX86p16_32RGB888_LUT_X86.

    The table is addressed as 8-byte entries: the dword at offset 0 is
    indexed by the low source byte, the dword at offset 4 by the high source
    byte, and the sum of the two dwords is the destination pixel.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 4 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // ebx is the zero-extended table index; only bl is rewritten below
      xorl %ebx,%ebx
  .Lshort:
      movb (%esi),%bl                                        // low source byte
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax      // its table dword
      movb 1(%esi),%bl                                       // high source byte
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx    // its table dword
      addl %edx,%eax                                         // combined pixel
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      pushl %ebp
      // keep the full count for the odd-pixel check after the loop
      pushl %ecx
      // ebp = number of pixel pairs
      movl %ecx,%ebp
      shrl $1,%ebp
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ebp,4),%esi
      leal (%edi,%ebp,8),%edi
      negl %ebp
      // ecx / ebx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      xorl %ecx,%ecx
      // preload the first pixel of the first pair
      movb (%esi,%ebp,4),%cl
      movb 1(%esi,%ebp,4),%bl
  .Lpair:
      // two pixels per iteration, loads interleaved with the table lookups
      movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
      movb 2(%esi,%ebp,4),%cl
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
      movb 3(%esi,%ebp,4),%bl
      addl %edx,%eax
      movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
      movl %eax,(%edi,%ebp,8)
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
      addl %edx,%eax
      // look-ahead for the next pair. NOTE: on the last iteration this
      // reads the two bytes at the end of the paired region, which lie
      // just past the source run when the pixel count is even.
      movb 4(%esi,%ebp,4),%cl
      movl %eax,4(%edi,%ebp,8)
      movb 5(%esi,%ebp,4),%bl
      incl %ebp
      jnz .Lpair
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lrestore
      xorl %ebx,%ebx
      movb (%esi),%bl
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax
      movb 1(%esi),%bl
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx
      addl %edx,%eax
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_32BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to 32-bit pixels with the lookup table
    ConvertX86p16_32BGR888_LUT_X86.

    The table is addressed as 8-byte entries: the dword at offset 0 is
    indexed by the low source byte, the dword at offset 4 by the high source
    byte, and the sum of the two dwords is the destination pixel.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 4 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // ebx is the zero-extended table index; only bl is rewritten below
      xorl %ebx,%ebx
  .Lshort:
      movb (%esi),%bl                                        // low source byte
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      // its table dword
      movb 1(%esi),%bl                                       // high source byte
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx    // its table dword
      addl %edx,%eax                                         // combined pixel
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      pushl %ebp
      // keep the full count for the odd-pixel check after the loop
      pushl %ecx
      // ebp = number of pixel pairs
      movl %ecx,%ebp
      shrl $1,%ebp
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ebp,4),%esi
      leal (%edi,%ebp,8),%edi
      negl %ebp
      // ecx / ebx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      xorl %ecx,%ecx
      // preload the first pixel of the first pair
      movb (%esi,%ebp,4),%cl
      movb 1(%esi,%ebp,4),%bl
  .Lpair:
      // two pixels per iteration, loads interleaved with the table lookups
      movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
      movb 2(%esi,%ebp,4),%cl
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
      movb 3(%esi,%ebp,4),%bl
      addl %edx,%eax
      movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
      movl %eax,(%edi,%ebp,8)
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
      addl %edx,%eax
      // look-ahead for the next pair. NOTE: on the last iteration this
      // reads the two bytes at the end of the paired region, which lie
      // just past the source run when the pixel count is even.
      movb 4(%esi,%ebp,4),%cl
      movl %eax,4(%edi,%ebp,8)
      movb 5(%esi,%ebp,4),%bl
      incl %ebp
      jnz .Lpair
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lrestore
      xorl %ebx,%ebx
      movb (%esi),%bl
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax
      movb 1(%esi),%bl
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx
      addl %edx,%eax
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_32RGBA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to 32-bit pixels with the lookup table
    ConvertX86p16_32RGBA888_LUT_X86.

    The table is addressed as 8-byte entries: the dword at offset 0 is
    indexed by the low source byte, the dword at offset 4 by the high source
    byte, and the sum of the two dwords is the destination pixel.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 4 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // ebx is the zero-extended table index; only bl is rewritten below
      xorl %ebx,%ebx
  .Lshort:
      movb (%esi),%bl                                        // low source byte
      movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax     // its table dword
      movb 1(%esi),%bl                                       // high source byte
      movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx   // its table dword
      addl %edx,%eax                                         // combined pixel
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      pushl %ebp
      // keep the full count for the odd-pixel check after the loop
      pushl %ecx
      // ebp = number of pixel pairs
      movl %ecx,%ebp
      shrl $1,%ebp
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ebp,4),%esi
      leal (%edi,%ebp,8),%edi
      negl %ebp
      // ecx / ebx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      xorl %ecx,%ecx
      // preload the first pixel of the first pair
      movb (%esi,%ebp,4),%cl
      movb 1(%esi,%ebp,4),%bl
  .Lpair:
      // two pixels per iteration, loads interleaved with the table lookups
      movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
      movb 2(%esi,%ebp,4),%cl
      movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
      movb 3(%esi,%ebp,4),%bl
      addl %edx,%eax
      movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
      movl %eax,(%edi,%ebp,8)
      movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
      addl %edx,%eax
      // look-ahead for the next pair. NOTE: on the last iteration this
      // reads the two bytes at the end of the paired region, which lie
      // just past the source run when the pixel count is even.
      movb 4(%esi,%ebp,4),%cl
      movl %eax,4(%edi,%ebp,8)
      movb 5(%esi,%ebp,4),%bl
      incl %ebp
      jnz .Lpair
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lrestore
      xorl %ebx,%ebx
      movb (%esi),%bl
      movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax
      movb 1(%esi),%bl
      movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx
      addl %edx,%eax
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_32BGRA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to 32-bit pixels with the lookup table
    ConvertX86p16_32BGRA888_LUT_X86.

    The table is addressed as 8-byte entries: the dword at offset 0 is
    indexed by the low source byte, the dword at offset 4 by the high source
    byte, and the sum of the two dwords is the destination pixel.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 4 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // ebx is the zero-extended table index; only bl is rewritten below
      xorl %ebx,%ebx
  .Lshort:
      movb (%esi),%bl                                        // low source byte
      movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax     // its table dword
      movb 1(%esi),%bl                                       // high source byte
      movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx   // its table dword
      addl %edx,%eax                                         // combined pixel
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      pushl %ebp
      // keep the full count for the odd-pixel check after the loop
      pushl %ecx
      // ebp = number of pixel pairs
      movl %ecx,%ebp
      shrl $1,%ebp
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ebp,4),%esi
      leal (%edi,%ebp,8),%edi
      negl %ebp
      // ecx / ebx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      xorl %ecx,%ecx
      // preload the first pixel of the first pair
      movb (%esi,%ebp,4),%cl
      movb 1(%esi,%ebp,4),%bl
  .Lpair:
      // two pixels per iteration, loads interleaved with the table lookups
      movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
      movb 2(%esi,%ebp,4),%cl
      movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
      movb 3(%esi,%ebp,4),%bl
      addl %edx,%eax
      movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
      movl %eax,(%edi,%ebp,8)
      movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
      addl %edx,%eax
      // look-ahead for the next pair. NOTE: on the last iteration this
      // reads the two bytes at the end of the paired region, which lie
      // just past the source run when the pixel count is even.
      movb 4(%esi,%ebp,4),%cl
      movl %eax,4(%edi,%ebp,8)
      movb 5(%esi,%ebp,4),%bl
      incl %ebp
      jnz .Lpair
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lrestore
      xorl %ebx,%ebx
      movb (%esi),%bl
      movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax
      movb 1(%esi),%bl
      movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx
      addl %edx,%eax
      movl %eax,(%edi)
      addl $2,%esi
      addl $4,%edi
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to packed 24-bit pixels. Each pixel is
    first expanded to 32 bits with ConvertX86p16_32RGB888_LUT_X86 (dword at
    offset 0 indexed by the low source byte, dword at offset 4 by the high
    source byte, the two summed); the low three bytes are then stored.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 3 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    Runs longer than 32 pixels are converted one pixel at a time until edi
    is a multiple of 4, then four pixels (three dwords) per iteration, then
    one pixel at a time for the remainder.

    NOTE(review): the packing in the four-pixel loop adds whole table sums
    together, so it relies on the top byte of every table sum being zero.
    The table is not visible here - confirm against its definition.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // edx is the zero-extended table index; only dl is rewritten below
      xorl %edx,%edx
  .Lshort:
      movb (%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax      // eax = ARGB8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx    // ebx = ARGB8888 of upper byte
      addl %ebx,%eax                                         // eax = ARGB8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      xorl %edx,%edx
  .Lalign:
      // single pixels until the destination is dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lquads
      movb (%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax      // eax = ARGB8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx    // ebx = ARGB8888 of upper byte
      addl %ebx,%eax                                         // eax = ARGB8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jmp .Lalign
  .Lquads:
      pushl %ebp
      // ebp = number of four-pixel groups
      movl %ecx,%ebp
      shrl $2,%ebp
      // ebx / edx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      // keep the remaining count for the tail check
      pushl %ecx
      // preload pixel 1 of the first group
      movb (%esi),%bl                                        // ebx = lower byte pixel 1
      movb 1(%esi),%dl                                       // edx = upper byte pixel 1
  .Lquad:
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax      // eax = ARGB8888 of lower byte pixel 1
      movb 2(%esi),%bl                                       // ebx = lower byte pixel 2
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 1
      movb 3(%esi),%dl                                       // edx = upper byte pixel 2
      pushl %ebp                                             // group counter is parked; ebp becomes scratch
      addl %ecx,%eax                                         // eax = ARGB8888 of pixel 1
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 2
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 2
      movb 4(%esi),%bl                                       // ebx = lower byte pixel 3
      addl %ebp,%ecx                                         // ecx = ARGB8888 of pixel 2
      shll $24,%ebp                                          // ebp = [b][0][0][0] of pixel 2
      movb 5(%esi),%dl                                       // edx = upper byte pixel 3
      shrl $8,%ecx                                           // ecx = [0][0][r][g] pixel 2
      addl %ebp,%eax                                         // eax = [b2][r1][g1][b1] (done)
      movl %eax,(%edi)                                       // store dword 1
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%eax    // eax = ARGB8888 of upper byte pixel 3
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 3
      movb 6(%esi),%bl                                       // ebx = lower byte pixel 4
      addl %eax,%ebp                                         // ebp = ARGB8888 of pixel 3
      movb 7(%esi),%dl                                       // edx = upper byte pixel 4
      shll $16,%ebp                                          // ebp = [g][b][0][0] pixel 3
      shrl $16,%eax                                          // al = red component of pixel 3
      addl %ecx,%ebp                                         // ebp = [g3][b3][r2][g2] (done)
      movl %ebp,4(%edi)                                      // store dword 2
      movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ecx      // ecx = ARGB8888 of lower byte pixel 4
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebp    // ebp = ARGB8888 of upper byte pixel 4
      // look-ahead for pixel 1 of the next group. NOTE: on the last
      // iteration bytes 8 and 9 lie past the source run when no tail
      // pixels remain.
      movb 8(%esi),%bl                                       // ebx = lower byte of next pixel 1
      addl %ebp,%ecx                                         // ecx = ARGB8888 of pixel 4
      movb 9(%esi),%dl                                       // edx = upper byte of next pixel 1
      shll $8,%ecx                                           // ecx = [r][g][b][0]
      popl %ebp                                              // group counter back in ebp
      movb %al,%cl                                           // ecx = [r4][g4][b4][r3] (done)
      addl $8,%esi                                           // 4 source pixels consumed
      movl %ecx,8(%edi)                                      // store dword 3
      addl $12,%edi                                          // 3 dwords written
      decl %ebp
      jnz .Lquad
      // 0..3 pixels may be left over
      popl %ecx
      andl $0b11,%ecx
      jz .Lrestore
  .Ltail:
      movb (%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax      // eax = ARGB8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx    // ebx = ARGB8888 of upper byte
      addl %ebx,%eax                                         // eax = ARGB8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jnz .Ltail
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_24BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to packed 24-bit pixels. Each pixel is
    first expanded to 32 bits with ConvertX86p16_32BGR888_LUT_X86 (dword at
    offset 0 indexed by the low source byte, dword at offset 4 by the high
    source byte, the two summed); the low three bytes are then stored.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 3 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    Runs longer than 32 pixels are converted one pixel at a time until edi
    is a multiple of 4, then four pixels (three dwords) per iteration, then
    one pixel at a time for the remainder.

    NOTE(review): the packing in the four-pixel loop adds whole table sums
    together, so it relies on the top byte of every table sum being zero.
    The table is not visible here - confirm against its definition.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
      // edx is the zero-extended table index; only dl is rewritten below
      xorl %edx,%edx
  .Lshort:
      movb (%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax      // eax = ABGR8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx    // ebx = ABGR8888 of upper byte
      addl %ebx,%eax                                         // eax = ABGR8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      xorl %edx,%edx
  .Lalign:
      // single pixels until the destination is dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lquads
      movb (%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax      // eax = ABGR8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx    // ebx = ABGR8888 of upper byte
      addl %ebx,%eax                                         // eax = ABGR8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jmp .Lalign
  .Lquads:
      pushl %ebp
      // ebp = number of four-pixel groups
      movl %ecx,%ebp
      shrl $2,%ebp
      // ebx / edx hold the zero-extended low / high byte indices
      xorl %ebx,%ebx
      // keep the remaining count for the tail check
      pushl %ecx
      // preload pixel 1 of the first group
      movb (%esi),%bl                                        // ebx = lower byte pixel 1
      movb 1(%esi),%dl                                       // edx = upper byte pixel 1
  .Lquad:
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      // eax = ABGR8888 of lower byte pixel 1
      movb 2(%esi),%bl                                       // ebx = lower byte pixel 2
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ecx    // ecx = ABGR8888 of upper byte pixel 1
      movb 3(%esi),%dl                                       // edx = upper byte pixel 2
      pushl %ebp                                             // group counter is parked; ebp becomes scratch
      addl %ecx,%eax                                         // eax = ABGR8888 of pixel 1
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ecx = ABGR8888 of lower byte pixel 2
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 2
      movb 4(%esi),%bl                                       // ebx = lower byte pixel 3
      addl %ebp,%ecx                                         // ecx = ABGR8888 of pixel 2
      shll $24,%ebp                                          // ebp = [r][0][0][0] of pixel 2
      movb 5(%esi),%dl                                       // edx = upper byte pixel 3
      shrl $8,%ecx                                           // ecx = [0][0][b][g] pixel 2
      addl %ebp,%eax                                         // eax = [r2][b1][g1][r1] (done)
      movl %eax,(%edi)                                       // store dword 1
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 3
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      // eax = ABGR8888 of lower byte pixel 3
      movb 6(%esi),%bl                                       // ebx = lower byte pixel 4
      addl %eax,%ebp                                         // ebp = ABGR8888 of pixel 3
      movb 7(%esi),%dl                                       // edx = upper byte pixel 4
      shll $16,%ebp                                          // ebp = [g][r][0][0] pixel 3
      shrl $16,%eax                                          // al = blue component of pixel 3
      addl %ecx,%ebp                                         // ebp = [g3][r3][b2][g2] (done)
      movl %ebp,4(%edi)                                      // store dword 2
      movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ecx = ABGR8888 of lower byte pixel 4
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 4
      // look-ahead for pixel 1 of the next group. NOTE: on the last
      // iteration bytes 8 and 9 lie past the source run when no tail
      // pixels remain.
      movb 8(%esi),%bl                                       // ebx = lower byte of next pixel 1
      addl %ebp,%ecx                                         // ecx = ABGR8888 of pixel 4
      movb 9(%esi),%dl                                       // edx = upper byte of next pixel 1
      shll $8,%ecx                                           // ecx = [b][g][r][0]
      popl %ebp                                              // group counter back in ebp
      movb %al,%cl                                           // ecx = [b4][g4][r4][b3] (done)
      addl $8,%esi                                           // 4 source pixels consumed
      movl %ecx,8(%edi)                                      // store dword 3
      addl $12,%edi                                          // 3 dwords written
      decl %ebp
      jnz .Lquad
      // 0..3 pixels may be left over
      popl %ecx
      andl $0b11,%ecx
      jz .Lrestore
  .Ltail:
      movb (%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax      // eax = ABGR8888 of lower byte
      movb 1(%esi),%dl
      movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx    // ebx = ABGR8888 of upper byte
      addl %ebx,%eax                                         // eax = ABGR8888 pixel
      movb %al,(%edi)
      movb %ah,1(%edi)
      shrl $16,%eax
      movb %al,2(%edi)
      addl $2,%esi
      addl $3,%edi
      decl %ecx
      jnz .Ltail
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels by swapping the two 5-bit fields at
    bits 11..15 and bits 0..4 while leaving the 6-bit field at bits 5..10
    in place.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 2 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags

    Runs longer than 16 pixels convert at most one leading pixel when edi is
    not a multiple of 4, then two pixels per dword, then one trailing pixel
    when the remaining count is odd.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 16 pixels use the simple loop
      cmpl $16,%ecx
      ja .Lbulk
  .Lshort:
      // only ax is loaded and only ax is stored, so whatever sits in the
      // upper half of eax, ebx and edx never reaches the destination
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax                 // bits 11..15 moved down to 0..4
      andl $0b11111100000,%ebx           // bits 5..10 stay where they are
      shll $11,%edx                      // bits 0..4 moved up to 11..15
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      // one leading pixel if the destination is not dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lpairs
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax
      andl $0b11111100000,%ebx
      shll $11,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
  .Lpairs:
      // keep the remaining count for the odd-pixel check
      pushl %ecx
      // ecx = number of dwords (pixel pairs)
      shrl $1,%ecx
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ecx,4),%esi
      leal (%edi,%ecx,4),%edi
      negl %ecx
      jmp .Lload
  .Lstore:
      // store of the previous iteration's result (ecx is already bumped)
      movl %eax,-4(%edi,%ecx,4)
  .Lload:
      movl (%esi,%ecx,4),%eax
      movl (%esi,%ecx,4),%ebx
      andl $0x07E007E0,%eax              // middle fields of both pixels
      movl (%esi,%ecx,4),%edx
      andl $0x0F800F800,%ebx             // top fields ...
      shrl $11,%ebx                      // ... moved to the bottom
      andl $0x001F001F,%edx              // bottom fields ...
      shll $11,%edx                      // ... moved to the top
      addl %ebx,%eax
      addl %edx,%eax
      incl %ecx
      jnz .Lstore
      // result of the final iteration
      movl %eax,-4(%edi,%ecx,4)
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lexit
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax
      andl $0b11111100000,%ebx
      shll $11,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
  .Lexit:
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels by moving bits 6..15 down one position
    (to bits 5..14) and keeping bits 0..4; source bit 5 is dropped and
    destination bit 15 is zero.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 2 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags; ebp is saved and restored

    Runs longer than 32 pixels convert at most one leading pixel when edi is
    not a multiple of 4, then four pixels (two dwords) per iteration, then
    up to three trailing pixels.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 32 pixels use the simple loop
      cmpl $32,%ecx
      ja .Lbulk
  .Lshort:
      // only ax is loaded; the masks below discard everything above bit 15
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      shrl $1,%ebx
      andl $0b0111111111100000,%ebx      // source bits 6..15, now at 5..14
      andl $0b0000000000011111,%eax      // source bits 0..4
      addl %ebx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      // one leading pixel if the destination is not dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lquads
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      shrl $1,%ebx
      andl $0b0111111111100000,%ebx
      andl $0b0000000000011111,%eax
      addl %ebx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
  .Lquads:
      pushl %ebp
      // keep the remaining count for the tail
      pushl %ecx
      // ecx = number of four-pixel groups
      shrl $2,%ecx
      // move both pointers to the end of the grouped region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ecx,8),%esi
      leal (%edi,%ecx,8),%edi
      xorl %ebp,%ebp
      subl %ecx,%ebp
  .Lquad:
      movl (%esi,%ebp,8),%eax            // pixels 1 and 2
      movl 4(%esi,%ebp,8),%ecx           // pixels 3 and 4
      movl %eax,%ebx
      movl %ecx,%edx
      andl $0x0FFC0FFC0,%eax             // bits 6..15 of each pixel
      andl $0x0FFC0FFC0,%ecx
      shrl $1,%eax                       // moved down to bits 5..14
      andl $0x001F001F,%ebx              // bits 0..4 of each pixel
      shrl $1,%ecx
      andl $0x001F001F,%edx
      addl %ebx,%eax
      addl %edx,%ecx
      movl %eax,(%edi,%ebp,8)
      movl %ecx,4(%edi,%ebp,8)
      incl %ebp
      jnz .Lquad
      // 0..3 pixels may be left over
      popl %ecx
  .Ltail:
      // the mask is reapplied on every pass; after the first pass it no
      // longer changes ecx
      andl $0b11,%ecx
      jz .Lrestore
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      shrl $1,%ebx
      andl $0b0111111111100000,%ebx
      andl $0b0000000000011111,%eax
      addl %ebx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
      jmp .Ltail
  .Lrestore:
      popl %ebp
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels into a 15-bit layout with the outer
    fields exchanged: source bits 11..15 go to bits 0..4, source bits 6..10
    go to bits 5..9 and source bits 0..4 go to bits 10..14. Source bit 5 is
    dropped and destination bit 15 is zero.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 2 bytes per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags

    Runs longer than 16 pixels convert at most one leading pixel when edi is
    not a multiple of 4, then two pixels per dword, then one trailing pixel
    when the remaining count is odd.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 16 pixels use the simple loop
      cmpl $16,%ecx
      ja .Lbulk
  .Lshort:
      // only ax is loaded; the masks below discard everything above bit 15
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax                 // source bits 11..15 -> 0..4
      shrl $1,%ebx
      andl $0b1111100000,%ebx            // source bits 6..10 -> 5..9
      shll $10,%edx
      andl $0b0111110000000000,%edx      // source bits 0..4 -> 10..14
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lbulk:
      // one leading pixel if the destination is not dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lpairs
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax
      shrl $1,%ebx
      andl $0b1111100000,%ebx
      shll $10,%edx
      andl $0b0111110000000000,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
      decl %ecx
  .Lpairs:
      // keep the remaining count for the odd-pixel check
      pushl %ecx
      // ecx = number of dwords (pixel pairs)
      shrl $1,%ecx
      // move both pointers to the end of the paired region and index
      // backwards with a negative counter that runs up to zero
      leal (%esi,%ecx,4),%esi
      leal (%edi,%ecx,4),%edi
      negl %ecx
      jmp .Lload
  .Lstore:
      // store of the previous iteration's result (ecx is already bumped)
      movl %eax,-4(%edi,%ecx,4)
  .Lload:
      movl (%esi,%ecx,4),%eax
      shrl $1,%eax
      movl (%esi,%ecx,4),%ebx
      andl $0x03E003E0,%eax              // source bits 6..10 -> 5..9
      movl (%esi,%ecx,4),%edx
      andl $0x0F800F800,%ebx             // source bits 11..15 ...
      shrl $11,%ebx                      // ... -> 0..4
      andl $0x001F001F,%edx              // source bits 0..4 ...
      shll $10,%edx                      // ... -> 10..14
      addl %ebx,%eax
      addl %edx,%eax
      incl %ecx
      jnz .Lstore
      // result of the final iteration
      movl %eax,-4(%edi,%ecx,4)
      // odd count: one pixel is left at esi / edi
      popl %ecx
      andl $1,%ecx
      jz .Lexit
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      shrl $11,%eax
      andl $0b11111,%eax
      shrl $1,%ebx
      andl $0b1111100000,%ebx
      shll $10,%edx
      andl $0b0111110000000000,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      movb %ah,1(%edi)
      addl $2,%esi
      addl $2,%edi
  .Lexit:
      jmp _X86RETURN
  end;
  procedure ConvertX86p16_8RGB332(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  {
    Converts a run of 16-bit pixels to 8-bit pixels: source bits 13..15
    become bits 5..7, source bits 8..10 become bits 2..4 and source bits
    3..4 become bits 0..1.

    Register interface (read off the code below, not a regular cdecl frame):
      in    esi = source pointer, advanced by 2 bytes per pixel
            edi = destination pointer, advanced by 1 byte per pixel
            ecx = pixel count
      out   leaves through a jmp to _X86RETURN
      uses  eax, ebx, ecx, edx and the flags

    Runs longer than 16 pixels are converted one pixel at a time until edi
    is a multiple of 4, then four pixels (one dword) per iteration, then up
    to three trailing pixels.

    A count of zero returns immediately; without that check the short loop's
    decl/jnz pair would wrap ecx around and keep converting. ebp is not
    touched: nothing in this routine uses it, so it needs no save/restore.
  }
  asm
      // empty run: nothing to convert
      testl %ecx,%ecx
      jz .Ldone
      // runs of up to 16 pixels use the simple loop
      cmpl $16,%ecx
      ja .Lalign
  .Lshort:
      // only ax is loaded; the masks below discard everything above bit 15
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      andl $0b11000,%eax                 // blue
      shrl $3,%eax
      andl $0b11100000000,%ebx           // green
      shrl $6,%ebx
      andl $0b1110000000000000,%edx      // red
      shrl $8,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      addl $2,%esi
      incl %edi
      decl %ecx
      jnz .Lshort
  .Ldone:
      jmp _X86RETURN
  .Lalign:
      // single pixels until the destination is dword aligned
      movl %edi,%eax
      andl $0b11,%eax
      jz .Lquads
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      andl $0b11000,%eax                 // blue
      shrl $3,%eax
      andl $0b11100000000,%ebx           // green
      shrl $6,%ebx
      andl $0b1110000000000000,%edx      // red
      shrl $8,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      addl $2,%esi
      incl %edi
      decl %ecx
      jmp .Lalign
  .Lquads:
      // keep the remaining count for the tail check
      pushl %ecx
      // ecx = number of four-pixel groups
      shrl $2,%ecx
      // preload: edx collects the low source bytes, ebx the high ones
      movb (%esi),%dl                    // low byte pixel 1
      movb 1(%esi),%bl                   // high byte pixel 1
      movb 2(%esi),%dh                   // low byte pixel 2
  .Lquad:
      shll $16,%edx                      // pixels 1,2 low bytes to the upper half
      movb 3(%esi),%bh                   // high byte pixel 2
      shll $16,%ebx                      // pixels 1,2 high bytes to the upper half
      movb 4(%esi),%dl                   // low byte pixel 3
      movb 6(%esi),%dh                   // low byte pixel 4
      movb 5(%esi),%bl                   // high byte pixel 3
      andl $0b00011000000110000001100000011000,%edx   // blues (bits 3..4 of each low byte)
      movb 7(%esi),%bh                   // high byte pixel 4
      rorl $16+3,%edx                    // pixel order 1..4 from the low byte up, blues at bits 0..1
      movl %ebx,%eax                     // setup eax for reds
      andl $0b00000111000001110000011100000111,%ebx   // greens (bits 0..2 of each high byte)
      andl $0b11100000111000001110000011100000,%eax   // reds (bits 5..7 of each high byte)
      rorl $16-2,%ebx                    // pixel order 1..4, greens at bits 2..4
      addl $8,%esi
      rorl $16,%eax                      // pixel order 1..4, reds stay at bits 5..7
      addl $4,%edi
      addl %ebx,%eax
      // look-ahead for the next group (esi already points at it). NOTE:
      // on the last iteration these three loads touch bytes 0..2 after the
      // converted groups, which lie past the source run when fewer than
      // two tail pixels remain.
      movb 1(%esi),%bl                   // greens
      addl %edx,%eax
      movb (%esi),%dl                    // blues
      movl %eax,-4(%edi)
      movb 2(%esi),%dh
      decl %ecx
      jnz .Lquad
      // 0..3 pixels may be left over
      popl %ecx
      andl $0b11,%ecx
      jz .Lexit
  .Ltail:
      movb (%esi),%al
      movb 1(%esi),%ah
      movl %eax,%ebx
      movl %eax,%edx
      andl $0b11000,%eax                 // blue
      shrl $3,%eax
      andl $0b11100000000,%ebx           // green
      shrl $6,%ebx
      andl $0b1110000000000000,%edx      // red
      shrl $8,%edx
      addl %ebx,%eax
      addl %edx,%eax
      movb %al,(%edi)
      addl $2,%esi
      incl %edi
      decl %ecx
      jnz .Ltail
  .Lexit:
      jmp _X86RETURN
  end;