/packages/hermes/src/i386/x86p_16.inc
Pascal | 1143 lines | 794 code | 250 blank | 99 comment | 0 complexity | 6bff2aa0fda3b458f53d67b9c9ab3748 MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-2.1, LGPL-3.0
{
    x86 format converters for HERMES
    Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
    Routines adjusted for Hermes by Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
    Used with permission.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version
    with the following modification:

    As a special exception, the copyright holders of this library give you
    permission to link this library with independent modules to produce an
    executable, regardless of the license terms of these independent modules,and
    to copy and distribute the resulting executable under terms of your choice,
    provided that you also meet, for each linked independent module, the terms
    and conditions of the license of that module. An independent module is a
    module which is not derived from or based on this library. If you modify
    this library, you may extend this exception to your version of the library,
    but you are not obligated to do so. If you do not wish to do so, delete this
    exception statement from your version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
}

{
  Register interface shared by every converter in this file (i386, AT&T
  syntax), as used by the code below:

    ESI  in/out  source pointer; read as 16-bit pixels and left pointing just
                 past the last pixel consumed
    EDI  in/out  destination pointer; left pointing just past the last pixel
                 written
    ECX  in      number of pixels to convert

  EAX, EBX, ECX and EDX are used as scratch and are not restored. EBP is
  pushed and popped on every path that touches it. Every exit is
  "jmp _X86RETURN"; no converter executes a ret.

  NOTE(review): the short loops decrement ECX before testing it, so a count
  of 0 would wrap around; presumably the caller never passes 0 - confirm.

  The unrolled loops load their input bytes at the top of each iteration
  instead of pre-loading them for the following iteration, so no byte beyond
  the last source pixel is ever read.
}


{
  16-bit -> 32-bit through ConvertX86p16_32RGB888_LUT_X86.

  The table has 8-byte entries: the dword at offset 0 is indexed by the low
  byte of the source pixel, the dword at offset 4 by the high byte. The sum of
  the two dwords is stored as the destination pixel.

  Counts of 32 or less use a one-pixel loop; larger counts use a loop
  unrolled twice followed by a one-pixel tail for odd counts.
}
procedure ConvertX86p16_32RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %ebx,%ebx
.L1:    movb    (%esi),%bl                                          // ebx = low byte of pixel
        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%bl                                         // ebx = high byte of pixel
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx      // edx = table dword for high byte
        addl    %edx,%eax                                           // eax = converted pixel
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // save ebp and the pixel count
        pushl   %ebp
        pushl   %ecx

        // unroll twice: ebp = number of pixel pairs
        movl    %ecx,%ebp
        shrl    $1,%ebp

        // point both arrays at the end of the paired pixels
        leal    (%esi,%ebp,4),%esi
        leal    (%edi,%ebp,8),%edi

        // negative counter running up to zero
        negl    %ebp

        // clear the index registers; only cl and bl are written in the loop
        xorl    %ebx,%ebx
        xorl    %ecx,%ecx

.L4:    movb    (%esi,%ebp,4),%cl                                   // ecx = low byte pixel 1
        movb    1(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 1
        movl    ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
        movb    2(%esi,%ebp,4),%cl                                  // ecx = low byte pixel 2
        movb    3(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 2
        addl    %edx,%eax                                           // eax = pixel 1
        movl    %eax,(%edi,%ebp,8)
        movl    ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
        addl    %edx,%eax                                           // eax = pixel 2
        movl    %eax,4(%edi,%ebp,8)
        incl    %ebp
        jnz     .L4

        // tail: one leftover pixel when the count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L6
        xorl    %ebx,%ebx
        movb    (%esi),%bl
        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax
        movb    1(%esi),%bl
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx
        addl    %edx,%eax
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi

.L6:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> 32-bit through ConvertX86p16_32BGR888_LUT_X86.
  Same algorithm and register usage as ConvertX86p16_32RGB888; only the
  lookup table differs.
}
procedure ConvertX86p16_32BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %ebx,%ebx
.L1:    movb    (%esi),%bl                                          // ebx = low byte of pixel
        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%bl                                         // ebx = high byte of pixel
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx      // edx = table dword for high byte
        addl    %edx,%eax                                           // eax = converted pixel
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // save ebp and the pixel count
        pushl   %ebp
        pushl   %ecx

        // unroll twice: ebp = number of pixel pairs
        movl    %ecx,%ebp
        shrl    $1,%ebp

        // point both arrays at the end of the paired pixels
        leal    (%esi,%ebp,4),%esi
        leal    (%edi,%ebp,8),%edi

        // negative counter running up to zero
        negl    %ebp

        // clear the index registers; only cl and bl are written in the loop
        xorl    %ebx,%ebx
        xorl    %ecx,%ecx

.L4:    movb    (%esi,%ebp,4),%cl                                   // ecx = low byte pixel 1
        movb    1(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 1
        movl    ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
        movb    2(%esi,%ebp,4),%cl                                  // ecx = low byte pixel 2
        movb    3(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 2
        addl    %edx,%eax                                           // eax = pixel 1
        movl    %eax,(%edi,%ebp,8)
        movl    ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
        addl    %edx,%eax                                           // eax = pixel 2
        movl    %eax,4(%edi,%ebp,8)
        incl    %ebp
        jnz     .L4

        // tail: one leftover pixel when the count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L6
        xorl    %ebx,%ebx
        movb    (%esi),%bl
        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax
        movb    1(%esi),%bl
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx
        addl    %edx,%eax
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi

.L6:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> 32-bit through ConvertX86p16_32RGBA888_LUT_X86.
  Same algorithm and register usage as ConvertX86p16_32RGB888; only the
  lookup table differs.
}
procedure ConvertX86p16_32RGBA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %ebx,%ebx
.L1:    movb    (%esi),%bl                                          // ebx = low byte of pixel
        movl    ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax       // eax = table dword for low byte
        movb    1(%esi),%bl                                         // ebx = high byte of pixel
        movl    ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx     // edx = table dword for high byte
        addl    %edx,%eax                                           // eax = converted pixel
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // save ebp and the pixel count
        pushl   %ebp
        pushl   %ecx

        // unroll twice: ebp = number of pixel pairs
        movl    %ecx,%ebp
        shrl    $1,%ebp

        // point both arrays at the end of the paired pixels
        leal    (%esi,%ebp,4),%esi
        leal    (%edi,%ebp,8),%edi

        // negative counter running up to zero
        negl    %ebp

        // clear the index registers; only cl and bl are written in the loop
        xorl    %ebx,%ebx
        xorl    %ecx,%ecx

.L4:    movb    (%esi,%ebp,4),%cl                                   // ecx = low byte pixel 1
        movb    1(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 1
        movl    ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
        movb    2(%esi,%ebp,4),%cl                                  // ecx = low byte pixel 2
        movb    3(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 2
        addl    %edx,%eax                                           // eax = pixel 1
        movl    %eax,(%edi,%ebp,8)
        movl    ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
        addl    %edx,%eax                                           // eax = pixel 2
        movl    %eax,4(%edi,%ebp,8)
        incl    %ebp
        jnz     .L4

        // tail: one leftover pixel when the count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L6
        xorl    %ebx,%ebx
        movb    (%esi),%bl
        movl    ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax
        movb    1(%esi),%bl
        movl    ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx
        addl    %edx,%eax
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi

.L6:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> 32-bit through ConvertX86p16_32BGRA888_LUT_X86.
  Same algorithm and register usage as ConvertX86p16_32RGB888; only the
  lookup table differs.
}
procedure ConvertX86p16_32BGRA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %ebx,%ebx
.L1:    movb    (%esi),%bl                                          // ebx = low byte of pixel
        movl    ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax       // eax = table dword for low byte
        movb    1(%esi),%bl                                         // ebx = high byte of pixel
        movl    ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx     // edx = table dword for high byte
        addl    %edx,%eax                                           // eax = converted pixel
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // save ebp and the pixel count
        pushl   %ebp
        pushl   %ecx

        // unroll twice: ebp = number of pixel pairs
        movl    %ecx,%ebp
        shrl    $1,%ebp

        // point both arrays at the end of the paired pixels
        leal    (%esi,%ebp,4),%esi
        leal    (%edi,%ebp,8),%edi

        // negative counter running up to zero
        negl    %ebp

        // clear the index registers; only cl and bl are written in the loop
        xorl    %ebx,%ebx
        xorl    %ecx,%ecx

.L4:    movb    (%esi,%ebp,4),%cl                                   // ecx = low byte pixel 1
        movb    1(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 1
        movl    ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
        movb    2(%esi,%ebp,4),%cl                                  // ecx = low byte pixel 2
        movb    3(%esi,%ebp,4),%bl                                  // ebx = high byte pixel 2
        addl    %edx,%eax                                           // eax = pixel 1
        movl    %eax,(%edi,%ebp,8)
        movl    ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
        movl    ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
        addl    %edx,%eax                                           // eax = pixel 2
        movl    %eax,4(%edi,%ebp,8)
        incl    %ebp
        jnz     .L4

        // tail: one leftover pixel when the count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L6
        xorl    %ebx,%ebx
        movb    (%esi),%bl
        movl    ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax
        movb    1(%esi),%bl
        movl    ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx
        addl    %edx,%eax
        movl    %eax,(%edi)
        addl    $2,%esi
        addl    $4,%edi

.L6:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> packed 24-bit through ConvertX86p16_32RGB888_LUT_X86.

  Each pixel is looked up exactly as in the 32-bit converter and the three low
  bytes of the sum are written. Counts of 32 or less use a one-pixel loop.
  Larger counts convert single pixels until EDI is 4-byte aligned, then
  convert four pixels (8 source bytes) into three dwords (12 destination
  bytes) per iteration, and finish with a one-pixel loop for the remainder.

  NOTE(review): the four-pixel packing adds whole table sums to shifted
  neighbours, which only produces clean bytes if the top byte of every table
  sum is zero - confirm against the code that fills the table.
}
procedure ConvertX86p16_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %edx,%edx
.L1:    movb    (%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ARGB8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // clear edx; only dl is written from here on
        xorl    %edx,%edx

.L4:    // head: single pixels until edi is 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L5
        movb    (%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ARGB8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jmp     .L4

.L5:    // unroll 4 times: ebp = number of 4-pixel groups
        pushl   %ebp
        movl    %ecx,%ebp
        shrl    $2,%ebp

        // clear ebx; only bl is written inside the loop
        xorl    %ebx,%ebx

        // save count for the tail
        pushl   %ecx

.L6:    movb    (%esi),%bl                                          // ebx = low byte pixel 1
        movb    1(%esi),%dl                                         // edx = high byte pixel 1

        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax        // eax = low-byte dword pixel 1
        movb    2(%esi),%bl                                         // ebx = low byte pixel 2

        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx      // ecx = high-byte dword pixel 1
        movb    3(%esi),%dl                                         // edx = high byte pixel 2

        pushl   %ebp                                                // ebp doubles as scratch below
        addl    %ecx,%eax                                           // eax = ARGB8888 of pixel 1

        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp        // ebp = low-byte dword pixel 2
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx      // ecx = high-byte dword pixel 2

        movb    4(%esi),%bl                                         // ebx = low byte pixel 3
        addl    %ebp,%ecx                                           // ecx = ARGB8888 of pixel 2

        shll    $24,%ebp                                            // ebp = [b][0][0][0] of pixel 2
        movb    5(%esi),%dl                                         // edx = high byte pixel 3

        shrl    $8,%ecx                                             // ecx = [0][0][r][g] pixel 2
        addl    %ebp,%eax                                           // eax = [b2][r1][g1][b1] (done)

        movl    %eax,(%edi)                                         // store dword 1
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%eax      // eax = high-byte dword pixel 3

        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp        // ebp = low-byte dword pixel 3
        movb    6(%esi),%bl                                         // ebx = low byte pixel 4

        addl    %eax,%ebp                                           // ebp = ARGB8888 of pixel 3
        movb    7(%esi),%dl                                         // edx = high byte pixel 4

        shll    $16,%ebp                                            // ebp = [g][b][0][0] pixel 3

        shrl    $16,%eax                                            // al = red component of pixel 3
        addl    %ecx,%ebp                                           // ebp = [g3][b3][r2][g2] (done)

        movl    %ebp,4(%edi)                                        // store dword 2
        movl    ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ecx        // ecx = low-byte dword pixel 4

        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebp      // ebp = high-byte dword pixel 4
        addl    %ebp,%ecx                                           // ecx = ARGB8888 of pixel 4

        shll    $8,%ecx                                             // ecx = [r][g][b][0]
        popl    %ebp                                                // restore group counter

        movb    %al,%cl                                             // ecx = [r4][g4][b4][r3] (done)
        addl    $4*2,%esi

        movl    %ecx,8(%edi)                                        // store dword 3
        addl    $3*4,%edi

        decl    %ebp
        jnz     .L6

.L7:    // check tail
        popl    %ecx
        andl    $0b11,%ecx
        jz      .L9

.L8:    // tail: remaining 1..3 pixels
        movb    (%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ARGB8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jnz     .L8

.L9:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> packed 24-bit through ConvertX86p16_32BGR888_LUT_X86.

  Same structure as ConvertX86p16_24RGB888. Because the byte order of the
  output is reversed, the component that has to be isolated for the packing
  comes from the other half of the lookup, so the roles of the low-byte and
  high-byte dwords are swapped in the four-pixel loop.

  NOTE(review): as in the RGB variant, the packing relies on the top byte of
  every table sum being zero - confirm against the code that fills the table.
}
procedure ConvertX86p16_24BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

        // short loop: one pixel per iteration
        xorl    %edx,%edx
.L1:    movb    (%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ABGR8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // clear edx; only dl is written from here on
        xorl    %edx,%edx

.L4:    // head: single pixels until edi is 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L5
        movb    (%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ABGR8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jmp     .L4

.L5:    // unroll 4 times: ebp = number of 4-pixel groups
        pushl   %ebp
        movl    %ecx,%ebp
        shrl    $2,%ebp

        // clear ebx; only bl is written inside the loop
        xorl    %ebx,%ebx

        // save count for the tail
        pushl   %ecx

.L6:    movb    (%esi),%bl                                          // ebx = low byte pixel 1
        movb    1(%esi),%dl                                         // edx = high byte pixel 1

        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax        // eax = low-byte dword pixel 1
        movb    2(%esi),%bl                                         // ebx = low byte pixel 2

        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ecx      // ecx = high-byte dword pixel 1
        movb    3(%esi),%dl                                         // edx = high byte pixel 2

        pushl   %ebp                                                // ebp doubles as scratch below
        addl    %ecx,%eax                                           // eax = ABGR8888 of pixel 1

        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx        // ecx = low-byte dword pixel 2
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp      // ebp = high-byte dword pixel 2

        movb    4(%esi),%bl                                         // ebx = low byte pixel 3
        addl    %ebp,%ecx                                           // ecx = ABGR8888 of pixel 2

        shll    $24,%ebp                                            // ebp = [r][0][0][0] of pixel 2
        movb    5(%esi),%dl                                         // edx = high byte pixel 3

        shrl    $8,%ecx                                             // ecx = [0][0][b][g] pixel 2
        addl    %ebp,%eax                                           // eax = [r2][b1][g1][r1] (done)

        movl    %eax,(%edi)                                         // store dword 1
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp      // ebp = high-byte dword pixel 3

        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax        // eax = low-byte dword pixel 3
        movb    6(%esi),%bl                                         // ebx = low byte pixel 4

        addl    %eax,%ebp                                           // ebp = ABGR8888 of pixel 3
        movb    7(%esi),%dl                                         // edx = high byte pixel 4

        shll    $16,%ebp                                            // ebp = [g][r][0][0] pixel 3

        shrl    $16,%eax                                            // al = blue component of pixel 3
        addl    %ecx,%ebp                                           // ebp = [g3][r3][b2][g2] (done)

        movl    %ebp,4(%edi)                                        // store dword 2
        movl    ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx        // ecx = low-byte dword pixel 4

        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp      // ebp = high-byte dword pixel 4
        addl    %ebp,%ecx                                           // ecx = ABGR8888 of pixel 4

        shll    $8,%ecx                                             // ecx = [b][g][r][0]
        popl    %ebp                                                // restore group counter

        movb    %al,%cl                                             // ecx = [b4][g4][r4][b3] (done)
        addl    $4*2,%esi

        movl    %ecx,8(%edi)                                        // store dword 3
        addl    $3*4,%edi

        decl    %ebp
        jnz     .L6

.L7:    // check tail
        popl    %ecx
        andl    $0b11,%ecx
        jz      .L9

.L8:    // tail: remaining 1..3 pixels
        movb    (%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax        // eax = table dword for low byte
        movb    1(%esi),%dl
        movl    ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx      // ebx = table dword for high byte
        addl    %ebx,%eax                                           // eax = ABGR8888 pixel
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        shrl    $16,%eax
        movb    %al,2(%edi)
        addl    $2,%esi
        addl    $3,%edi
        decl    %ecx
        jnz     .L8

.L9:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> 16-bit with the two 5-bit end fields exchanged: bits 15..11 move
  to bits 4..0, bits 4..0 move to bits 15..11, and the 6-bit field in bits
  10..5 stays where it is.

  Counts of 16 or less use a one-pixel loop. Larger counts convert one pixel
  first if EDI is not 4-byte aligned, then two pixels per dword, then one
  leftover pixel if the remaining count was odd.
}
procedure ConvertX86p16_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $16,%ecx
        ja      .L3

.L1:    // short loop: one pixel per iteration
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax                   // top field -> bits 4..0
        andl    $0b11111100000,%ebx             // middle field stays in bits 10..5
        shll    $11,%edx                        // bottom field -> bits 15..11
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)                      // only the low 16 bits are stored
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // head: one pixel if edi is not 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L4
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax
        andl    $0b11111100000,%ebx
        shll    $11,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx

.L4:    // save count
        pushl   %ecx

        // unroll twice: ecx = number of pixel pairs
        shrl    $1,%ecx

        // point arrays to end
        leal    (%esi,%ecx,4),%esi
        leal    (%edi,%ecx,4),%edi

        // negative counter
        negl    %ecx
        jmp     .L6

.L5:    movl    %eax,-4(%edi,%ecx,4)            // store the pair finished last iteration
.L6:    movl    (%esi,%ecx,4),%eax

        movl    (%esi,%ecx,4),%ebx
        andl    $0x07E007E0,%eax                // middle fields of both pixels

        movl    (%esi,%ecx,4),%edx
        andl    $0x0F800F800,%ebx               // top fields of both pixels

        shrl    $11,%ebx
        andl    $0x001F001F,%edx                // bottom fields of both pixels

        shll    $11,%edx
        addl    %ebx,%eax

        addl    %edx,%eax
        incl    %ecx

        jnz     .L5

        movl    %eax,-4(%edi,%ecx,4)            // store the final pair

        // tail: one leftover pixel when the remaining count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L7
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax
        andl    $0b11111100000,%ebx
        shll    $11,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi

.L7:
        jmp     _X86RETURN
end;


{
  16-bit -> 16-bit: bits 15..6 are shifted right by one into bits 14..5
  (dropping source bit 5), bits 4..0 are kept, and bit 15 of the result is
  zero.

  Counts of 32 or less use a one-pixel loop. Larger counts convert one pixel
  first if EDI is not 4-byte aligned, then four pixels (two dwords) per
  iteration, then up to three leftover pixels.
}
procedure ConvertX86p16_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $32,%ecx
        ja      .L3

.L1:    // short loop: one pixel per iteration
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        shrl    $1,%ebx
        andl    $0b0111111111100000,%ebx        // upper fields, now in bits 14..5
        andl    $0b0000000000011111,%eax        // bottom field unchanged
        addl    %ebx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // head: one pixel if edi is not 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L4
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        shrl    $1,%ebx
        andl    $0b0111111111100000,%ebx
        andl    $0b0000000000011111,%eax
        addl    %ebx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx

.L4:    // save ebp
        pushl   %ebp

        // save count
        pushl   %ecx

        // unroll four times: ecx = number of 4-pixel groups
        shrl    $2,%ecx

        // point arrays to end
        leal    (%esi,%ecx,8),%esi
        leal    (%edi,%ecx,8),%edi

        // negative counter
        xorl    %ebp,%ebp
        subl    %ecx,%ebp

.L5:    movl    (%esi,%ebp,8),%eax              // agi?
        movl    4(%esi,%ebp,8),%ecx

        movl    %eax,%ebx
        movl    %ecx,%edx

        andl    $0x0FFC0FFC0,%eax               // bits 15..6 of each pixel
        andl    $0x0FFC0FFC0,%ecx

        shrl    $1,%eax
        andl    $0x001F001F,%ebx                // bits 4..0 of each pixel

        shrl    $1,%ecx
        andl    $0x001F001F,%edx

        addl    %ebx,%eax
        addl    %edx,%ecx

        movl    %eax,(%edi,%ebp,8)
        movl    %ecx,4(%edi,%ebp,8)

        incl    %ebp
        jnz     .L5

        // tail: remaining 0..3 pixels
        popl    %ecx
.L6:    andl    $0b11,%ecx
        jz      .L7
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        shrl    $1,%ebx
        andl    $0b0111111111100000,%ebx
        andl    $0b0000000000011111,%eax
        addl    %ebx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx
        jmp     .L6

.L7:    popl    %ebp
        jmp     _X86RETURN
end;


{
  16-bit -> 16-bit: bits 15..11 move to bits 4..0, bits 10..6 move to bits
  9..5 (source bit 5 is dropped), bits 4..0 move to bits 14..10, and bit 15
  of the result is zero.

  Counts of 16 or less use a one-pixel loop. Larger counts convert one pixel
  first if EDI is not 4-byte aligned, then two pixels per dword, then one
  leftover pixel if the remaining count was odd.
}
procedure ConvertX86p16_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $16,%ecx
        ja      .L3

.L1:    // short loop: one pixel per iteration
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax                   // top field -> bits 4..0
        shrl    $1,%ebx
        andl    $0b1111100000,%ebx              // middle field (5 bits) -> bits 9..5
        shll    $10,%edx
        andl    $0b0111110000000000,%edx        // bottom field -> bits 14..10
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // head: one pixel if edi is not 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L4
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax
        shrl    $1,%ebx
        andl    $0b1111100000,%ebx
        shll    $10,%edx
        andl    $0b0111110000000000,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi
        decl    %ecx

.L4:    // save count
        pushl   %ecx

        // unroll twice: ecx = number of pixel pairs
        shrl    $1,%ecx

        // point arrays to end
        leal    (%esi,%ecx,4),%esi
        leal    (%edi,%ecx,4),%edi

        // negative counter
        negl    %ecx
        jmp     .L6

.L5:    movl    %eax,-4(%edi,%ecx,4)            // store the pair finished last iteration
.L6:    movl    (%esi,%ecx,4),%eax

        shrl    $1,%eax
        movl    (%esi,%ecx,4),%ebx

        andl    $0x03E003E0,%eax                // middle fields, now in bits 9..5
        movl    (%esi,%ecx,4),%edx

        andl    $0x0F800F800,%ebx               // top fields of both pixels

        shrl    $11,%ebx
        andl    $0x001F001F,%edx                // bottom fields of both pixels

        shll    $10,%edx
        addl    %ebx,%eax

        addl    %edx,%eax
        incl    %ecx

        jnz     .L5

        movl    %eax,-4(%edi,%ecx,4)            // store the final pair

        // tail: one leftover pixel when the remaining count was odd
        popl    %ecx
        andl    $1,%ecx
        jz      .L7
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        shrl    $11,%eax
        andl    $0b11111,%eax
        shrl    $1,%ebx
        andl    $0b1111100000,%ebx
        shll    $10,%edx
        andl    $0b0111110000000000,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        movb    %ah,1(%edi)
        addl    $2,%esi
        addl    $2,%edi

.L7:
        jmp     _X86RETURN
end;


{
  16-bit -> 8-bit: the result byte is built from source bits 15..13 (red,
  placed in bits 7..5), source bits 10..8 (green, placed in bits 4..2) and
  source bits 4..3 (blue, placed in bits 1..0).

  Counts of 16 or less use a one-pixel loop. Larger counts convert single
  pixels until EDI is 4-byte aligned, then four pixels per stored dword, then
  up to three leftover pixels.
}
procedure ConvertX86p16_8RGB332(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
        // check short
        cmpl    $16,%ecx
        ja      .L3

.L1:    // short loop: one pixel per iteration
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        andl    $0b11000,%eax                   // blue
        shrl    $3,%eax
        andl    $0b11100000000,%ebx             // green
        shrl    $6,%ebx
        andl    $0b1110000000000000,%edx        // red
        shrl    $8,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        addl    $2,%esi
        incl    %edi
        decl    %ecx
        jnz     .L1
        jmp     _X86RETURN

.L3:    // head: single pixels until edi is 4-byte aligned
        movl    %edi,%eax
        andl    $0b11,%eax
        jz      .L4
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        andl    $0b11000,%eax                   // blue
        shrl    $3,%eax
        andl    $0b11100000000,%ebx             // green
        shrl    $6,%ebx
        andl    $0b1110000000000000,%edx        // red
        shrl    $8,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        addl    $2,%esi
        incl    %edi
        decl    %ecx
        jmp     .L3

.L4:    // save ebp
        pushl   %ebp

        // save count
        pushl   %ecx

        // unroll 4 times: ecx = number of 4-pixel groups
        shrl    $2,%ecx

.L5:    // gather the four low bytes in edx and the four high bytes in ebx
        movb    (%esi),%dl                      // low byte pixel 1
        movb    1(%esi),%bl                     // high byte pixel 1
        movb    2(%esi),%dh                     // low byte pixel 2

        shll    $16,%edx
        movb    3(%esi),%bh                     // high byte pixel 2

        shll    $16,%ebx
        movb    4(%esi),%dl                     // low byte pixel 3

        movb    6(%esi),%dh                     // low byte pixel 4
        movb    5(%esi),%bl                     // high byte pixel 3

        andl    $0b00011000000110000001100000011000,%edx    // blues (0x18 per byte)
        movb    7(%esi),%bh                     // high byte pixel 4

        rorl    $16+3,%edx                      // pixel order restored, blue -> bits 1..0
        movl    %ebx,%eax                       // setup eax for reds

        andl    $0b00000111000001110000011100000111,%ebx    // greens (0x07 per byte)
        andl    $0b11100000111000001110000011100000,%eax    // reds (0xE0 per byte)

        rorl    $16-2,%ebx                      // pixel order restored, green -> bits 4..2
        addl    $8,%esi

        rorl    $16,%eax                        // pixel order restored, red stays in bits 7..5
        addl    $4,%edi

        addl    %ebx,%eax
        addl    %edx,%eax

        movl    %eax,-4(%edi)                   // store four converted pixels

        decl    %ecx
        jnz     .L5

        // check tail
        popl    %ecx
        andl    $0b11,%ecx
        jz      .L7

.L6:    // tail: remaining 1..3 pixels
        movb    (%esi),%al
        movb    1(%esi),%ah
        movl    %eax,%ebx
        movl    %eax,%edx
        andl    $0b11000,%eax                   // blue
        shrl    $3,%eax
        andl    $0b11100000000,%ebx             // green
        shrl    $6,%ebx
        andl    $0b1110000000000000,%edx        // red
        shrl    $8,%edx
        addl    %ebx,%eax
        addl    %edx,%eax
        movb    %al,(%edi)
        addl    $2,%esi
        incl    %edi
        decl    %ecx
        jnz     .L6

.L7:    popl    %ebp
        jmp     _X86RETURN
end;