PageRenderTime 214ms CodeModel.GetById 12ms app.highlight 185ms RepoModel.GetById 1ms app.codeStats 0ms

/packages/hermes/src/i386/x86p_16.inc

https://github.com/slibre/freepascal
Pascal | 1143 lines | 794 code | 250 blank | 99 comment | 0 complexity | 6bff2aa0fda3b458f53d67b9c9ab3748 MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-2.1, LGPL-3.0
   1{
   2    x86 format converters for HERMES
   3    Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
   4    Routines adjusted for Hermes by Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
   5    Used with permission.
   6
   7    This library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version
  11    with the following modification:
  12
  13    As a special exception, the copyright holders of this library give you
  14    permission to link this library with independent modules to produce an
  15    executable, regardless of the license terms of these independent modules,and
  16    to copy and distribute the resulting executable under terms of your choice,
  17    provided that you also meet, for each linked independent module, the terms
  18    and conditions of the license of that module. An independent module is a
  19    module which is not derived from or based on this library. If you modify
  20    this library, you may extend this exception to your version of the library,
  21    but you are not obligated to do so. If you do not wish to do so, delete this
  22    exception statement from your version.
  23
  24    This library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with this library; if not, write to the Free Software
  31    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  32}
  33
  34
  35procedure ConvertX86p16_32RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
  36asm
  37    // check short
  38    cmpl $32,%ecx
  39    ja .L3
  40
  41
  42    // short loop
  43    xorl %ebx,%ebx
  44.L1: movb (%esi),%bl              // ebx = lower byte pixel 1
  45    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
  46    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
  47    movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx // edx = ARGB8888 of upper byte pixel 1
  48    addl %edx,%eax
  49    movl %eax,(%edi)
  50    addl $2,%esi
  51    addl $4,%edi
  52    decl %ecx
  53    jnz .L1
  54.L2:
  55    jmp _X86RETURN
  56
  57
  58.L3:  // save ebp
  59    pushl %ebp
  60
  61    // save count
  62    pushl %ecx
  63
  64    // unroll twice
  65    movl %ecx,%ebp
  66    shrl $1,%ebp
  67
  68    // point arrays to end
  69    leal (%esi,%ebp,4),%esi
  70    leal (%edi,%ebp,8),%edi
  71
  72    // negative counter
  73    negl %ebp
  74
  75    // clear
  76    xorl %ebx,%ebx
  77    xorl %ecx,%ecx
  78
  79    // prestep
  80    movb (%esi,%ebp,4),%cl
  81    movb 1(%esi,%ebp,4),%bl
  82
  83.L4:    movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
  84        movb 2(%esi,%ebp,4),%cl
  85
  86        movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
  87        movb 3(%esi,%ebp,4),%bl
  88
  89        addl %edx,%eax
  90        movl ConvertX86p16_32RGB888_LUT_X86(,%ecx,8),%edx
  91
  92        movl %eax,(%edi,%ebp,8)
  93        movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%eax
  94
  95        addl %edx,%eax
  96        movb 4(%esi,%ebp,4),%cl
  97
  98        movl %eax,4(%edi,%ebp,8)
  99        movb 5(%esi,%ebp,4),%bl
 100
 101        incl %ebp
 102        jnz .L4
 103
 104    // tail
 105    popl %ecx
 106    andl $1,%ecx
 107    jz .L6
 108    xorl %ebx,%ebx
 109    movb (%esi),%bl                                      // ebx = lower byte pixel 1
 110    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
 111    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
 112    movl ConvertX86p16_32RGB888_LUT_X86+4(,%ebx,8),%edx // edx = ARGB8888 of upper byte pixel 1
 113    addl %edx,%eax
 114    movl %eax,(%edi)
 115    addl $2,%esi
 116    addl $4,%edi
 117
 118.L6: popl %ebp
 119    jmp _X86RETURN
 120end;
 121
 122
 123
 124
 125
 126procedure ConvertX86p16_32BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 127asm
 128    // check short
 129    cmpl $32,%ecx
 130    ja .L3
 131
 132
 133    // short loop
 134    xorl %ebx,%ebx
 135.L1: movb (%esi),%bl                                     // ebx = lower byte pixel 1
 136    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax    // eax = ABGR8888 of lower byte pixel 1
 137    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
 138    movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx  // edx = ABGR8888 of upper byte pixel 1
 139    addl %edx,%eax
 140    movl %eax,(%edi)
 141    addl $2,%esi
 142    addl $4,%edi
 143    decl %ecx
 144    jnz .L1
 145.L2:
 146    jmp _X86RETURN
 147
 148.L3:  // save ebp
 149    pushl %ebp
 150
 151    // save count
 152    pushl %ecx
 153
 154    // unroll twice
 155    movl %ecx,%ebp
 156    shrl $1,%ebp
 157
 158    // point arrays to end
 159    leal (%esi,%ebp,4),%esi
 160    leal (%edi,%ebp,8),%edi
 161
 162    // negative counter
 163    negl %ebp
 164
 165    // clear
 166    xorl %ebx,%ebx
 167    xorl %ecx,%ecx
 168
 169    // prestep
 170    movb (%esi,%ebp,4),%cl
 171    movb 1(%esi,%ebp,4),%bl
 172
 173.L4:    movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
 174        movb 2(%esi,%ebp,4),%cl
 175
 176        movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
 177        movb 3(%esi,%ebp,4),%bl
 178
 179        addl %edx,%eax
 180        movl ConvertX86p16_32BGR888_LUT_X86(,%ecx,8),%edx
 181
 182        movl %eax,(%edi,%ebp,8)
 183        movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%eax
 184
 185        addl %edx,%eax
 186        movb 4(%esi,%ebp,4),%cl
 187
 188        movl %eax,4(%edi,%ebp,8)
 189        movb 5(%esi,%ebp,4),%bl
 190
 191        incl %ebp
 192        jnz .L4
 193
 194    // tail
 195    popl %ecx
 196    andl $1,%ecx
 197    jz .L6
 198    xorl %ebx,%ebx
 199    movb (%esi),%bl                                      // ebx = lower byte pixel 1
 200    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax    // eax = ABGR8888 of lower byte pixel 1
 201    movb 1(%esi),%bl                                     // ebx = upper byte pixel 1
 202    movl ConvertX86p16_32BGR888_LUT_X86+4(,%ebx,8),%edx  // edx = ABGR8888 of upper byte pixel 1
 203    addl %edx,%eax
 204    movl %eax,(%edi)
 205    addl $2,%esi
 206    addl $4,%edi
 207
 208.L6: popl %ebp
 209    jmp _X86RETURN
 210end;
 211
 212
 213
 214procedure ConvertX86p16_32RGBA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 215asm
 216    // check short
 217    cmpl $32,%ecx
 218    ja .L3
 219
 220
 221    // short loop
 222    xorl %ebx,%ebx
 223.L1: movb (%esi),%bl                                      // ebx = lower byte pixel 1
 224    movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax   // eax = RGBA8888 of lower byte pixel 1
 225    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
 226    movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx // edx = RGBA8888 of upper byte pixel 1
 227    addl %edx,%eax
 228    movl %eax,(%edi)
 229    addl $2,%esi
 230    addl $4,%edi
 231    decl %ecx
 232    jnz .L1
 233.L2:
 234    jmp _X86RETURN
 235
 236.L3:  // save ebp
 237    pushl %ebp
 238
 239    // save count
 240    pushl %ecx
 241
 242    // unroll twice
 243    movl %ecx,%ebp
 244    shrl $1,%ebp
 245
 246    // point arrays to end
 247    leal (%esi,%ebp,4),%esi
 248    leal (%edi,%ebp,8),%edi
 249
 250    // negative counter
 251    negl %ebp
 252
 253    // clear
 254    xorl %ebx,%ebx
 255    xorl %ecx,%ecx
 256
 257    // prestep
 258    movb (%esi,%ebp,4),%cl
 259    movb 1(%esi,%ebp,4),%bl
 260
 261.L4:    movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
 262        movb 2(%esi,%ebp,4),%cl
 263
 264        movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
 265        movb 3(%esi,%ebp,4),%bl
 266
 267        addl %edx,%eax
 268        movl ConvertX86p16_32RGBA888_LUT_X86(,%ecx,8),%edx
 269
 270        movl %eax,(%edi,%ebp,8)
 271        movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%eax
 272
 273        addl %edx,%eax
 274        movb 4(%esi,%ebp,4),%cl
 275
 276        movl %eax,4(%edi,%ebp,8)
 277        movb 5(%esi,%ebp,4),%bl
 278
 279        incl %ebp
 280        jnz .L4
 281
 282    // tail
 283    popl %ecx
 284    andl $1,%ecx
 285    jz .L6
 286    xorl %ebx,%ebx
 287    movb (%esi),%bl                                       // ebx = lower byte pixel 1
 288    movl ConvertX86p16_32RGBA888_LUT_X86(,%ebx,8),%eax   // eax = RGBA8888 of lower byte pixel 1
 289    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
 290    movl ConvertX86p16_32RGBA888_LUT_X86+4(,%ebx,8),%edx // edx = RGBA8888 of upper byte pixel 1
 291    addl %edx,%eax
 292    movl %eax,(%edi)
 293    addl $2,%esi
 294    addl $4,%edi
 295
 296.L6: popl %ebp
 297    jmp _X86RETURN
 298end;
 299
 300
 301
 302
 303procedure ConvertX86p16_32BGRA888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 304asm
 305    // check short
 306    cmpl $32,%ecx
 307    ja .L3
 308
 309    // short loop
 310    xorl %ebx,%ebx
 311.L1: movb (%esi),%bl                                      // ebx = lower byte pixel 1
 312    movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax    // eax = BGRA8888 of lower byte pixel 1
 313    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
 314    movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx  // edx = BGRA8888 of upper byte pixel 1
 315    addl %edx,%eax
 316    movl %eax,(%edi)
 317    addl $2,%esi
 318    addl $4,%edi
 319    decl %ecx
 320    jnz .L1
 321.L2:
 322    jmp _X86RETURN
 323
 324.L3:  // save ebp
 325    pushl %ebp
 326
 327    // save count
 328    pushl %ecx
 329
 330    // unroll twice
 331    movl %ecx,%ebp
 332    shrl $1,%ebp
 333
 334    // point arrays to end
 335    leal (%esi,%ebp,4),%esi
 336    leal (%edi,%ebp,8),%edi
 337
 338    // negative counter
 339    negl %ebp
 340
 341    // clear
 342    xorl %ebx,%ebx
 343    xorl %ecx,%ecx
 344
 345    // prestep
 346    movb (%esi,%ebp,4),%cl
 347    movb 1(%esi,%ebp,4),%bl
 348
 349.L4:    movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
 350        movb 2(%esi,%ebp,4),%cl
 351
 352        movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
 353        movb 3(%esi,%ebp,4),%bl
 354
 355        addl %edx,%eax
 356        movl ConvertX86p16_32BGRA888_LUT_X86(,%ecx,8),%edx
 357
 358        movl %eax,(%edi,%ebp,8)
 359        movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%eax
 360
 361        addl %edx,%eax
 362        movb 4(%esi,%ebp,4),%cl
 363
 364        movl %eax,4(%edi,%ebp,8)
 365        movb 5(%esi,%ebp,4),%bl
 366
 367        incl %ebp
 368        jnz .L4
 369
 370    // tail
 371    popl %ecx
 372    andl $1,%ecx
 373    jz .L6
 374    xorl %ebx,%ebx
 375    movb (%esi),%bl                                       // ebx = lower byte pixel 1
 376    movl ConvertX86p16_32BGRA888_LUT_X86(,%ebx,8),%eax   // eax = BGRA8888 of lower byte pixel 1
 377    movb 1(%esi),%bl                                      // ebx = upper byte pixel 1
 378    movl ConvertX86p16_32BGRA888_LUT_X86+4(,%ebx,8),%edx // edx = BGRA8888 of upper byte pixel 1
 379    addl %edx,%eax
 380    movl %eax,(%edi)
 381    addl $2,%esi
 382    addl $4,%edi
 383
 384.L6: popl %ebp
 385    jmp _X86RETURN
 386end;
 387
 388
 389
 390
 391procedure ConvertX86p16_24RGB888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 392asm
 393    // check short
 394    cmpl $32,%ecx
 395    ja .L3
 396
 397
 398    // short loop
 399    xorl %edx,%edx
 400.L1: movb (%esi),%dl
 401    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax     // eax = ARGB8888 of lower byte
 402    movb 1(%esi),%dl
 403    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx   // ebx = ARGB8888 of upper byte
 404    addl %ebx,%eax                                        // eax = ARGB8888 pixel
 405    movb %al,(%edi)
 406    movb %ah,1(%edi)
 407    shrl $16,%eax
 408    movb %al,2(%edi)
 409    addl $2,%esi
 410    addl $3,%edi
 411    decl %ecx
 412    jnz .L1
 413.L2: jmp _X86RETURN
 414
 415
 416.L3:  // clear edx
 417    xorl %edx,%edx
 418
 419.L4:  // head
 420    movl %edi,%eax
 421    andl $0b11,%eax
 422    jz .L5
 423    movb (%esi),%dl
 424    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax     // eax = ARGB8888 of lower byte
 425    movb 1(%esi),%dl
 426    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx   // ebx = ARGB8888 of upper byte
 427    addl %ebx,%eax                                        // eax = ARGB8888 pixel
 428    movb %al,(%edi)
 429    movb %ah,1(%edi)
 430    shrl $16,%eax
 431    movb %al,2(%edi)
 432    addl $2,%esi
 433    addl $3,%edi
 434    decl %ecx
 435    jmp .L4
 436
 437.L5:  // unroll 4 times
 438    pushl %ebp
 439    movl %ecx,%ebp
 440    shrl $2,%ebp
 441
 442    // clear ebx
 443    xorl %ebx,%ebx
 444
 445    // save count
 446    pushl %ecx
 447
 448    // prestep
 449    movb (%esi),%bl                                     // ebx = lower byte pixel 1
 450    movb 1(%esi),%dl                                    // edx = upper byte pixel 1
 451
 452.L6:    movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%eax   // eax = ARGB8888 of lower byte pixel 1
 453        movb 2(%esi),%bl                                    // ebx = lower byte pixel 2
 454
 455        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 1
 456        movb 3(%esi),%dl                                    // edx = upper byte pixel 2
 457
 458        pushl %ebp                                          // save ebp
 459        addl %ecx,%eax                                      // eax = ARGB8888 of pixel 1
 460
 461        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 2
 462        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ecx    // ecx = ARGB8888 of upper byte pixel 2
 463
 464        movb 4(%esi),%bl                                    // ebx = lower byte pixel 3
 465        addl %ebp,%ecx                                      // ecx = ARGB8888 of pixel 2
 466
 467        shll $24,%ebp                                       // ebp = [b][0][0][0] of pixel 2
 468        movb 5(%esi),%dl                                    // edx = upper byte pixel 3
 469
 470        shrl $8,%ecx                                        // ecx = [0][0][r][g] pixel 2
 471        addl %ebp,%eax                                      // eax = [b2][r1][g1][b1] (done)
 472
 473        movl %eax,(%edi)                                    // store dword 1
 474        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%eax    // eax = ARGB8888 of upper byte pixel 3
 475
 476        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ebp      // ebp = ARGB8888 of lower byte pixel 3
 477        movb 6(%esi),%bl                                    // ebx = lower byte pixel 4
 478
 479        addl %eax,%ebp                                      // ebp = ARGB8888 of pixel 3
 480        movb 7(%esi),%dl                                    // edx = upper byte pixel 4
 481
 482        shll $16,%ebp                                       // ebp = [g][b][0][0] pixel 3
 483
 484        shrl $16,%eax                                       //  al = red component of pixel 3
 485        addl %ecx,%ebp                                      // ebp = [g3][b3][r2][g2] (done)
 486
 487        movl %ebp,4(%edi)                                   // store dword 2
 488        movl ConvertX86p16_32RGB888_LUT_X86(,%ebx,8),%ecx      // ebx = ARGB8888 of lower byte pixel 4
 489
 490        movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebp    // ebp = ARGB8888 of upper byte pixel 4
 491        movb 4*2+0(%esi),%bl                                // ebx = lower byte pixel 1
 492
 493        addl %ebp,%ecx                                      // ecx = ARGB8888 of pixel 4
 494        movb 4*2+1(%esi),%dl                                // edx = upper byte pixel 1
 495
 496        shll $8,%ecx                                        // ecx = [r][g][b][0]
 497        popl %ebp                                           // restore ebp
 498
 499        movb %al,%cl                                        // ecx = [r4][g4][b4][r3] (done)
 500        addl $4*2,%esi
 501
 502        movl %ecx,8(%edi)                                   // store dword 3
 503        addl $3*4,%edi
 504
 505        decl %ebp
 506        jz .L7
 507
 508        jmp .L6
 509
 510.L7:  // check tail
 511    popl %ecx
 512    andl $0b11,%ecx
 513    jz .L9
 514
 515.L8:  // tail
 516    movb (%esi),%dl
 517    movl ConvertX86p16_32RGB888_LUT_X86(,%edx,8),%eax    // eax = ARGB8888 of lower byte
 518    movb 1(%esi),%dl
 519    movl ConvertX86p16_32RGB888_LUT_X86+4(,%edx,8),%ebx  // ebx = ARGB8888 of upper byte
 520    addl %ebx,%eax                                    // eax = ARGB8888 pixel
 521    movb %al,(%edi)
 522    movb %ah,1(%edi)
 523    shrl $16,%eax
 524    movb %al,2(%edi)
 525    addl $2,%esi
 526    addl $3,%edi
 527    decl %ecx
 528    jnz .L8
 529
 530.L9: popl %ebp
 531    jmp _X86RETURN
 532end;
 533
 534
 535
 536
 537procedure ConvertX86p16_24BGR888(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 538asm
 539    // check short
 540    cmpl $32,%ecx
 541    ja .L3
 542
 543
 544    // short loop
 545    xorl %edx,%edx
 546.L1: movb (%esi),%dl
 547    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
 548    movb 1(%esi),%dl
 549    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
 550    addl %ebx,%eax                                    // eax = ABGR8888 pixel
 551    movb %al,(%edi)
 552    movb %ah,1(%edi)
 553    shrl $16,%eax
 554    movb %al,2(%edi)
 555    addl $2,%esi
 556    addl $3,%edi
 557    decl %ecx
 558    jnz .L1
 559.L2:
 560    jmp _X86RETURN
 561
 562
 563.L3:  // clear edx
 564    xorl %edx,%edx
 565
 566.L4:  // head
 567    movl %edi,%eax
 568    andl $0b11,%eax
 569    jz .L5
 570    movb (%esi),%dl
 571    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
 572    movb 1(%esi),%dl
 573    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
 574    addl %ebx,%eax                                    // eax = ABGR8888 pixel
 575    movb %al,(%edi)
 576    movb %ah,1(%edi)
 577    shrl $16,%eax
 578    movb %al,2(%edi)
 579    addl $2,%esi
 580    addl $3,%edi
 581    decl %ecx
 582    jmp .L4
 583
 584.L5:  // unroll 4 times
 585    pushl %ebp
 586    movl %ecx,%ebp
 587    shrl $2,%ebp
 588
 589    // clear ebx
 590    xorl %ebx,%ebx
 591
 592    // save count
 593    pushl %ecx
 594
 595    // prestep
 596    movb (%esi),%bl                                     // ebx = lower byte pixel 1
 597    movb 1(%esi),%dl                                    // edx = upper byte pixel 1
 598
 599.L6:    movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax // eax = ABGR8888 of lower byte pixel 1
 600        movb 2(%esi),%bl                                    // ebx = lower byte pixel 2
 601
 602        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ecx    // ecx = ABGR8888 of upper byte pixel 1
 603        movb 3(%esi),%dl                                    // edx = upper byte pixel 2
 604
 605        pushl %ebp                                          // save ebp
 606        addl %ecx,%eax                                      // eax = ABGR8888 of pixel 1
 607
 608        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ecx = ABGR8888 of lower byte pixel 2
 609        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 2
 610
 611        movb 4(%esi),%bl                                    // ebx = lower byte pixel 3
 612        addl %ebp,%ecx                                      // ecx = ABGR8888 of pixel 2
 613
 614        shll $24,%ebp                                       // ebp = [r][0][0][0] of pixel 2
 615        movb 5(%esi),%dl                                    // edx = upper byte pixel 3
 616
 617        shrl $8,%ecx                                        // ecx = [0][0][b][g] pixel 2
 618        addl %ebp,%eax                                      // eax = [r2][b1][g1][r1] (done)
 619
 620        movl %eax,(%edi)                                    // store dword 1
 621        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 3
 622
 623        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%eax      // eax = ABGR8888 of lower byte pixel 3
 624        movb 6(%esi),%bl                                    // ebx = lower byte pixel 4
 625
 626        addl %eax,%ebp                                      // ebp = ABGR8888 of pixel 3
 627        movb 7(%esi),%dl                                    // edx = upper byte pixel 4
 628
 629        shll $16,%ebp                                       // ebp = [g][r][0][0] pixel 3
 630
 631        shrl $16,%eax                                       //  al = blue component of pixel 3
 632        addl %ecx,%ebp                                      // ebp = [g3][r3][b2][g2] (done)
 633
 634        movl %ebp,4(%edi)                                   // store dword 2
 635        movl ConvertX86p16_32BGR888_LUT_X86(,%ebx,8),%ecx      // ebx = ABGR8888 of lower byte pixel 4
 636
 637        movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebp    // ebp = ABGR8888 of upper byte pixel 4
 638        movb 4*2+0(%esi),%bl                                // ebx = lower byte pixel 1
 639
 640        addl %ebp,%ecx                                      // ecx = ABGR8888 of pixel 4
 641        movb 4*2+1(%esi),%dl                                // edx = upper byte pixel 1
 642
 643        shll $8,%ecx                                        // ecx = [b][g][r][0]
 644        popl %ebp                                           // restore ebp
 645
 646        movb %al,%cl                                        // ecx = [b4][g4][r4][b3] (done)
 647        addl $4*2,%esi
 648
 649        movl %ecx,8(%edi)                                   // store dword 3
 650        addl $3*4,%edi
 651
 652        decl %ebp
 653        jz .L7
 654
 655        jmp .L6
 656
 657.L7:  // check tail
 658    popl %ecx
 659    andl $0b11,%ecx
 660    jz .L9
 661
 662.L8:  // tail
 663    movb (%esi),%dl
 664    movl ConvertX86p16_32BGR888_LUT_X86(,%edx,8),%eax    // eax = ABGR8888 of lower byte
 665    movb 1(%esi),%dl
 666    movl ConvertX86p16_32BGR888_LUT_X86+4(,%edx,8),%ebx  // ebx = ABGR8888 of upper byte
 667    addl %ebx,%eax                                    // eax = ABGR8888 pixel
 668    movb %al,(%edi)
 669    movb %ah,1(%edi)
 670    shrl $16,%eax
 671    movb %al,2(%edi)
 672    addl $2,%esi
 673    addl $3,%edi
 674    decl %ecx
 675    jnz .L8
 676
 677.L9: popl %ebp
 678    jmp _X86RETURN
 679end;
 680
 681
 682
 683procedure ConvertX86p16_16BGR565(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 684asm
 685    // check short
 686    cmpl $16,%ecx
 687    ja .L3
 688
 689
 690.L1:  // short loop
 691    movb (%esi),%al
 692    movb 1(%esi),%ah
 693    movl %eax,%ebx
 694    movl %eax,%edx
 695    shrl $11,%eax
 696    andl $0b11111,%eax
 697    andl $0b11111100000,%ebx
 698    shll $11,%edx
 699    addl %ebx,%eax
 700    addl %edx,%eax
 701    movb %al,(%edi)
 702    movb %ah,1(%edi)
 703    addl $2,%esi
 704    addl $2,%edi
 705    decl %ecx
 706    jnz .L1
 707.L2:
 708    jmp _X86RETURN
 709
 710.L3:  // head
 711    movl %edi,%eax
 712    andl $0b11,%eax
 713    jz .L4
 714    movb (%esi),%al
 715    movb 1(%esi),%ah
 716    movl %eax,%ebx
 717    movl %eax,%edx
 718    shrl $11,%eax
 719    andl $0b11111,%eax
 720    andl $0b11111100000,%ebx
 721    shll $11,%edx
 722    addl %ebx,%eax
 723    addl %edx,%eax
 724    movb %al,(%edi)
 725    movb %ah,1(%edi)
 726    addl $2,%esi
 727    addl $2,%edi
 728    decl %ecx
 729
 730.L4:  // save count
 731    pushl %ecx
 732
 733    // unroll twice
 734    shrl $1,%ecx
 735
 736    // point arrays to end
 737    leal (%esi,%ecx,4),%esi
 738    leal (%edi,%ecx,4),%edi
 739
 740    // negative counter
 741    negl %ecx
 742    jmp .L6
 743
 744.L5:    movl %eax,-4(%edi,%ecx,4)
 745.L6:    movl (%esi,%ecx,4),%eax
 746
 747        movl (%esi,%ecx,4),%ebx
 748        andl $0x07E007E0,%eax
 749
 750        movl (%esi,%ecx,4),%edx
 751        andl $0x0F800F800,%ebx
 752
 753        shrl $11,%ebx
 754        andl $0x001F001F,%edx
 755
 756        shll $11,%edx
 757        addl %ebx,%eax
 758
 759        addl %edx,%eax
 760        incl %ecx
 761
 762        jnz .L5
 763
 764    movl %eax,-4(%edi,%ecx,4)
 765
 766    // tail
 767    popl %ecx
 768    andl $1,%ecx
 769    jz .L7
 770    movb (%esi),%al
 771    movb 1(%esi),%ah
 772    movl %eax,%ebx
 773    movl %eax,%edx
 774    shrl $11,%eax
 775    andl $0b11111,%eax
 776    andl $0b11111100000,%ebx
 777    shll $11,%edx
 778    addl %ebx,%eax
 779    addl %edx,%eax
 780    movb %al,(%edi)
 781    movb %ah,1(%edi)
 782    addl $2,%esi
 783    addl $2,%edi
 784
 785.L7:
 786    jmp _X86RETURN
 787end;
 788
 789
 790
 791
 792procedure ConvertX86p16_16RGB555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 793asm
 794    // check short
 795    cmpl $32,%ecx
 796    ja .L3
 797
 798
 799.L1:  // short loop
 800    movb (%esi),%al
 801    movb 1(%esi),%ah
 802    movl %eax,%ebx
 803    shrl $1,%ebx
 804    andl $0b0111111111100000,%ebx
 805    andl $0b0000000000011111,%eax
 806    addl %ebx,%eax
 807    movb %al,(%edi)
 808    movb %ah,1(%edi)
 809    addl $2,%esi
 810    addl $2,%edi
 811    decl %ecx
 812    jnz .L1
 813.L2:
 814    jmp _X86RETURN
 815
 816.L3:  // head
 817    movl %edi,%eax
 818    andl $0b11,%eax
 819    jz .L4
 820    movb (%esi),%al
 821    movb 1(%esi),%ah
 822    movl %eax,%ebx
 823    shrl $1,%ebx
 824    andl $0b0111111111100000,%ebx
 825    andl $0b0000000000011111,%eax
 826    addl %ebx,%eax
 827    movb %al,(%edi)
 828    movb %ah,1(%edi)
 829    addl $2,%esi
 830    addl $2,%edi
 831    decl %ecx
 832
 833.L4:  // save ebp
 834    pushl %ebp
 835
 836    // save count
 837    pushl %ecx
 838
 839    // unroll four times
 840    shrl $2,%ecx
 841
 842    // point arrays to end
 843    leal (%esi,%ecx,8),%esi
 844    leal (%edi,%ecx,8),%edi
 845
 846    // negative counter
 847    xorl %ebp,%ebp
 848    subl %ecx,%ebp
 849
 850.L5:    movl (%esi,%ebp,8),%eax // agi?
 851        movl 4(%esi,%ebp,8),%ecx
 852
 853        movl %eax,%ebx
 854        movl %ecx,%edx
 855
 856        andl $0x0FFC0FFC0,%eax
 857        andl $0x0FFC0FFC0,%ecx
 858
 859        shrl $1,%eax
 860        andl $0x001F001F,%ebx
 861
 862        shrl $1,%ecx
 863        andl $0x001F001F,%edx
 864
 865        addl %ebx,%eax
 866        addl %edx,%ecx
 867
 868        movl %eax,(%edi,%ebp,8)
 869        movl %ecx,4(%edi,%ebp,8)
 870
 871        incl %ebp
 872        jnz .L5
 873
 874    // tail
 875    popl %ecx
 876.L6: andl $0b11,%ecx
 877    jz .L7
 878    movb (%esi),%al
 879    movb 1(%esi),%ah
 880    movl %eax,%ebx
 881    shrl $1,%ebx
 882    andl $0b0111111111100000,%ebx
 883    andl $0b0000000000011111,%eax
 884    addl %ebx,%eax
 885    movb %al,(%edi)
 886    movb %ah,1(%edi)
 887    addl $2,%esi
 888    addl $2,%edi
 889    decl %ecx
 890    jmp .L6
 891
 892.L7: popl %ebp
 893    jmp _X86RETURN
 894end;
 895
 896
 897
 898
 899
 900procedure ConvertX86p16_16BGR555(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
 901asm
 902    // check short
 903    cmpl $16,%ecx
 904    ja .L3
 905
 906
 907.L1:  // short loop
 908    movb (%esi),%al
 909    movb 1(%esi),%ah
 910    movl %eax,%ebx
 911    movl %eax,%edx
 912    shrl $11,%eax
 913    andl $0b11111,%eax
 914    shrl $1,%ebx
 915    andl $0b1111100000,%ebx
 916    shll $10,%edx
 917    andl $0b0111110000000000,%edx
 918    addl %ebx,%eax
 919    addl %edx,%eax
 920    movb %al,(%edi)
 921    movb %ah,1(%edi)
 922    addl $2,%esi
 923    addl $2,%edi
 924    decl %ecx
 925    jnz .L1
 926.L2:
 927    jmp _X86RETURN
 928
 929.L3:  // head
 930    movl %edi,%eax
 931    andl $0b11,%eax
 932    jz .L4
 933    movb (%esi),%al
 934    movb 1(%esi),%ah
 935    movl %eax,%ebx
 936    movl %eax,%edx
 937    shrl $11,%eax
 938    andl $0b11111,%eax
 939    shrl $1,%ebx
 940    andl $0b1111100000,%ebx
 941    shll $10,%edx
 942    andl $0b0111110000000000,%edx
 943    addl %ebx,%eax
 944    addl %edx,%eax
 945    movb %al,(%edi)
 946    movb %ah,1(%edi)
 947    addl $2,%esi
 948    addl $2,%edi
 949    decl %ecx
 950
 951.L4:  // save count
 952    pushl %ecx
 953
 954    // unroll twice
 955    shrl $1,%ecx
 956
 957    // point arrays to end
 958    leal (%esi,%ecx,4),%esi
 959    leal (%edi,%ecx,4),%edi
 960
 961    // negative counter
 962    negl %ecx
 963    jmp .L6
 964
 965.L5:    movl %eax,-4(%edi,%ecx,4)
 966.L6:    movl (%esi,%ecx,4),%eax
 967
 968        shrl $1,%eax
 969        movl (%esi,%ecx,4),%ebx
 970
 971        andl $0x03E003E0,%eax
 972        movl (%esi,%ecx,4),%edx
 973
 974        andl $0x0F800F800,%ebx
 975
 976        shrl $11,%ebx
 977        andl $0x001F001F,%edx
 978
 979        shll $10,%edx
 980        addl %ebx,%eax
 981
 982        addl %edx,%eax
 983        incl %ecx
 984
 985        jnz .L5
 986
 987    movl %eax,-4(%edi,%ecx,4)
 988
 989    // tail
 990    popl %ecx
 991    andl $1,%ecx
 992    jz .L7
 993    movb (%esi),%al
 994    movb 1(%esi),%ah
 995    movl %eax,%ebx
 996    movl %eax,%edx
 997    shrl $11,%eax
 998    andl $0b11111,%eax
 999    shrl $1,%ebx
1000    andl $0b1111100000,%ebx
1001    shll $10,%edx
1002    andl $0b0111110000000000,%edx
1003    addl %ebx,%eax
1004    addl %edx,%eax
1005    movb %al,(%edi)
1006    movb %ah,1(%edi)
1007    addl $2,%esi
1008    addl $2,%edi
1009
1010.L7:
1011    jmp _X86RETURN
1012end;
1013
1014
1015
1016
1017
{
  ConvertX86p16_8RGB332

  Reduces a run of 16-bit pixels to 8-bit pixels:
    output bits 0..1 = input bits  3..4   (blue)
    output bits 2..4 = input bits  8..10  (green)
    output bits 5..7 = input bits 13..15  (red)

  Registers as used by the code:
    esi = source pointer      (advanced 2 bytes per pixel)
    edi = destination pointer (advanced 1 byte per pixel)
    ecx = pixel count
  Clobbers eax, ebx, ecx, edx, esi, edi and the flags.  ebp is pushed and
  popped on the unrolled path although this routine never modifies it.
  Every path leaves through _X86RETURN, which is defined outside this file.

  Counts up to 16 use a one-pixel loop.  Larger counts convert single pixels
  until edi is a multiple of 4, then four pixels per output dword in the
  main loop, then the remaining 0..3 pixels one at a time.
  A count of zero returns immediately.

  NOTE(review): the main loop preloads three bytes of the following group,
  so its last iteration reads up to three bytes beyond the unrolled region;
  with fewer than two tail pixels some of these lie past the last source
  pixel.
}
procedure ConvertX86p16_8RGB332(CONVERT_PARAMETERS); cdecl; nostackframe; assembler;
asm
    // check short
    cmpl $16,%ecx
    ja .L3

    // empty run: without this check the decl/jnz loop below would wrap ecx
    testl %ecx,%ecx
    jz .L2

.L1:  // short loop
    movb (%esi),%al
    movb 1(%esi),%ah                // ax = source pixel; upper half of eax is masked off below
    movl %eax,%ebx
    movl %eax,%edx
    andl $0b11000,%eax              // blue
    shrl $3,%eax
    andl $0b11100000000,%ebx        // green
    shrl $6,%ebx
    andl $0b1110000000000000,%edx   // red
    shrl $8,%edx
    addl %ebx,%eax
    addl %edx,%eax
    movb %al,(%edi)
    addl $2,%esi
    incl %edi
    decl %ecx
    jnz .L1
.L2:
    jmp _X86RETURN

.L3: movl %edi,%eax                 // head: single pixels until edi is a multiple of 4
    andl $0b11,%eax
    jz .L4
    movb (%esi),%al
    movb 1(%esi),%ah
    movl %eax,%ebx
    movl %eax,%edx
    andl $0b11000,%eax              // blue
    shrl $3,%eax
    andl $0b11100000000,%ebx        // green
    shrl $6,%ebx
    andl $0b1110000000000000,%edx   // red
    shrl $8,%edx
    addl %ebx,%eax
    addl %edx,%eax
    movb %al,(%edi)
    addl $2,%esi
    incl %edi
    decl %ecx
    jmp .L3

.L4:  // save ebp
    pushl %ebp

    // save count (needed again for the tail test)
    pushl %ecx

    // unroll 4 times: ecx = number of 4-pixel groups
    shrl $2,%ecx

    // prestep
    movb (%esi),%dl                 // dl = lower byte pixel 1
    movb 1(%esi),%bl                // bl = upper byte pixel 1
    movb 2(%esi),%dh                // dh = lower byte pixel 2

.L5: shll $16,%edx                  // lower bytes of pixels 1,2 -> upper half of edx
        movb 3(%esi),%bh                                // bh = upper byte pixel 2

        shll $16,%ebx                                   // upper bytes of pixels 1,2 -> upper half of ebx
        movb 4(%esi),%dl                                // dl = lower byte pixel 3

        movb 6(%esi),%dh                                // dh = lower byte pixel 4
        movb 5(%esi),%bl                                // bl = upper byte pixel 3

        andl $0b00011000000110000001100000011000,%edx   // blues (bits 3..4 of each lower byte)
        movb 7(%esi),%bh                                // bh = upper byte pixel 4

        rorl $16+3,%edx                                 // pixel order 1..4 from the low byte up, blues at bits 0..1
        movl %ebx,%eax                                  // setup eax for reds

        andl $0b00000111000001110000011100000111,%ebx   // greens (bits 0..2 of each upper byte)
        andl $0b11100000111000001110000011100000,%eax   // reds

        rorl $16-2,%ebx                                 // same pixel order, greens at bits 2..4
        addl $8,%esi

        rorl $16,%eax                                   // same pixel order, reds stay at bits 5..7
        addl $4,%edi

        addl %ebx,%eax
        movb 1(%esi),%bl                                // preload upper byte of next pixel 1 (greens)

        addl %edx,%eax
        movb (%esi),%dl                                 // preload lower byte of next pixel 1 (blues)

        movl %eax,-4(%edi)                              // store four output pixels
        movb 2(%esi),%dh                                // preload lower byte of next pixel 2

        decl %ecx
        jnz .L5

    // check tail
    popl %ecx
    andl $0b11,%ecx
    jz .L7

.L6:  // tail
    movb (%esi),%al
    movb 1(%esi),%ah
    movl %eax,%ebx
    movl %eax,%edx
    andl $0b11000,%eax              // blue
    shrl $3,%eax
    andl $0b11100000000,%ebx        // green
    shrl $6,%ebx
    andl $0b1110000000000000,%edx   // red
    shrl $8,%edx
    addl %ebx,%eax
    addl %edx,%eax
    movb %al,(%edi)
    addl $2,%esi
    incl %edi
    decl %ecx
    jnz .L6

.L7: popl %ebp
    jmp _X86RETURN
end;