PageRenderTime 59ms CodeModel.GetById 11ms app.highlight 38ms RepoModel.GetById 1ms app.codeStats 1ms

/StormLib/stormlib/zlib/contrib/inflate86/inffast.S

http://ghostcb.googlecode.com/
Assembly | 1368 lines | 1259 code | 109 blank | 0 comment | 71 complexity | 56cc7e752a85f9d6489892aad226976b MD5 | raw file
   1/*
   2 * inffast.S is a hand tuned assembler version of:
   3 *
   4 * inffast.c -- fast decoding
   5 * Copyright (C) 1995-2003 Mark Adler
   6 * For conditions of distribution and use, see copyright notice in zlib.h
   7 *
   8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
   9 * Please use the copyright conditions above.
  10 *
  11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
  12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
  13 * machine, I found that gzip style archives decompressed about 20% faster than
  14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
  15 * depend on how large of a buffer is used for z_stream.next_in & next_out
  16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
  17 * stream processing I/O and crc32/adler32.  In my case, this routine used
  18 * 70% of the cpu time and crc32 used 20%.
  19 *
  20 * I am confident that this version will work in the general case, but I have
  21 * not tested a wide variety of datasets or a wide variety of platforms.
  22 *
  23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
  24 * It should be a runtime flag instead of compile time flag...
  25 *
  26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
  27 * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
  28 * is compiled.  Without either option, runtime detection is enabled.  Runtime
  29 * detection should work on all modern cpus and the recommended algorithm (flip
  30 * ID bit on eflags and then use the cpuid instruction) is used in many
  31 * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
  32 * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
  33 * inffast.obj generates a COFF object which can then be linked with MSVC++
  34 * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
  35 *
  36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
  37 * slower than compiler generated code).  Adjusted cpuid check to use the MMX
  38 * code only for Pentiums < P4 until I have more data on the P4.  Speed
  39 * improvement is only about 15% on the Athlon when compared with code generated
  40 * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
  41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
  42 * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
  43 * the input stream since the MMX code grabs bits in chunks of 32, which
  44 * differs from the inffast.c algorithm.  I don't think there would have been
  45 * read overruns where a page boundary was crossed (a segfault), but there
  46 * could have been overruns when next_in ends on unaligned memory (uninitialized
  47 * memory read).
  48 *
  49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
  50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
  51 * structure offsets which are hard coded in this file.  This was last tested
  52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
  53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
  54 * http://www.charm.net/~christop/zlib/
  55 */
  56
  57
  58/*
  59 * if you have underscore linking problems (_inflate_fast undefined), try
  60 * using -DGAS_COFF to force the underscore-prefixed symbol names below
  61 */
  62#if ! defined( GAS_COFF ) && ! defined( GAS_ELF ) /* format not forced with -D: guess it */
  63
  64#if defined( WIN32 ) || defined( __CYGWIN__ )
  65#define GAS_COFF /* windows object format */
  66#else
  67#define GAS_ELF /* default when neither WIN32 nor __CYGWIN__ is defined */
  68#endif
  69
  70#endif /* ! GAS_COFF && ! GAS_ELF */
  71
  72
  73#if defined( GAS_COFF )
  74
  75/* coff externals have underscores: rename both exported symbols via cpp */
  76#define inflate_fast _inflate_fast
  77#define inflate_fast_use_mmx _inflate_fast_use_mmx
  78
  79#endif /* GAS_COFF */
  80
  81
  82.file "inffast.S"
  83
  84.globl inflate_fast
  85
  86.text /* the constant message strings below are emitted into the text section */
  87.align 4,0
  88.L_invalid_literal_length_code_msg: /* NUL-terminated; its user is outside this part of the file -- presumably stored in strm->msg, TODO confirm */
  89.string "invalid literal/length code"
  90
  91.align 4,0
  92.L_invalid_distance_code_msg: /* NUL-terminated; see the jnz .L_invalid_distance_code paths (handler not visible here) */
  93.string "invalid distance code"
  94
  95.align 4,0
  96.L_invalid_distance_too_far_msg: /* NUL-terminated; pairs with the dist > wsize check (handler not visible here) */
  97.string "invalid distance too far back"
  98
  99#ifndef NO_MMX
 100.align 4,0
 101.L_mask: /* 33 words: word N has its low N bits set, i.e. ( 1 << N ) - 1 for N = 0..32 */
 102.long 0x00000000
 103.long 0x00000001
 104.long 0x00000003
 105.long 0x00000007
 106.long 0x0000000f
 107.long 0x0000001f
 108.long 0x0000003f
 109.long 0x0000007f
 110.long 0x000000ff
 111.long 0x000001ff
 112.long 0x000003ff
 113.long 0x000007ff
 114.long 0x00000fff
 115.long 0x00001fff
 116.long 0x00003fff
 117.long 0x00007fff
 118.long 0x0000ffff
 119.long 0x0001ffff
 120.long 0x0003ffff
 121.long 0x0007ffff
 122.long 0x000fffff
 123.long 0x001fffff
 124.long 0x003fffff
 125.long 0x007fffff
 126.long 0x00ffffff
 127.long 0x01ffffff
 128.long 0x03ffffff
 129.long 0x07ffffff
 130.long 0x0fffffff
 131.long 0x1fffffff
 132.long 0x3fffffff
 133.long 0x7fffffff
 134.long 0xffffffff
 135#endif /* NO_MMX */
 136
 137.text
 138
 139/*
 140 * struct z_stream byte offsets, in zlib.h (hard coded: must match the zlib build)
 141 */
 142#define next_in_strm   0   /* strm->next_in */
 143#define avail_in_strm  4   /* strm->avail_in */
 144#define next_out_strm  12  /* strm->next_out */
 145#define avail_out_strm 16  /* strm->avail_out */
 146#define msg_strm       24  /* strm->msg */
 147#define state_strm     28  /* strm->state */
 148
 149/*
 150 * struct inflate_state byte offsets, in inflate.h (hard coded: must match the zlib build)
 151 */
 152#define mode_state     0   /* state->mode */
 153#define wsize_state    32  /* state->wsize */
 154#define write_state    40  /* state->write */
 155#define window_state   44  /* state->window */
 156#define hold_state     48  /* state->hold */
 157#define bits_state     52  /* state->bits */
 158#define lencode_state  68  /* state->lencode */
 159#define distcode_state 72  /* state->distcode */
 160#define lenbits_state  76  /* state->lenbits */
 161#define distbits_state 80  /* state->distbits */
 162
 163/*
 164 * inflate_fast's activation record (byte offsets from %esp once the prologue has run)
 165 */
 166#define local_var_size 64 /* bytes the prologue subtracts from %esp for the locals below */
 167#define strm_sp        88 /* first arg: z_stream *; local_var_size + 24 (4 pushl + pushf + return address) */
 168#define start_sp       92 /* second arg: unsigned int (local_var_size + 28) */
 169
 170/*
 171 * offsets for local vars on stack (all inside the local_var_size bytes, 0..63)
 172 */
 173#define out            60  /* unsigned char* */
 174#define window         56  /* unsigned char* */
 175#define wsize          52  /* unsigned int */
 176#define write          48  /* unsigned int */
 177#define in             44  /* unsigned char* */
 178#define beg            40  /* unsigned char* */
 179#define buf            28  /* char[ 12 ], offsets 28..39: zero-padded copy of the input when in >= last */
 180#define len            24  /* unsigned int */
 181#define last           20  /* unsigned char* */
 182#define end            16  /* unsigned char* */
 183#define dcode          12  /* code* */
 184#define lcode           8  /* code* */
 185#define dmask           4  /* unsigned int */
 186#define lmask           0  /* unsigned int */
 187
 188/*
 189 * typedef enum inflate_mode consts, in inflate.h
 190 */
 191#define INFLATE_MODE_TYPE 11  /* state->mode flags enum-ed in inflate.h */
 192#define INFLATE_MODE_BAD  26  /* likewise copied from inflate.h; tied to the zlib version -- TODO confirm on upgrade */
 193
 194
 195#if ! defined( USE_MMX ) && ! defined( NO_MMX )
 196
 197#define RUN_TIME_MMX /* neither USE_MMX nor NO_MMX given: choose the code path at run time */
 198
 199#define CHECK_MMX    1 /* undecided: .L_check_mmx runs the cpuid probe */
 200#define DO_USE_MMX   2 /* .L_check_mmx jumps to .L_init_mmx */
 201#define DONT_USE_MMX 3 /* .L_check_mmx jumps to .L_do_loop */
 202
 203.globl inflate_fast_use_mmx
 204
 205.data
 206
 207.align 4,0
 208inflate_fast_use_mmx: /* exported integer flag for run time control 1=check,2=mmx,3=no; starts as 1 */
 209.long CHECK_MMX
 210
 211#if defined( GAS_ELF )
 212/* elf info: mark the flag as a 4-byte data object */
 213.type   inflate_fast_use_mmx,@object
 214.size   inflate_fast_use_mmx,4
 215#endif
 216
 217#endif /* RUN_TIME_MMX */
 218
 219#if defined( GAS_COFF )
 220/* coff info: scl 2 = extern, type 32 = function */
 221.def inflate_fast; .scl 2; .type 32; .endef
 222#endif
 223
 224.text
 225
 226.align 32,0x90
 227inflate_fast:
 228        pushl   %edi
 229        pushl   %esi
 230        pushl   %ebp
 231        pushl   %ebx
 232        pushf   /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
 233        subl    $local_var_size, %esp
 234        cld
 235
 236#define strm_r  %esi
 237#define state_r %edi
 238
 239        movl    strm_sp(%esp), strm_r
 240        movl    state_strm(strm_r), state_r
 241
 242        /* in = strm->next_in;
 243         * out = strm->next_out;
 244         * last = in + strm->avail_in - 11;
 245         * beg = out - (start - strm->avail_out);
 246         * end = out + (strm->avail_out - 257);
 247         */
 248        movl    avail_in_strm(strm_r), %edx
 249        movl    next_in_strm(strm_r), %eax
 250
 251        addl    %eax, %edx      /* avail_in += next_in */
 252        subl    $11, %edx       /* avail_in -= 11 */
 253
 254        movl    %eax, in(%esp)
 255        movl    %edx, last(%esp)
 256
 257        movl    start_sp(%esp), %ebp
 258        movl    avail_out_strm(strm_r), %ecx
 259        movl    next_out_strm(strm_r), %ebx
 260
 261        subl    %ecx, %ebp      /* start -= avail_out */
 262        negl    %ebp            /* start = -start */
 263        addl    %ebx, %ebp      /* start += next_out */
 264
 265        subl    $257, %ecx      /* avail_out -= 257 */
 266        addl    %ebx, %ecx      /* avail_out += out */
 267
 268        movl    %ebx, out(%esp)
 269        movl    %ebp, beg(%esp)
 270        movl    %ecx, end(%esp)
 271
 272        /* wsize = state->wsize;
 273         * write = state->write;
 274         * window = state->window;
 275         * hold = state->hold;
 276         * bits = state->bits;
 277         * lcode = state->lencode;
 278         * dcode = state->distcode;
 279         * lmask = ( 1 << state->lenbits ) - 1;
 280         * dmask = ( 1 << state->distbits ) - 1;
 281         */
 282
 283        movl    lencode_state(state_r), %eax
 284        movl    distcode_state(state_r), %ecx
 285
 286        movl    %eax, lcode(%esp)
 287        movl    %ecx, dcode(%esp)
 288
 289        movl    $1, %eax
 290        movl    lenbits_state(state_r), %ecx
 291        shll    %cl, %eax
 292        decl    %eax
 293        movl    %eax, lmask(%esp)
 294
 295        movl    $1, %eax
 296        movl    distbits_state(state_r), %ecx
 297        shll    %cl, %eax
 298        decl    %eax
 299        movl    %eax, dmask(%esp)
 300
 301        movl    wsize_state(state_r), %eax
 302        movl    write_state(state_r), %ecx
 303        movl    window_state(state_r), %edx
 304
 305        movl    %eax, wsize(%esp)
 306        movl    %ecx, write(%esp)
 307        movl    %edx, window(%esp)
 308
 309        movl    hold_state(state_r), %ebp
 310        movl    bits_state(state_r), %ebx
 311
 312#undef strm_r
 313#undef state_r
 314
 315#define in_r       %esi
 316#define from_r     %esi
 317#define out_r      %edi
 318
 319        movl    in(%esp), in_r
 320        movl    last(%esp), %ecx
 321        cmpl    in_r, %ecx
 322        ja      .L_align_long           /* if in < last */
 323
 324        addl    $11, %ecx               /* ecx = &in[ avail_in ] */
 325        subl    in_r, %ecx              /* ecx = avail_in */
 326        movl    $12, %eax
 327        subl    %ecx, %eax              /* eax = 12 - avail_in */
 328        leal    buf(%esp), %edi
 329        rep     movsb                   /* memcpy( buf, in, avail_in ) */
 330        movl    %eax, %ecx
 331        xorl    %eax, %eax
 332        rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
 333        leal    buf(%esp), in_r         /* in = buf */
 334        movl    in_r, last(%esp)        /* last = in, do just one iteration */
 335        jmp     .L_is_aligned
 336
 337        /* align in_r on long boundary */
 338.L_align_long:
 339        testl   $3, in_r
 340        jz      .L_is_aligned
 341        xorl    %eax, %eax
 342        movb    (in_r), %al
 343        incl    in_r
 344        movl    %ebx, %ecx
 345        addl    $8, %ebx
 346        shll    %cl, %eax
 347        orl     %eax, %ebp
 348        jmp     .L_align_long
 349
 350.L_is_aligned:
 351        movl    out(%esp), out_r
 352
 353#if defined( NO_MMX )
 354        jmp     .L_do_loop
 355#endif
 356
 357#if defined( USE_MMX )
 358        jmp     .L_init_mmx
 359#endif
 360
 361/*** Runtime MMX check ***/
 362
 363#if defined( RUN_TIME_MMX )
 364.L_check_mmx:
 365        cmpl    $DO_USE_MMX, inflate_fast_use_mmx /* dispatch on the stored decision */
 366        je      .L_init_mmx
 367        ja      .L_do_loop /* > 2, i.e. DONT_USE_MMX: take the non-MMX loop */
 368
 369        pushl   %eax              /* flag < 2 (CHECK_MMX): probe; cpuid overwrites eax..edx */
 370        pushl   %ebx
 371        pushl   %ecx
 372        pushl   %edx
 373        pushf
 374        movl    (%esp), %eax      /* copy eflags to eax */
 375        xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
 376                                   * to see if cpu supports cpuid...
 377                                   * ID bit method not supported by NexGen but
 378                                   * bios may load a cpuid instruction and
 379                                   * cpuid may be disabled on Cyrix 5-6x86 */
 380        popf
 381        pushf
 382        popl    %edx              /* copy new eflags to edx */
 383        xorl    %eax, %edx        /* test if ID bit is flipped */
 384        jz      .L_dont_use_mmx   /* not flipped if zero */
 385        xorl    %eax, %eax        /* cpuid function 0: vendor string */
 386        cpuid
 387        cmpl    $0x756e6547, %ebx /* "Genu": check for GenuineIntel in ebx,ecx,edx */
 388        jne     .L_dont_use_mmx
 389        cmpl    $0x6c65746e, %ecx /* "ntel" */
 390        jne     .L_dont_use_mmx
 391        cmpl    $0x49656e69, %edx /* "ineI" */
 392        jne     .L_dont_use_mmx
 393        movl    $1, %eax          /* cpuid function 1 */
 394        cpuid                     /* get cpu features */
 395        shrl    $8, %eax          /* move the family field (bits 8..11) down */
 396        andl    $15, %eax
 397        cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
 398        jne     .L_dont_use_mmx
 399        testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
 400        jnz     .L_use_mmx
 401        jmp     .L_dont_use_mmx
 402.L_use_mmx:
 403        movl    $DO_USE_MMX, inflate_fast_use_mmx /* remember the result for later calls */
 404        jmp     .L_check_mmx_pop
 405.L_dont_use_mmx:
 406        movl    $DONT_USE_MMX, inflate_fast_use_mmx /* remember the result for later calls */
 407.L_check_mmx_pop:
 408        popl    %edx              /* restore the four registers pushed before the probe */
 409        popl    %ecx
 410        popl    %ebx
 411        popl    %eax
 412        jmp     .L_check_mmx      /* dispatch again, now on the stored decision */
 413#endif
 414
 415
 416/*** Non-MMX code ***/
 417
 418#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
 419
 420#define hold_r     %ebp
 421#define bits_r     %bl
 422#define bitslong_r %ebx
 423
 424.align 32,0x90
 425.L_while_test:
 426        /* while (in < last && out < end)
 427         */
 428        cmpl    out_r, end(%esp)
 429        jbe     .L_break_loop           /* if (out >= end) */
 430
 431        cmpl    in_r, last(%esp)
 432        jbe     .L_break_loop
 433
 434.L_do_loop:
 435        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
 436         *
 437         * do {
 438         *   if (bits < 15) {
 439         *     hold |= *((unsigned short *)in)++ << bits;
 440         *     bits += 16
 441         *   }
 442         *   this = lcode[hold & lmask]
 443         */
 444        cmpb    $15, bits_r
 445        ja      .L_get_length_code      /* if (15 < bits) */
 446
 447        xorl    %eax, %eax
 448        lodsw                           /* al = *(ushort *)in++ */
 449        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
 450        addb    $16, bits_r             /* bits += 16 */
 451        shll    %cl, %eax
 452        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
 453
 454.L_get_length_code:
 455        movl    lmask(%esp), %edx       /* edx = lmask */
 456        movl    lcode(%esp), %ecx       /* ecx = lcode */
 457        andl    hold_r, %edx            /* edx &= hold */
 458        movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
 459
 460.L_dolen:
 461        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
 462         *
 463         * dolen:
 464         *    bits -= this.bits;
 465         *    hold >>= this.bits
 466         */
 467        movb    %ah, %cl                /* cl = this.bits */
 468        subb    %ah, bits_r             /* bits -= this.bits */
 469        shrl    %cl, hold_r             /* hold >>= this.bits */
 470
 471        /* check if op is a literal
 472         * if (op == 0) {
 473         *    PUP(out) = this.val;
 474         *  }
 475         */
 476        testb   %al, %al
 477        jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
 478
 479        shrl    $16, %eax               /* output this.val char */
 480        stosb
 481        jmp     .L_while_test
 482
 483.L_test_for_length_base:
 484        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
 485         *
 486         * else if (op & 16) {
 487         *   len = this.val
 488         *   op &= 15
 489         *   if (op) {
 490         *     if (op > bits) {
 491         *       hold |= *((unsigned short *)in)++ << bits;
 492         *       bits += 16
 493         *     }
 494         *     len += hold & mask[op];
 495         *     bits -= op;
 496         *     hold >>= op;
 497         *   }
 498         */
 499#define len_r %edx
 500        movl    %eax, len_r             /* len = this */
 501        shrl    $16, len_r              /* len = this.val */
 502        movb    %al, %cl
 503
 504        testb   $16, %al
 505        jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
 506        andb    $15, %cl                /* op &= 15 */
 507        jz      .L_save_len             /* if (!op) */
 508        cmpb    %cl, bits_r
 509        jae     .L_add_bits_to_len      /* if (op <= bits) */
 510
 511        movb    %cl, %ch                /* stash op in ch, freeing cl */
 512        xorl    %eax, %eax
 513        lodsw                           /* al = *(ushort *)in++ */
 514        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
 515        addb    $16, bits_r             /* bits += 16 */
 516        shll    %cl, %eax
 517        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
 518        movb    %ch, %cl                /* move op back to ecx */
 519
 520.L_add_bits_to_len:
 521        movl    $1, %eax
 522        shll    %cl, %eax
 523        decl    %eax
 524        subb    %cl, bits_r
 525        andl    hold_r, %eax            /* eax &= hold */
 526        shrl    %cl, hold_r
 527        addl    %eax, len_r             /* len += hold & mask[op] */
 528
 529.L_save_len:
 530        movl    len_r, len(%esp)        /* save len */
 531#undef  len_r
 532
 533.L_decode_distance:
 534        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 535         *
 536         *   if (bits < 15) {
 537         *     hold |= *((unsigned short *)in)++ << bits;
 538         *     bits += 16
 539         *   }
 540         *   this = dcode[hold & dmask];
 541         * dodist:
 542         *   bits -= this.bits;
 543         *   hold >>= this.bits;
 544         *   op = this.op;
 545         */
 546
 547        cmpb    $15, bits_r
 548        ja      .L_get_distance_code    /* if (15 < bits) */
 549
 550        xorl    %eax, %eax
 551        lodsw                           /* al = *(ushort *)in++ */
 552        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
 553        addb    $16, bits_r             /* bits += 16 */
 554        shll    %cl, %eax
 555        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
 556
 557.L_get_distance_code:
 558        movl    dmask(%esp), %edx       /* edx = dmask */
 559        movl    dcode(%esp), %ecx       /* ecx = dcode */
 560        andl    hold_r, %edx            /* edx &= hold */
 561        movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
 562
 563#define dist_r %edx
 564.L_dodist:
 565        movl    %eax, dist_r            /* dist = this */
 566        shrl    $16, dist_r             /* dist = this.val */
 567        movb    %ah, %cl
 568        subb    %ah, bits_r             /* bits -= this.bits */
 569        shrl    %cl, hold_r             /* hold >>= this.bits */
 570
 571        /* if (op & 16) {
 572         *   dist = this.val
 573         *   op &= 15
 574         *   if (op > bits) {
 575         *     hold |= *((unsigned short *)in)++ << bits;
 576         *     bits += 16
 577         *   }
 578         *   dist += hold & mask[op];
 579         *   bits -= op;
 580         *   hold >>= op;
 581         */
 582        movb    %al, %cl                /* cl = this.op */
 583
 584        testb   $16, %al                /* if ((op & 16) == 0) */
 585        jz      .L_test_for_second_level_dist
 586        andb    $15, %cl                /* op &= 15 */
 587        jz      .L_check_dist_one
 588        cmpb    %cl, bits_r
 589        jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
 590
 591        movb    %cl, %ch                /* stash op in ch, freeing cl */
 592        xorl    %eax, %eax
 593        lodsw                           /* al = *(ushort *)in++ */
 594        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
 595        addb    $16, bits_r             /* bits += 16 */
 596        shll    %cl, %eax
 597        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
 598        movb    %ch, %cl                /* move op back to ecx */
 599
 600.L_add_bits_to_dist: /* cl = op (1..15): add op extra bits from hold to dist */
 601        movl    $1, %eax
 602        shll    %cl, %eax
 603        decl    %eax                    /* (1 << op) - 1 */
 604        subb    %cl, bits_r             /* bits -= op */
 605        andl    hold_r, %eax            /* eax &= hold */
 606        shrl    %cl, hold_r             /* hold >>= op */
 607        addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1); falls through to .L_check_window */
 609
 610.L_check_window:
 611        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 612         *       %ecx = nbytes
 613         *
 614         * nbytes = out - beg;
 615         * if (dist <= nbytes) {
 616         *   from = out - dist;
 617         *   do {
 618         *     PUP(out) = PUP(from);
 619         *   } while (--len > 0) {
 620         * }
 621         */
 622
 623        movl    in_r, in(%esp)          /* save in so from can use it's reg */
 624        movl    out_r, %eax
 625        subl    beg(%esp), %eax         /* nbytes = out - beg */
 626
 627        cmpl    dist_r, %eax
 628        jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
 629
 630        movl    len(%esp), %ecx
 631        movl    out_r, from_r
 632        subl    dist_r, from_r          /* from = out - dist */
 633
 634        subl    $3, %ecx
 635        movb    (from_r), %al
 636        movb    %al, (out_r)
 637        movb    1(from_r), %al
 638        movb    2(from_r), %dl
 639        addl    $3, from_r
 640        movb    %al, 1(out_r)
 641        movb    %dl, 2(out_r)
 642        addl    $3, out_r
 643        rep     movsb
 644
 645        movl    in(%esp), in_r          /* move in back to %esi, toss from */
 646        jmp     .L_while_test
 647
 648.align 16,0x90
 649.L_check_dist_one:
 650        cmpl    $1, dist_r
 651        jne     .L_check_window
 652        cmpl    out_r, beg(%esp)
 653        je      .L_check_window
 654
 655        decl    out_r
 656        movl    len(%esp), %ecx
 657        movb    (out_r), %al
 658        subl    $3, %ecx
 659
 660        movb    %al, 1(out_r)
 661        movb    %al, 2(out_r)
 662        movb    %al, 3(out_r)
 663        addl    $4, out_r
 664        rep     stosb
 665
 666        jmp     .L_while_test
 667
 668.align 16,0x90
 669.L_test_for_second_level_length:
 670        /* else if ((op & 64) == 0) {
 671         *   this = lcode[this.val + (hold & mask[op])];
 672         * }
 673         */
 674        testb   $64, %al
 675        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
 676
 677        movl    $1, %eax
 678        shll    %cl, %eax
 679        decl    %eax
 680        andl    hold_r, %eax            /* eax &= hold */
 681        addl    %edx, %eax              /* eax += this.val */
 682        movl    lcode(%esp), %edx       /* edx = lcode */
 683        movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
 684        jmp     .L_dolen
 685
 686.align 16,0x90
 687.L_test_for_second_level_dist:
 688        /* else if ((op & 64) == 0) {
 689         *   this = dcode[this.val + (hold & mask[op])];
 690         * }
 691         */
 692        testb   $64, %al
 693        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
 694
 695        movl    $1, %eax
 696        shll    %cl, %eax
 697        decl    %eax
 698        andl    hold_r, %eax            /* eax &= hold */
 699        addl    %edx, %eax              /* eax += this.val */
 700        movl    dcode(%esp), %edx       /* edx = dcode */
 701        movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
 702        jmp     .L_dodist
 703
 704.align 16,0x90
 705.L_clip_window:
 706        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 707         *       %eax = nbytes (out - beg) on entry, moved to %ecx below
 708         *
 709         * else {
 710         *   if (dist > wsize) {
 711         *     invalid distance
 712         *   }
 713         *   from = window;
 714         *   nbytes = dist - nbytes;
 715         *   if (write == 0) {
 716         *     from += wsize - nbytes;
 717         */
 718#define nbytes_r %ecx
 719        movl    %eax, nbytes_r
 720        movl    wsize(%esp), %eax       /* prepare for dist compare */
 721        negl    nbytes_r                /* nbytes = -nbytes */
 722        movl    window(%esp), from_r    /* from = window */
 723
 724        cmpl    dist_r, %eax
 725        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
 726
 727        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
 728        cmpl    $0, write(%esp)
 729        jne     .L_wrap_around_window   /* if (write != 0) */
 730
 731        subl    nbytes_r, %eax
 732        addl    %eax, from_r            /* from += wsize - nbytes */
 733
 734        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 735         *       %ecx = nbytes, %eax = len
 736         *
 737         *     if (nbytes < len) {
 738         *       len -= nbytes;
 739         *       do {
 740         *         PUP(out) = PUP(from);
 741         *       } while (--nbytes);
 742         *       from = out - dist;
 743         *     }
 744         *   }
 745         */
 746#define len_r %eax
 747        movl    len(%esp), len_r
 748        cmpl    nbytes_r, len_r
 749        jbe     .L_do_copy1             /* if (nbytes >= len) */
 750
 751        subl    nbytes_r, len_r         /* len -= nbytes */
 752        rep     movsb                   /* copy nbytes bytes out of the window */
 753        movl    out_r, from_r
 754        subl    dist_r, from_r          /* from = out - dist */
 755        jmp     .L_do_copy1             /* .L_do_copy1 copies the remaining len bytes */
 765
 766.L_wrap_around_window:
 767        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 768         *       %ecx = nbytes, %eax = write, %eax = len
 769         *
 770         *   else if (write < nbytes) {
 771         *     from += wsize + write - nbytes;
 772         *     nbytes -= write;
 773         *     if (nbytes < len) {
 774         *       len -= nbytes;
 775         *       do {
 776         *         PUP(out) = PUP(from);
 777         *       } while (--nbytes);
 778         *       from = window;
 779         *       nbytes = write;
 780         *       if (nbytes < len) {
 781         *         len -= nbytes;
 782         *         do {
 783         *           PUP(out) = PUP(from);
 784         *         } while(--nbytes);
 785         *         from = out - dist;
 786         *       }
 787         *     }
 788         *   }
 789         */
 790#define write_r %eax
 791        movl    write(%esp), write_r
 792        cmpl    write_r, nbytes_r
 793        jbe     .L_contiguous_in_window /* if (write >= nbytes) */
 794
 795        addl    wsize(%esp), from_r
 796        addl    write_r, from_r
 797        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
 798        subl    write_r, nbytes_r       /* nbytes -= write */
 799#undef write_r
 800
 801        movl    len(%esp), len_r
 802        cmpl    nbytes_r, len_r
 803        jbe     .L_do_copy1             /* if (nbytes >= len) */
 804
 805        subl    nbytes_r, len_r         /* len -= nbytes */
 806        rep     movsb
 807        movl    window(%esp), from_r    /* from = window */
 808        movl    write(%esp), nbytes_r   /* nbytes = write */
 809        cmpl    nbytes_r, len_r
 810        jbe     .L_do_copy1             /* if (nbytes >= len) */
 811
 812        subl    nbytes_r, len_r         /* len -= nbytes */
 813        rep     movsb
 814        movl    out_r, from_r
 815        subl    dist_r, from_r          /* from = out - dist */
 816        jmp     .L_do_copy1
 817
 818.L_contiguous_in_window:
 819        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
 820         *       %ecx = nbytes, %eax = write, %eax = len
 821         *
 822         *   else {
 823         *     from += write - nbytes;
 824         *     if (nbytes < len) {
 825         *       len -= nbytes;
 826         *       do {
 827         *         PUP(out) = PUP(from);
 828         *       } while (--nbytes);
 829         *       from = out - dist;
 830         *     }
 831         *   }
 832         */
 833#define write_r %eax
 834        addl    write_r, from_r
 835        subl    nbytes_r, from_r        /* from += write - nbytes */
 836#undef write_r
 837
 838        movl    len(%esp), len_r
 839        cmpl    nbytes_r, len_r
 840        jbe     .L_do_copy1             /* if (nbytes >= len) */
 841
 842        subl    nbytes_r, len_r         /* len -= nbytes */
 843        rep     movsb
 844        movl    out_r, from_r
 845        subl    dist_r, from_r          /* from = out - dist */
 846
 847.L_do_copy1:
 848        /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
 849         *       %eax = len
 850         *
 851         *     while (len > 0) {
 852         *       PUP(out) = PUP(from);
 853         *       len--;
 854         *     }
 855         *   }
 856         * } while (in < last && out < end);
 857         */
 858#undef nbytes_r
 859#define in_r %esi
 860        movl    len_r, %ecx
 861        rep     movsb
 862
 863        movl    in(%esp), in_r          /* move in back to %esi, toss from */
 864        jmp     .L_while_test
 865
 866#undef len_r
 867#undef dist_r
 868
 869#endif /* NO_MMX || RUN_TIME_MMX */
 870
 871
 872/*** MMX code ***/
 873
 874#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
 875
 876.align 32,0x90
 877.L_init_mmx:                         /* set up registers for the MMX decode loop */
 878        emms                            /* start from an empty MMX state */
 879
 880#undef  bits_r
 881#undef  bitslong_r
 882#define bitslong_r %ebp
 883#define hold_mm    %mm0
 884        movd    %ebp, hold_mm           /* hold moves from %ebp into hold_mm */
 885        movl    %ebx, bitslong_r        /* bits moves from %ebx into %ebp */
 886
 887#define used_mm   %mm1
 888#define dmask2_mm %mm2
 889#define lmask2_mm %mm3
 890#define lmask_mm  %mm4
 891#define dmask_mm  %mm5
 892#define tmp_mm    %mm6
 893
 894        movd    lmask(%esp), lmask_mm   /* lmask_mm = working copy of lmask */
 895        movq    lmask_mm, lmask2_mm     /* lmask2_mm = spare copy used to restore lmask_mm */
 896        movd    dmask(%esp), dmask_mm   /* dmask_mm = working copy of dmask */
 897        movq    dmask_mm, dmask2_mm     /* dmask2_mm = spare copy used to restore dmask_mm */
 898        pxor    used_mm, used_mm        /* used = 0, so the first psrlq shifts nothing out */
 899        movl    lcode(%esp), %ebx       /* ebx = lcode */
 900        jmp     .L_do_loop_mmx          /* enter the loop body, skipping the bounds test */
 901
 902.align 32,0x90
 903.L_while_test_mmx:
 904        /* while (in < last && out < end)
 905         */
 906        cmpl    out_r, end(%esp)
 907        jbe     .L_break_loop           /* if (out >= end) */
 908
 909        cmpl    in_r, last(%esp)
 910        jbe     .L_break_loop           /* if (in >= last) */
 911
 912.L_do_loop_mmx:
 913        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
 914
 915        cmpl    $32, bitslong_r
 916        ja      .L_get_length_code_mmx  /* if (32 < bits) */
 917
 918        movd    bitslong_r, tmp_mm      /* tmp_mm = bits, used as the shift count */
 919        movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
 920        addl    $4, in_r
 921        psllq   tmp_mm, %mm7            /* line the new bits up above the ones already held */
 922        addl    $32, bitslong_r         /* bits += 32 */
 923        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
 924
 925.L_get_length_code_mmx:
 926        pand    hold_mm, lmask_mm       /* lmask_mm = hold & lmask (clobbers the mask) */
 927        movd    lmask_mm, %eax          /* eax = table index */
 928        movq    lmask2_mm, lmask_mm     /* restore lmask_mm from its spare copy */
 929        movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
 930
 931.L_dolen_mmx:
 932        movzbl  %ah, %ecx               /* ecx = this.bits */
 933        movd    %ecx, used_mm           /* remember the count; hold_mm is shifted lazily later */
 934        subl    %ecx, bitslong_r        /* bits -= this.bits */
 935
 936        testb   %al, %al
 937        jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
 938
 939        shrl    $16, %eax               /* output this.val char */
 940        stosb                           /* store %al at (%edi) and step out */
 941        jmp     .L_while_test_mmx
 942
 943.L_test_for_length_base_mmx:
 944#define len_r  %edx
 945        movl    %eax, len_r             /* len = this */
 946        shrl    $16, len_r              /* len = this.val */
 947
 948        testb   $16, %al
 949        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
 950        andl    $15, %eax               /* op &= 15 */
 951        jz      .L_decode_distance_mmx  /* if (!op) */
 952
 953        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
 954        movd    %eax, used_mm           /* used = op, shifted out of hold_mm at the next psrlq */
 955        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
 956        subl    %eax, bitslong_r        /* bits -= op */
 957        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
 958        addl    %ecx, len_r             /* len += hold & mask[op] */
 959
 960.L_decode_distance_mmx:
 961        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
 962
 963        cmpl    $32, bitslong_r
 964        ja      .L_get_dist_code_mmx    /* if (32 < bits) */
 965
 966        movd    bitslong_r, tmp_mm      /* tmp_mm = bits, used as the shift count */
 967        movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
 968        addl    $4, in_r
 969        psllq   tmp_mm, %mm7            /* line the new bits up above the ones already held */
 970        addl    $32, bitslong_r         /* bits += 32 */
 971        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
 972
 973.L_get_dist_code_mmx:
 974        movl    dcode(%esp), %ebx       /* ebx = dcode */
 975        pand    hold_mm, dmask_mm       /* dmask_mm = hold & dmask (clobbers the mask) */
 976        movd    dmask_mm, %eax          /* eax = table index */
 977        movq    dmask2_mm, dmask_mm     /* restore dmask_mm from its spare copy */
 978        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
 979
 980.L_dodist_mmx:
 981#define dist_r %ebx
 982        movzbl  %ah, %ecx               /* ecx = this.bits */
 983        movl    %eax, dist_r            /* dist = this (overwrites the table pointer in %ebx) */
 984        shrl    $16, dist_r             /* dist  = this.val */
 985        subl    %ecx, bitslong_r        /* bits -= this.bits */
 986        movd    %ecx, used_mm           /* remember the count for the next lazy shift */
 987
 988        testb   $16, %al                /* if ((op & 16) == 0) */
 989        jz      .L_test_for_second_level_dist_mmx
 990        andl    $15, %eax               /* op &= 15 */
 991        jz      .L_check_dist_one_mmx   /* no extra bits: try the dist == 1 fast path */
 992
 993.L_add_bits_to_dist_mmx:
 994        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
 995        movd    %eax, used_mm           /* save bit length of current op */
 996        movd    hold_mm, %ecx           /* get the next bits on input stream */
 997        subl    %eax, bitslong_r        /* bits -= op bits */
 998        andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
 999        addl    %ecx, dist_r            /* dist += hold & mask[op] */
1000
1001.L_check_window_mmx:
1002        movl    in_r, in(%esp)          /* save in so from can use its reg */
1003        movl    out_r, %eax
1004        subl    beg(%esp), %eax         /* nbytes = out - beg */
1005
1006        cmpl    dist_r, %eax
1007        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */
1008
1009        movl    len_r, %ecx             /* ecx = len; %edx is free to clobber from here */
1010        movl    out_r, from_r
1011        subl    dist_r, from_r          /* from = out - dist */
1012
1013        subl    $3, %ecx                /* first 3 bytes are copied by hand; presumably len >= 3 here */
1014        movb    (from_r), %al
1015        movb    %al, (out_r)            /* out[0] = from[0] */
1016        movb    1(from_r), %al
1017        movb    2(from_r), %dl          /* %dl is the low byte of len_r; len already lives in %ecx */
1018        addl    $3, from_r
1019        movb    %al, 1(out_r)           /* out[1] = from[1] */
1020        movb    %dl, 2(out_r)           /* out[2] = from[2] */
1021        addl    $3, out_r
1022        rep     movsb                   /* copy the remaining len - 3 bytes */
1023
1024        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1025        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1026        jmp     .L_while_test_mmx
1027
1028.align 16,0x90
1029.L_check_dist_one_mmx:               /* fast path: dist == 1 repeats the previous output byte */
1030        cmpl    $1, dist_r
1031        jne     .L_check_window_mmx     /* if (dist != 1) take the general copy */
1032        cmpl    out_r, beg(%esp)
1033        je      .L_check_window_mmx     /* if (out == beg) take the general copy */
1034
1035        decl    out_r                   /* point at the previous output byte */
1036        movl    len_r, %ecx
1037        movb    (out_r), %al            /* al = byte to replicate */
1038        subl    $3, %ecx                /* 3 bytes are stored by hand; presumably len >= 3 here */
1039
1040        movb    %al, 1(out_r)
1041        movb    %al, 2(out_r)
1042        movb    %al, 3(out_r)
1043        addl    $4, out_r               /* net effect: out advanced by 3 */
1044        rep     stosb                   /* fill the remaining len - 3 bytes with %al */
1045
1046        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1047        jmp     .L_while_test_mmx
1048
1049.align 16,0x90
1050.L_test_for_second_level_length_mmx:
1051        testb   $64, %al
1052        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
1053
1054        andl    $15, %eax               /* op &= 15 */
1055        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1056        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1057        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1058        addl    len_r, %ecx             /* ecx += this.val (held in len_r) */
1059        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[this.val + (hold & mask[op])] */
1060        jmp     .L_dolen_mmx            /* decode the second-level entry */
1061
1062.align 16,0x90
1063.L_test_for_second_level_dist_mmx:
1064        testb   $64, %al
1065        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
1066
1067        andl    $15, %eax               /* op &= 15 */
1068        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1069        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1070        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1071        movl    dcode(%esp), %eax       /* eax = dcode */
1072        addl    dist_r, %ecx            /* ecx += this.val (held in dist_r) */
1073        movl    (%eax,%ecx,4), %eax     /* eax = dcode[this.val + (hold & mask[op])] */
1074        jmp     .L_dodist_mmx           /* decode the second-level entry */
1075
1076.align 16,0x90
1077.L_clip_window_mmx:                  /* dist > out - beg: the copy starts inside the window */
1078#define nbytes_r %ecx
1079        movl    %eax, nbytes_r          /* nbytes = out - beg (computed by the caller path) */
1080        movl    wsize(%esp), %eax       /* prepare for dist compare */
1081        negl    nbytes_r                /* nbytes = -nbytes */
1082        movl    window(%esp), from_r    /* from = window */
1083
1084        cmpl    dist_r, %eax
1085        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
1086
1087        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
1088        cmpl    $0, write(%esp)
1089        jne     .L_wrap_around_window_mmx /* if (write != 0) */
1090
1091        subl    nbytes_r, %eax
1092        addl    %eax, from_r            /* from += wsize - nbytes */
1093
1094        cmpl    nbytes_r, len_r
1095        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1096
1097        subl    nbytes_r, len_r         /* len -= nbytes */
1098        rep     movsb                   /* copy nbytes bytes out of the window */
1099        movl    out_r, from_r
1100        subl    dist_r, from_r          /* from = out - dist */
1101        jmp     .L_do_copy1_mmx         /* rest of the match comes from the output itself */
1111
1112.L_wrap_around_window_mmx:           /* write != 0; from = window and nbytes = dist - (out - beg) on entry */
1113#define write_r %eax
1114        movl    write(%esp), write_r
1115        cmpl    write_r, nbytes_r
1116        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */
1117
1118        addl    wsize(%esp), from_r
1119        addl    write_r, from_r
1120        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
1121        subl    write_r, nbytes_r       /* nbytes -= write */
1122#undef write_r
1123
1124        cmpl    nbytes_r, len_r
1125        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1126
1127        subl    nbytes_r, len_r         /* len -= nbytes */
1128        rep     movsb                   /* copy nbytes bytes starting at from */
1129        movl    window(%esp), from_r    /* from = window */
1130        movl    write(%esp), nbytes_r   /* nbytes = write */
1131        cmpl    nbytes_r, len_r
1132        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1133
1134        subl    nbytes_r, len_r         /* len -= nbytes */
1135        rep     movsb                   /* copy write bytes from the start of the window */
1136        movl    out_r, from_r
1137        subl    dist_r, from_r          /* from = out - dist */
1138        jmp     .L_do_copy1_mmx
1139
1140.L_contiguous_in_window_mmx:         /* write >= nbytes; %eax still holds write from the caller path */
1141#define write_r %eax
1142        addl    write_r, from_r
1143        subl    nbytes_r, from_r        /* from += write - nbytes */
1144#undef write_r
1145
1146        cmpl    nbytes_r, len_r
1147        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1148
1149        subl    nbytes_r, len_r         /* len -= nbytes */
1150        rep     movsb                   /* copy nbytes bytes out of the window */
1151        movl    out_r, from_r
1152        subl    dist_r, from_r          /* from = out - dist */
1153
1154.L_do_copy1_mmx:                     /* final copy of the remaining len bytes from from to out */
1155#undef nbytes_r
1156#define in_r %esi
1157        movl    len_r, %ecx             /* ecx = remaining len, the rep count */
1158        rep     movsb                   /* copy ecx bytes from (%esi) to (%edi) */
1159
1160        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1161        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1162        jmp     .L_while_test_mmx
1163
1164#undef hold_r
1165#undef bitslong_r
1166
1167#endif /* USE_MMX || RUN_TIME_MMX */
1168
1169
1170/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
1171
1172.L_invalid_distance_code:
1173        /* else {
1174         *   strm->msg = "invalid distance code";
1175         *   state->mode = BAD;
1176         * }
1177         */
1178        movl    $.L_invalid_distance_code_msg, %ecx     /* ecx = msg for .L_update_stream_state */
1179        movl    $INFLATE_MODE_BAD, %edx                 /* edx = new state->mode */
1180        jmp     .L_update_stream_state
1181
1182.L_test_for_end_of_block:
1183        /* else if (op & 32) {
1184         *   state->mode = TYPE;
1185         *   break;
1186         * }
1187         */
1188        testb   $32, %al
1189        jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */
1190
1191        movl    $0, %ecx                /* ecx = NULL: leave strm->msg untouched */
1192        movl    $INFLATE_MODE_TYPE, %edx
1193        jmp     .L_update_stream_state
1194
1195.L_invalid_literal_length_code:
1196        /* else {
1197         *   strm->msg = "invalid literal/length code";
1198         *   state->mode = BAD;
1199         * }
1200         */
1201        movl    $.L_invalid_literal_length_code_msg, %ecx
1202        movl    $INFLATE_MODE_BAD, %edx
1203        jmp     .L_update_stream_state
1204
1205.L_invalid_distance_too_far:
1206        /* strm->msg = "invalid distance too far back";
1207         * state->mode = BAD;
1208         */
1209        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
1210        movl    $.L_invalid_distance_too_far_msg, %ecx
1211        movl    $INFLATE_MODE_BAD, %edx
1212        jmp     .L_update_stream_state
1213
1214.L_update_stream_state:
1215        /* set strm->msg = %ecx, strm->state->mode = %edx */
1216        movl    strm_sp(%esp), %eax     /* eax = strm */
1217        testl   %ecx, %ecx              /* if (msg != NULL) */
1218        jz      .L_skip_msg             /* NULL msg: keep the existing strm->msg */
1219        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
1220.L_skip_msg:
1221        movl    state_strm(%eax), %eax  /* state = strm->state */
1222        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
1223        jmp     .L_break_loop           /* jump over the alignment padding that follows */
1224
1225.align 32,0x90
1226.L_break_loop:                       /* common exit: write decoder state back to strm/state */
1227
1228/*
1229 * Regs:
1230 *
1231 * bits = %ebp when mmx, and in %ebx when non-mmx
1232 * hold = %hold_mm when mmx, and in %ebp when non-mmx
1233 * in   = %esi
1234 * out  = %edi
1235 */
1236
1237#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1238
1239#if defined( RUN_TIME_MMX )
1240
1241        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1242        jne     .L_update_next_in       /* non-MMX path: bits is already in %ebx */
1243
1244#endif /* RUN_TIME_MMX */
1245
1246        movl    %ebp, %ebx              /* MMX path: move bits from %ebp to %ebx for the code below */
1247
1248.L_update_next_in:
1249
1250#endif
1251
1252#define strm_r  %eax
1253#define state_r %edx
1254
1255        /* len = bits >> 3;
1256         * in -= len;
1257         * bits -= len << 3;
1258         * hold &= (1U << bits) - 1;
1259         * state->hold = hold;
1260         * state->bits = bits;
1261         * strm->next_in = in;
1262         * strm->next_out = out;
1263         */
1264        movl    strm_sp(%esp), strm_r
1265        movl    %ebx, %ecx              /* ecx = bits */
1266        movl    state_strm(strm_r), state_r
1267        shrl    $3, %ecx                /* len = bits >> 3 (whole unused bytes) */
1268        subl    %ecx, in_r              /* in -= len */
1269        shll    $3, %ecx
1270        subl    %ecx, %ebx              /* bits -= len << 3 */
1271        movl    out_r, next_out_strm(strm_r)
1272        movl    %ebx, bits_state(state_r)
1273        movl    %ebx, %ecx              /* keep bits in %cl for the mask shift below */
1274
1275        leal    buf(%esp), %ebx         /* ebx = address of the local buf */
1276        cmpl    %ebx, last(%esp)
1277        jne     .L_buf_not_used         /* if buf != last */
1278
1279        subl    %ebx, in_r              /* in -= buf */
1280        movl    next_in_strm(strm_r), %ebx
1281        movl    %ebx, last(%esp)        /* last = strm->next_in */
1282        addl    %ebx, in_r              /* in += strm->next_in */
1283        movl    avail_in_strm(strm_r), %ebx
1284        subl    $11, %ebx
1285        addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */
1286
1287.L_buf_not_used:
1288        movl    in_r, next_in_strm(strm_r)      /* strm->next_in = in */
1289
1290        movl    $1, %ebx
1291        shll    %cl, %ebx
1292        decl    %ebx                    /* ebx = (1U << bits) - 1 */
1293
1294#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1295
1296#if defined( RUN_TIME_MMX )
1297
1298        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1299        jne     .L_update_hold          /* non-MMX path: hold is already in %ebp */
1300
1301#endif /* RUN_TIME_MMX */
1302
1303        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1304        movd    hold_mm, %ebp           /* MMX path: move hold back into %ebp */
1305
1306        emms                            /* leave MMX state empty before returning */
1307
1308.L_update_hold:
1309
1310#endif /* USE_MMX || RUN_TIME_MMX */
1311
1312        andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
1313        movl    %ebp, hold_state(state_r)       /* state->hold = hold */
1314
1315#define last_r %ebx
1316
1317        /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
1318        movl    last(%esp), last_r
1319        cmpl    in_r, last_r
1320        jbe     .L_last_is_smaller     /* if (in >= last) */
1321
1322        subl    in_r, last_r           /* last -= in */
1323        addl    $11, last_r            /* last += 11 */
1324        movl    last_r, avail_in_strm(strm_r)   /* avail_in = 11 + (last - in) */
1325        jmp     .L_fixup_out
1326.L_last_is_smaller:
1327        subl    last_r, in_r           /* in -= last */
1328        negl    in_r                   /* in = -in */
1329        addl    $11, in_r              /* in += 11 */
1330        movl    in_r, avail_in_strm(strm_r)     /* avail_in = 11 - (in - last) */
1331
1332#undef last_r
1333#define end_r %ebx
1334
1335.L_fixup_out:
1336        /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1337        movl    end(%esp), end_r
1338        cmpl    out_r, end_r
1339        jbe     .L_end_is_smaller      /* if (out >= end) */
1340
1341        subl    out_r, end_r           /* end -= out */
1342        addl    $257, end_r            /* end += 257 */
1343        movl    end_r, avail_out_strm(strm_r)   /* avail_out = 257 + (end - out) */
1344        jmp     .L_done
1345.L_end_is_smaller:
1346        subl    end_r, out_r           /* out -= end */
1347        negl    out_r                  /* out = -out */
1348        addl    $257, out_r            /* out += 257 */
1349        movl    out_r, avail_out_strm(strm_r)   /* avail_out = 257 - (out - end) */
1350
1351#undef end_r
1352#undef strm_r
1353#undef state_r
1354
1355.L_done:                             /* epilogue; presumably mirrors a prologue outside this view */
1356        addl    $local_var_size, %esp   /* release the local variable area */
1357        popf                            /* restore the saved flags */
1358        popl    %ebx
1359        popl    %ebp
1360        popl    %esi
1361        popl    %edi
1362        ret
1363
1364#if defined( GAS_ELF )
1365/* elf info */
1366.type inflate_fast,@function
1367.size inflate_fast,.-inflate_fast
1368#endif