PageRenderTime 75ms CodeModel.GetById 48ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 0ms

/StormLib/stormlib/zlib/contrib/masmx86/inffas32.asm

http://ghostcb.googlecode.com/
Assembly | 1083 lines | 750 code | 268 blank | 65 comment | 0 complexity | 5bc05d116342fbbfe2ed883466646ab4 MD5 | raw file
   1;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
   2; *
   3; * inffas32.asm is derived from inffas86.c, with translation of assembly code
   4; *
   5; * Copyright (C) 1995-2003 Mark Adler
   6; * For conditions of distribution and use, see copyright notice in zlib.h
   7; *
   8; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
   9; * Please use the copyright conditions above.
  10; *
  11; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
  12; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
  13; * the moment.  I have successfully compiled and tested this code with gcc2.96,
  14; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
  15; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
  16; * enabled.  I will attempt to merge the MMX code into this version.  Newer
  17; * versions of this and inffast.S can be found at
  18; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
  19; * 
  20; * 2005 : modification by Gilles Vollant
  21; */
  22; For Visual C++ 4.x and higher and ML 6.x and higher
  23;   ml.exe is in directory \MASM611C of Win95 DDK
  24;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
  25;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
  26;
  27;
  28;   compile with command line option
  29;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
  30
  31;   if you define NO_GUNZIP (see inflate.h), compile with
  32;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
  33
  34
  35; zlib1222sup is 0 for zlib 1.2.2.1 and lower
  36; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with addition of dmax and head
  37;        in inflate_state in inflate.h)
  38zlib1222sup      equ    8	; added to every inflate_state field offset defined in this file except mode_state
  39
  40; INFLATE_MODE_TYPE / INFLATE_MODE_BAD are the values this routine stores into state->mode on exit.
  41IFDEF GUNZIP
  42  INFLATE_MODE_TYPE    equ 11	; stored when an end-of-block code is decoded
  43  INFLATE_MODE_BAD     equ 26	; stored when invalid data is detected
  44ELSE
  45  IFNDEF NO_GUNZIP
  46    INFLATE_MODE_TYPE    equ 11	; neither symbol defined: same values as the GUNZIP case
  47    INFLATE_MODE_BAD     equ 26
  48  ELSE
  49    INFLATE_MODE_TYPE    equ 3	; NO_GUNZIP defined (and GUNZIP not defined)
  50    INFLATE_MODE_BAD     equ 17
  51  ENDIF
  52ENDIF
  53
  54
  55; 75 "inffast.S"
  56;FILE "inffast.S"
  57
  58;;;GLOBAL _inflate_fast
  59
  60;;;SECTION .text
  61
  62
  63
  64	.586p	; accept Pentium-class instructions (CPUID is used below)
  65	.mmx	; accept MMX mnemonics (movd/movq/psrlq/psllq/pand/por/pxor/emms are used below)
  66
  67	name	inflate_fast_x86
  68	.MODEL	FLAT	; 32-bit flat memory model
  69
  70_DATA			segment
; Decoder-selection flag, read at L_check_mmx and L_break_loop:
;   1 (initial) = CPU not probed yet, 2 = use the MMX loop, 3 = use the non-MMX loop.
  71inflate_fast_use_mmx:
  72	dd	1
  73
  74
  75_TEXT			segment
  76PUBLIC _inflate_fast
  77
  78ALIGN 4
; Exported entry point. It only jumps over the string constants and the mask
; table that are placed next in this segment; the real body starts at
; inflate_fast_entry.
  79_inflate_fast:
  80	jmp inflate_fast_entry
  81
  82
  83
  84ALIGN 4
  85	db	'Fast decoding Code from Chris Anderson'	; unlabeled credit string; no instruction in this file references it
  86	db	0
  87
; NUL-terminated error messages. On a decoding error the address of one of
; them is stored at offset 24 of the stream structure (see L_update_stream_state).
  88ALIGN 4
  89invalid_literal_length_code_msg:
  90	db	'invalid literal/length code'
  91	db	0
  92
  93ALIGN 4
  94invalid_distance_code_msg:
  95	db	'invalid distance code'
  96	db	0
  97
  98ALIGN 4
  99invalid_distance_too_far_msg:
 100	db	'invalid distance too far back'
 101	db	0
 102
 103
 104ALIGN 4
; Bit-mask table: entry n holds (1 << n) - 1 for n = 0..32.
; The MMX code paths index it with the extra-bit count (0..15) to isolate
; that many low bits of the bit buffer.
 105inflate_fast_mask:
 106dd	0
 107dd	1
 108dd	3
 109dd	7
 110dd	15
 111dd	31
 112dd	63
 113dd	127
 114dd	255
 115dd	511
 116dd	1023
 117dd	2047
 118dd	4095
 119dd	8191
 120dd	16383
 121dd	32767
 122dd	65535
 123dd	131071
 124dd	262143
 125dd	524287
 126dd	1048575
 127dd	2097151
 128dd	4194303
 129dd	8388607
 130dd	16777215
 131dd	33554431
 132dd	67108863
 133dd	134217727
 134dd	268435455
 135dd	536870911
 136dd	1073741823
 137dd	2147483647
 138dd	4294967295
 139
 140
; Byte offsets of the inflate_state fields this routine touches. Every offset
; except mode_state includes zlib1222sup, so the values follow the layout
; selected by that constant. NOTE(review): they must match the struct in the
; inflate.h actually compiled with - verify when changing zlib versions.
 141mode_state	 equ	0	;/* state->mode	*/
 142wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
 143write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
 144window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
 145hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
 146bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
 147lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
 148distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
 149lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
 150distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
 151
 152
 153;;SECTION .text
 154; 205 "inffast.S"
 155;GLOBAL	inflate_fast_use_mmx
 156
 157;SECTION .data
 158
 159
 160; GLOBAL inflate_fast_use_mmx:object
 161;.size inflate_fast_use_mmx, 4
 162; 226 "inffast.S"
 163;SECTION .text
 164
 165ALIGN 4
 166inflate_fast_entry:
; Prologue and local-frame setup.
; After the five pushes (20 bytes) and the 64-byte frame, the return address is
; at [esp+84], the first stack argument at [esp+88] and the second at [esp+92].
;   first argument  = stream pointer "strm" (presumably z_streamp; offsets
;                     0/4/12/16/24/28 look like next_in/avail_in/next_out/
;                     avail_out/msg/state - confirm against zlib.h)
;   second argument = presumably the "start" parameter of inffast.c - confirm
; Frame slots written here:
;   [esp+0]  (1 << state->lenbits) - 1      [esp+4]  (1 << state->distbits) - 1
;   [esp+8]  state->lencode                 [esp+12] state->distcode
;   [esp+16] output limit = [strm+12] + [strm+16] - 257
;   [esp+20] input limit  = [strm+0]  + [strm+4]  - 11
;   [esp+40] [strm+12] + [strm+16] - second argument (lowest address a match may copy from)
;   [esp+44] input pointer                  [esp+48] state->write
;   [esp+52] state->wsize                   [esp+56] state->window
;   [esp+60] output pointer
; On exit from this block: edi = state, ebp = state->hold, ebx = state->bits.
 167	push  edi
 168	push  esi
 169	push  ebp
 170	push  ebx
 171	pushfd	; saved so the direction flag and the EFLAGS.ID toggle done later are undone on return
 172	sub  esp,64	; 64-byte local frame
 173	cld	; string instructions must count upwards
 174
 175
 176
 177
 178	mov  esi, [esp+88]	; esi = strm (first argument)
 179	mov  edi, [esi+28]	; edi = [strm+28], the inflate_state pointer
 180
 181
 182
 183
 184
 185
 186
 187	mov  edx, [esi+4]
 188	mov  eax, [esi+0]
 189
 190	add  edx,eax
 191	sub  edx,11	; edx = [strm+0] + [strm+4] - 11
 192
 193	mov  [esp+44],eax	; input pointer
 194	mov  [esp+20],edx	; input limit
 195
 196	mov  ebp, [esp+92]	; ebp = second argument
 197	mov  ecx, [esi+16]
 198	mov  ebx, [esi+12]
 199
 200	sub  ebp,ecx
 201	neg  ebp
 202	add  ebp,ebx	; ebp = [strm+12] + [strm+16] - second argument
 203
 204	sub  ecx,257
 205	add  ecx,ebx	; ecx = [strm+12] + [strm+16] - 257
 206
 207	mov  [esp+60],ebx	; output pointer
 208	mov  [esp+40],ebp	; lowest address a match may copy from
 209	mov  [esp+16],ecx	; output limit
 210; 285 "inffast.S"
 211	mov  eax, [edi+lencode_state]
 212	mov  ecx, [edi+distcode_state]
 213
 214	mov  [esp+8],eax
 215	mov  [esp+12],ecx
 216
 217	mov  eax,1
 218	mov  ecx, [edi+lenbits_state]
 219	shl  eax,cl
 220	dec  eax
 221	mov  [esp+0],eax	; (1 << lenbits) - 1
 222
 223	mov  eax,1
 224	mov  ecx, [edi+distbits_state]
 225	shl  eax,cl
 226	dec  eax
 227	mov  [esp+4],eax	; (1 << distbits) - 1
 228
 229	mov  eax, [edi+wsize_state]
 230	mov  ecx, [edi+write_state]
 231	mov  edx, [edi+window_state]
 232
 233	mov  [esp+52],eax
 234	mov  [esp+48],ecx
 235	mov  [esp+56],edx
 236
 237	mov  ebp, [edi+hold_state]	; ebp = bit buffer
 238	mov  ebx, [edi+bits_state]	; ebx = number of valid bits in the bit buffer
 239; 321 "inffast.S"
 240	mov  esi, [esp+44]	; esi = input pointer
 241	mov  ecx, [esp+20]	; ecx = input limit (input end - 11)
 242	cmp  ecx,esi
 243	ja   L_align_long	; limit is above the input pointer: read straight from the caller's buffer
 244; Otherwise at most 11 input bytes remain: copy them into the 12-byte scratch area at [esp+28] and zero-pad it.
 245	add  ecx,11
 246	sub  ecx,esi	; ecx = number of input bytes remaining
 247	mov  eax,12
 248	sub  eax,ecx	; eax = 12 - remaining = number of padding bytes
 249	lea  edi, [esp+28]
 250	rep movsb	; copy the remaining input bytes
 251	mov  ecx,eax
 252	xor  eax,eax
 253	rep stosb	; zero-fill the rest of the scratch area
 254	lea  esi, [esp+28]	; continue reading from the scratch area
 255	mov  [esp+20],esi	; input limit = scratch start; L_update_next_in tests this to detect that the scratch area was used
 256	jmp  L_is_aligned
 257
 258
 259L_align_long:
; Consume single bytes into the bit buffer until esi is 4-byte aligned.
 260	test  esi,3
 261	jz   L_is_aligned
 262	xor  eax,eax
 263	mov  al, [esi]
 264	inc  esi
 265	mov  ecx,ebx	; cl = current bit count = shift position for the new byte
 266	add  ebx,8
 267	shl  eax,cl
 268	or  ebp,eax	; append the byte above the bits already buffered
 269	jmp L_align_long
 270
 271L_is_aligned:
 272	mov  edi, [esp+60]	; edi = output pointer
 273; 366 "inffast.S"
 274L_check_mmx:
; Select the decoder loop from inflate_fast_use_mmx:
;   2 -> MMX loop, 3 (anything above 2) -> non-MMX loop,
;   below 2 -> not decided yet: probe the CPU, store 2 or 3, then re-test.
 275	cmp  dword ptr [inflate_fast_use_mmx],2
 276	je   L_init_mmx
 277	ja   L_do_loop
 278
 279	push  eax
 280	push  ebx
 281	push  ecx
 282	push  edx
 283	pushfd
 284	mov  eax, [esp]	; eax = current EFLAGS
 285	xor  dword ptr [esp],0200000h	; toggle bit 21 (the ID flag) in the stacked copy
 286
 287
 288
 289
 290	popfd	; try to load the toggled value
 291	pushfd
 292	pop  edx	; edx = EFLAGS as actually accepted by the CPU
 293	xor  edx,eax
 294	jz   L_dont_use_mmx	; bit did not change: CPUID is not available
 295	xor  eax,eax
 296	cpuid	; leaf 0: vendor string in ebx:edx:ecx
 297	cmp  ebx,0756e6547h	; "Genu"
 298	jne  L_dont_use_mmx
 299	cmp  ecx,06c65746eh	; "ntel"
 300	jne  L_dont_use_mmx
 301	cmp  edx,049656e69h	; "ineI"
 302	jne  L_dont_use_mmx
 303	mov  eax,1
 304	cpuid	; leaf 1: eax = signature, edx = feature flags
 305	shr  eax,8
 306	and  eax,15	; eax = family field (signature bits 8..11)
 307	cmp  eax,6
 308	jne  L_dont_use_mmx	; the MMX loop is only selected for family 6
 309	test  edx,0800000h	; feature bit 23 = MMX
 310	jz   L_dont_use_mmx	; no MMX: take the non-MMX loop (replaces a jnz-over-jmp pair)
 312L_use_mmx:
 313	mov  dword ptr [inflate_fast_use_mmx],2
 314	jmp  L_check_mmx_pop
 315L_dont_use_mmx:
 316	mov  dword ptr [inflate_fast_use_mmx],3
 317L_check_mmx_pop:
 318	pop  edx
 319	pop  ecx
 320	pop  ebx
 321	pop  eax
 322	jmp  L_check_mmx	; re-dispatch with the flag now set to 2 or 3
 323; 426 "inffast.S"
 324ALIGN 4
 325L_do_loop:
 326; 437 "inffast.S"
; Non-MMX main loop. Register roles throughout this path:
;   esi = input pointer, edi = output pointer,
;   ebp = bit buffer, bl = number of valid bits in ebp.
; Table entries are 32 bits: al = operation byte, ah = code length in bits,
; upper 16 bits = value (literal byte, length/distance base, or sub-table index).
 327	cmp  bl,15
 328	ja   L_get_length_code	; more than 15 bits buffered: no refill
 329
 330	xor  eax,eax
 331	lodsw	; fetch 16 more input bits
 332	mov  cl,bl
 333	add  bl,16
 334	shl  eax,cl
 335	or  ebp,eax	; append them above the bits already buffered
 336
 337L_get_length_code:
 338	mov  edx, [esp+0]	; length-table index mask
 339	mov  ecx, [esp+8]	; length-table base
 340	and  edx,ebp
 341	mov  eax, [ecx+edx*4]	; eax = table entry for the low bits of the bit buffer
 342
 343L_dolen:
 344
 345
 346
 347
 348
 349
 350	mov  cl,ah
 351	sub  bl,ah
 352	shr  ebp,cl	; drop the bits of this code from the bit buffer
 353
 354
 355
 356
 357
 358
 359	test  al,al
 360	jnz   L_test_for_length_base	; operation byte non-zero: not a literal
 361
 362	shr  eax,16
 363	stosb	; literal: write the value byte to the output
 364
 365L_while_test:
 366; Loop while the output pointer is below the output limit and the input pointer is below the input limit.
 367
 368	cmp  [esp+16],edi
 369	jbe  L_break_loop	; output limit reached
 370
 371	cmp  [esp+20],esi
 372	ja   L_do_loop	; input still below the input limit: decode the next code
 373	jmp  L_break_loop
 374
 375L_test_for_length_base:
 376; 502 "inffast.S"
; Entry has a non-zero operation byte in al. If bit 16 of it is set, the entry
; is a length base (value in the upper 16 bits) with al & 15 extra bits to add.
; The final length is stored in [esp+24].
 377	mov  edx,eax
 378	shr  edx,16	; edx = value field of the entry
 379	mov  cl,al
 380
 381	test  al,16
 382	jz   L_test_for_second_level_length	; not a length base
 383	and  cl,15	; cl = number of extra bits
 384	jz   L_save_len	; no extra bits: length = base
 385	cmp  bl,cl
 386	jae  L_add_bits_to_len	; enough bits buffered
 387
 388	mov  ch,cl	; keep the extra-bit count while cl is used as shift count
 389	xor  eax,eax
 390	lodsw	; fetch 16 more input bits
 391	mov  cl,bl
 392	add  bl,16
 393	shl  eax,cl
 394	or  ebp,eax
 395	mov  cl,ch
 396
 397L_add_bits_to_len:
 398	mov  eax,1
 399	shl  eax,cl
 400	dec  eax	; eax = (1 << cl) - 1
 401	sub  bl,cl
 402	and  eax,ebp	; eax = extra bits taken from the bit buffer
 403	shr  ebp,cl
 404	add  edx,eax	; length = base + extra
 405
 406L_save_len:
 407	mov  [esp+24],edx	; remember the match length
 409
 410L_decode_distance:
 411; 549 "inffast.S"
; Refill the bit buffer if 15 or fewer bits are buffered, then look up the distance code.
 412	cmp  bl,15
 413	ja   L_get_distance_code
 414
 415	xor  eax,eax
 416	lodsw	; fetch 16 more input bits
 417	mov  cl,bl
 418	add  bl,16
 419	shl  eax,cl
 420	or  ebp,eax
 421
 422L_get_distance_code:
 423	mov  edx, [esp+4]	; distance-table index mask
 424	mov  ecx, [esp+12]	; distance-table base
 425	and  edx,ebp
 426	mov  eax, [ecx+edx*4]	; eax = distance-table entry
 428
 429L_dodist:
; eax = distance-table entry (al = operation byte, ah = code length,
; upper 16 bits = value). Consumes the code bits, then - if bit 16 of the
; operation byte is set - adds al & 15 extra bits to the base, leaving the
; match distance in edx.
 430	mov  edx,eax
 431	shr  edx,16	; edx = value field of the entry
 432	mov  cl,ah
 433	sub  bl,ah
 434	shr  ebp,cl	; drop the bits of this code from the bit buffer
 435; 584 "inffast.S"
 436	mov  cl,al
 437
 438	test  al,16
 439	jz  L_test_for_second_level_dist	; not a distance base
 440	and  cl,15	; cl = number of extra bits
 441	jz  L_check_dist_one	; no extra bits: try the distance == 1 fast path
 442	cmp  bl,cl
 443	jae  L_add_bits_to_dist	; enough bits buffered
 444
 445	mov  ch,cl	; keep the extra-bit count while cl is used as shift count
 446	xor  eax,eax
 447	lodsw	; fetch 16 more input bits
 448	mov  cl,bl
 449	add  bl,16
 450	shl  eax,cl
 451	or  ebp,eax
 452	mov  cl,ch
 453
 454L_add_bits_to_dist:
 455	mov  eax,1
 456	shl  eax,cl
 457	dec  eax	; eax = (1 << cl) - 1
 458	sub  bl,cl
 459	and  eax,ebp	; eax = extra bits taken from the bit buffer
 460	shr  ebp,cl
 461	add  edx,eax	; distance = base + extra
; Execution falls through into L_check_window, which follows immediately;
; the former "jmp L_check_window" targeted the very next instruction.
 463
 464L_check_window:
 465; 625 "inffast.S"
; edx = match distance, [esp+24] = match length, edi = output pointer.
; If the whole distance lies within the output already produced (edi - [esp+40]),
; copy the match from the output itself; otherwise go to L_clip_window.
 466	mov  [esp+44],esi	; save the input pointer; esi is needed as copy source
 467	mov  eax,edi
 468	sub  eax, [esp+40]	; eax = bytes available behind edi in the output
 469
 470	cmp  eax,edx
 471	jb   L_clip_window	; distance reaches further back than that
 472
 473	mov  ecx, [esp+24]
 474	mov  esi,edi
 475	sub  esi,edx	; esi = copy source = output pointer - distance
 476
 477	sub  ecx,3	; the first three bytes are copied by hand below
 478	mov  al, [esi]
 479	mov  [edi],al
 480	mov  al, [esi+1]
 481	mov  dl, [esi+2]
 482	add  esi,3
 483	mov  [edi+1],al
 484	mov  [edi+2],dl
 485	add  edi,3
 486	rep movsb	; copy the remaining length - 3 bytes
 487
 488	mov  esi, [esp+44]	; restore the input pointer
 489	jmp  L_while_test
 490
 491ALIGN 4
 492L_check_dist_one:
; Reached when the distance code has no extra bits. If the distance is exactly 1
; and at least one byte is available behind the output pointer, the match is a
; run of the previous output byte and is written with stosb; any other case
; goes through the general L_check_window path.
 493	cmp  edx,1
 494	jne  L_check_window
 495	cmp  [esp+40],edi
 496	je  L_check_window	; nothing behind the output pointer yet
 497
 498	dec  edi
 499	mov  ecx, [esp+24]	; ecx = match length
 500	mov  al, [edi]	; al = previous output byte
 501	sub  ecx,3	; three copies are stored by hand below
 502
 503	mov  [edi+1],al
 504	mov  [edi+2],al
 505	mov  [edi+3],al
 506	add  edi,4
 507	rep stosb	; store the remaining length - 3 copies
 508
 509	jmp  L_while_test
 510
 511ALIGN 4
 512L_test_for_second_level_length:
 513; Length entry without bit 16 set. On entry: al = cl = operation byte, edx = value field.
 514; If bit 64 is clear the entry links to a sub-table: index = value + (bit buffer & ((1 << cl) - 1)).
 515
 516
 517	test  al,64
 518	jnz   L_test_for_end_of_block	; bit 64 set: end-of-block or invalid code
 519
 520	mov  eax,1
 521	shl  eax,cl
 522	dec  eax
 523	and  eax,ebp
 524	add  eax,edx	; eax = index of the second-level entry
 525	mov  edx, [esp+8]	; length-table base
 526	mov  eax, [edx+eax*4]
 527	jmp  L_dolen	; process the second-level entry like a first-level one
 528
 529ALIGN 4
 530L_test_for_second_level_dist:
 531; Distance entry without bit 16 set. On entry: al = cl = operation byte, edx = value field.
 532; If bit 64 is clear the entry links to a sub-table: index = value + (bit buffer & ((1 << cl) - 1)).
 533
 534
 535	test  al,64
 536	jnz   L_invalid_distance_code	; bit 64 set: invalid distance code
 537
 538	mov  eax,1
 539	shl  eax,cl
 540	dec  eax
 541	and  eax,ebp
 542	add  eax,edx	; eax = index of the second-level entry
 543	mov  edx, [esp+12]	; distance-table base
 544	mov  eax, [edx+eax*4]
 545	jmp  L_dodist	; process the second-level entry like a first-level one
 546
 547ALIGN 4
 548L_clip_window:
 549; 721 "inffast.S"
; The match starts before the output produced in this call, so part (or all)
; of it comes from the sliding window.
; On entry: eax = bytes available behind edi in the output, edx = distance,
;           [esp+24] = match length, edi = output pointer.
; Uses [esp+52] = window size, [esp+48] = window write index,
;      [esp+56] = window base.
; Leaves for L_do_copy1 with esi = source and eax = bytes still to copy.
 550	mov  ecx,eax
 551	mov  eax, [esp+52]	; eax = window size
 552	neg  ecx
 553	mov  esi, [esp+56]	; esi = window base
 554
 555	cmp  eax,edx
 556	jb   L_invalid_distance_too_far	; distance larger than the window
 557
 558	add  ecx,edx	; ecx = distance - available output = bytes to take from the window
 559	cmp  dword ptr [esp+48],0
 560	jne  L_wrap_around_window	; non-zero write index: window data may wrap
 561
 562	sub  eax,ecx
 563	add  esi,eax	; esi = window base + window size - ecx
 564; 749 "inffast.S"
 565	mov  eax, [esp+24]	; eax = match length
 566	cmp  eax,ecx
 567	jbe  L_do_copy1	; whole match lies inside the window
 568
 569	sub  eax,ecx	; eax = bytes left after the window part
 570	rep movsb	; copy the window part
 571	mov  esi,edi
 572	sub  esi,edx	; rest of the match comes from the output itself
 573	jmp  L_do_copy1
; An identical compare/copy sequence used to follow this jump; it had no label
; and could never execute, so it has been removed.
 583
 584L_wrap_around_window:
 585; 793 "inffast.S"
; Window write index ([esp+48]) is non-zero.
; On entry: ecx = bytes to take from the window, esi = window base,
;           edx = distance, edi = output pointer.
; If ecx exceeds the write index, the source starts near the end of the window
; and continues at its beginning; otherwise it is one contiguous run.
 586	mov  eax, [esp+48]	; eax = write index
 587	cmp  ecx,eax
 588	jbe  L_contiguous_in_window
 589
 590	add  esi, [esp+52]
 591	add  esi,eax
 592	sub  esi,ecx	; esi = window base + window size + write index - ecx
 593	sub  ecx,eax	; ecx = bytes available up to the end of the window
 594
 595
 596	mov  eax, [esp+24]	; eax = match length
 597	cmp  eax,ecx
 598	jbe  L_do_copy1	; match ends before the end of the window
 599
 600	sub  eax,ecx
 601	rep movsb	; copy the part at the end of the window
 602	mov  esi, [esp+56]	; continue at the window base
 603	mov  ecx, [esp+48]	; up to write-index bytes are available there
 604	cmp  eax,ecx
 605	jbe  L_do_copy1
 606
 607	sub  eax,ecx
 608	rep movsb	; copy the part at the start of the window
 609	mov  esi,edi
 610	sub  esi,edx	; rest of the match comes from the output itself
 611	jmp  L_do_copy1
 612
 613L_contiguous_in_window:
 614; 836 "inffast.S"
; On entry: eax = window write index, ecx = bytes to take from the window
; (ecx <= eax), esi = window base, edx = distance.
 615	add  esi,eax
 616	sub  esi,ecx	; esi = window base + write index - ecx
 617
 618
 619	mov  eax, [esp+24]	; eax = match length
 620	cmp  eax,ecx
 621	jbe  L_do_copy1	; whole match lies inside the window
 622
 623	sub  eax,ecx
 624	rep movsb	; copy the window part
 625	mov  esi,edi
 626	sub  esi,edx	; rest of the match comes from the output itself
 627
 628L_do_copy1:
 629; 862 "inffast.S"
; Final copy step shared by all clipped-window paths: copy eax bytes from esi to edi.
 630	mov  ecx,eax
 631	rep movsb
 632
 633	mov  esi, [esp+44]	; restore the input pointer
 634	jmp  L_while_test
 635; 878 "inffast.S"
 636ALIGN 4
 637L_init_mmx:
; Set up the MMX decoder. Register roles in the MMX path:
;   mm0 = bit buffer, mm1 = shift count still to be applied to mm0,
;   ebp = number of valid bits, ebx = length-table base (between matches),
;   mm4/mm3 = length index mask and its spare copy,
;   mm5/mm2 = distance index mask and its spare copy,
;   esi = input pointer, edi = output pointer.
 638	emms
 639
 640
 641
 642
 643
 644	movd mm0,ebp	; bit buffer moves from ebp to mm0
 645	mov  ebp,ebx	; bit count moves from ebx to ebp
 646; 896 "inffast.S"
 647	movd mm4,[esp+0]
 648	movq mm3,mm4
 649	movd mm5,[esp+4]
 650	movq mm2,mm5
 651	pxor mm1,mm1	; no shift pending yet
 652	mov  ebx, [esp+8]
 653	jmp  L_do_loop_mmx
 654
 655ALIGN 4
 656L_do_loop_mmx:
; MMX main loop: mm0 = bit buffer, mm1 = pending shift count, ebp = bit count,
; ebx = length-table base, esi/edi = input/output pointers.
 657	psrlq mm0,mm1	; apply the shift deferred from the previous code
 658
 659	cmp  ebp,32
 660	ja  L_get_length_code_mmx	; more than 32 bits buffered: no refill
 661
 662	movd mm6,ebp
 663	movd mm7,[esi]	; fetch 32 more input bits
 664	add  esi,4
 665	psllq mm7,mm6
 666	add  ebp,32
 667	por mm0,mm7	; append them above the bits already buffered
 668
 669L_get_length_code_mmx:
 670	pand mm4,mm0
 671	movd eax,mm4	; eax = bit buffer & length index mask
 672	movq mm4,mm3	; restore the mask for the next use
 673	mov  eax, [ebx+eax*4]	; eax = length-table entry
 674
 675L_dolen_mmx:
 676	movzx  ecx,ah	; ecx = code length in bits
 677	movd mm1,ecx	; defer the shift of mm0
 678	sub  ebp,ecx
 679
 680	test  al,al
 681	jnz L_test_for_length_base_mmx	; operation byte non-zero: not a literal
 682
 683	shr  eax,16
 684	stosb	; literal: write the value byte to the output
 685
 686L_while_test_mmx:
 687; Loop while the output pointer is below the output limit and the input pointer is below the input limit.
 688
 689	cmp  [esp+16],edi
 690	jbe L_break_loop
 691
 692	cmp  [esp+20],esi
 693	ja L_do_loop_mmx
 694	jmp L_break_loop
 695
 696L_test_for_length_base_mmx:
 697; eax = length-table entry with a non-zero operation byte. Leaves the match length in edx.
 698	mov  edx,eax
 699	shr  edx,16	; edx = value field of the entry
 700
 701	test  al,16
 702	jz  L_test_for_second_level_length_mmx	; not a length base
 703	and  eax,15	; eax = number of extra bits
 704	jz L_decode_distance_mmx	; none: length = base
 705
 706	psrlq mm0,mm1	; apply the pending shift for the code itself
 707	movd mm1,eax	; the extra bits become the next pending shift
 708	movd ecx,mm0
 709	sub  ebp,eax
 710	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of the bit buffer
 711	add  edx,ecx	; length = base + extra
 712
 713L_decode_distance_mmx:
; edx = match length. Refill the bit buffer if needed and look up the distance code.
 714	psrlq mm0,mm1	; apply the pending shift
 715
 716	cmp  ebp,32
 717	ja L_get_dist_code_mmx	; more than 32 bits buffered: no refill
 718
 719	movd mm6,ebp
 720	movd mm7,[esi]	; fetch 32 more input bits
 721	add  esi,4
 722	psllq mm7,mm6
 723	add  ebp,32
 724	por mm0,mm7
 725
 726L_get_dist_code_mmx:
 727	mov  ebx, [esp+12]	; ebx = distance-table base (the length-table base is reloaded after the copy)
 728	pand mm5,mm0
 729	movd eax,mm5	; eax = bit buffer & distance index mask
 730	movq mm5,mm2	; restore the mask for the next use
 731	mov  eax, [ebx+eax*4]	; eax = distance-table entry
 732
 733L_dodist_mmx:
 734; eax = distance-table entry. Leaves the match distance in ebx (edx still holds the length).
 735	movzx  ecx,ah	; ecx = code length in bits
 736	mov  ebx,eax
 737	shr  ebx,16	; ebx = value field of the entry
 738	sub  ebp,ecx
 739	movd mm1,ecx	; defer the shift of mm0
 740
 741	test  al,16
 742	jz L_test_for_second_level_dist_mmx	; not a distance base
 743	and  eax,15	; eax = number of extra bits
 744	jz L_check_dist_one_mmx	; none: try the distance == 1 fast path
 745
 746L_add_bits_to_dist_mmx:
 747	psrlq mm0,mm1	; apply the pending shift for the code itself
 748	movd mm1,eax	; the extra bits become the next pending shift
 749	movd ecx,mm0
 750	sub  ebp,eax
 751	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of the bit buffer
 752	add  ebx,ecx	; distance = base + extra
 753
 754L_check_window_mmx:
; ebx = match distance, edx = match length, edi = output pointer.
; If the whole distance lies within the output already produced (edi - [esp+40]),
; copy the match from the output itself; otherwise go to L_clip_window_mmx.
 755	mov  [esp+44],esi	; save the input pointer; esi is needed as copy source
 756	mov  eax,edi
 757	sub  eax, [esp+40]	; eax = bytes available behind edi in the output
 758
 759	cmp  eax,ebx
 760	jb L_clip_window_mmx	; distance reaches further back than that
 761
 762	mov  ecx,edx
 763	mov  esi,edi
 764	sub  esi,ebx	; esi = copy source = output pointer - distance
 765
 766	sub  ecx,3	; the first three bytes are copied by hand below
 767	mov  al, [esi]
 768	mov  [edi],al
 769	mov  al, [esi+1]
 770	mov  dl, [esi+2]
 771	add  esi,3
 772	mov  [edi+1],al
 773	mov  [edi+2],dl
 774	add  edi,3
 775	rep movsb	; copy the remaining length - 3 bytes
 776
 777	mov  esi, [esp+44]	; restore the input pointer
 778	mov  ebx, [esp+8]	; restore the length-table base
 779	jmp  L_while_test_mmx
 780
 781ALIGN 4
 782L_check_dist_one_mmx:
; Reached when the distance code has no extra bits (ebx = distance, edx = length).
; If the distance is exactly 1 and at least one byte is available behind the
; output pointer, the match is a run of the previous output byte.
 783	cmp  ebx,1
 784	jne  L_check_window_mmx
 785	cmp  [esp+40],edi
 786	je   L_check_window_mmx	; nothing behind the output pointer yet
 787
 788	dec  edi
 789	mov  ecx,edx
 790	mov  al, [edi]	; al = previous output byte
 791	sub  ecx,3	; three copies are stored by hand below
 792
 793	mov  [edi+1],al
 794	mov  [edi+2],al
 795	mov  [edi+3],al
 796	add  edi,4
 797	rep stosb	; store the remaining length - 3 copies
 798
 799	mov  ebx, [esp+8]	; restore the length-table base
 800	jmp  L_while_test_mmx
 801
 802ALIGN 4
 803L_test_for_second_level_length_mmx:
; Length entry without bit 16 set (al = operation byte, edx = value field,
; ebx = length-table base). If bit 64 is clear the entry links to a sub-table.
 804	test  al,64
 805	jnz L_test_for_end_of_block	; bit 64 set: end-of-block or invalid code
 806
 807	and  eax,15
 808	psrlq mm0,mm1	; apply the pending shift for the first-level code
 809	movd ecx,mm0
 810	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of the bit buffer
 811	add  ecx,edx	; ecx = index of the second-level entry
 812	mov  eax, [ebx+ecx*4]
 813	jmp L_dolen_mmx	; process the second-level entry like a first-level one
 814
 815ALIGN 4
 816L_test_for_second_level_dist_mmx:
; Distance entry without bit 16 set (al = operation byte, ebx = value field).
; If bit 64 is clear the entry links to a sub-table of the distance table.
 817	test  al,64
 818	jnz L_invalid_distance_code	; bit 64 set: invalid distance code
 819
 820	and  eax,15
 821	psrlq mm0,mm1	; apply the pending shift for the first-level code
 822	movd ecx,mm0
 823	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of the bit buffer
 824	mov  eax, [esp+12]	; distance-table base
 825	add  ecx,ebx	; ecx = index of the second-level entry
 826	mov  eax, [eax+ecx*4]
 827	jmp  L_dodist_mmx	; process the second-level entry like a first-level one
 828
 829ALIGN 4
 830L_clip_window_mmx:
; The match starts before the output produced in this call, so part (or all)
; of it comes from the sliding window.
; On entry: eax = bytes available behind edi in the output, ebx = distance,
;           edx = match length, edi = output pointer.
; Uses [esp+52] = window size, [esp+48] = window write index,
;      [esp+56] = window base.
; Leaves for L_do_copy1_mmx with esi = source and edx = bytes still to copy.
 831
 832	mov  ecx,eax
 833	mov  eax, [esp+52]	; eax = window size
 834	neg  ecx
 835	mov  esi, [esp+56]	; esi = window base
 836
 837	cmp  eax,ebx
 838	jb  L_invalid_distance_too_far	; distance larger than the window
 839
 840	add  ecx,ebx	; ecx = distance - available output = bytes to take from the window
 841	cmp  dword ptr [esp+48],0
 842	jne  L_wrap_around_window_mmx	; non-zero write index: window data may wrap
 843
 844	sub  eax,ecx
 845	add  esi,eax	; esi = window base + window size - ecx
 846
 847	cmp  edx,ecx
 848	jbe  L_do_copy1_mmx	; whole match lies inside the window
 849
 850	sub  edx,ecx	; edx = bytes left after the window part
 851	rep movsb	; copy the window part
 852	mov  esi,edi
 853	sub  esi,ebx	; rest of the match comes from the output itself
 854	jmp  L_do_copy1_mmx
; An identical compare/copy sequence used to follow this jump; it had no label
; and could never execute, so it has been removed.
 864
 865L_wrap_around_window_mmx:
 866; Window write index ([esp+48]) is non-zero. ecx = bytes to take from the window, esi = window base, ebx = distance, edx = length.
 867	mov  eax, [esp+48]	; eax = write index
 868	cmp  ecx,eax
 869	jbe  L_contiguous_in_window_mmx	; source is one contiguous run below the write index
 870
 871	add  esi, [esp+52]
 872	add  esi,eax
 873	sub  esi,ecx	; esi = window base + window size + write index - ecx
 874	sub  ecx,eax	; ecx = bytes available up to the end of the window
 875
 876
 877	cmp  edx,ecx
 878	jbe  L_do_copy1_mmx	; match ends before the end of the window
 879
 880	sub  edx,ecx
 881	rep movsb	; copy the part at the end of the window
 882	mov  esi, [esp+56]	; continue at the window base
 883	mov  ecx, [esp+48]	; up to write-index bytes are available there
 884	cmp  edx,ecx
 885	jbe  L_do_copy1_mmx
 886
 887	sub  edx,ecx
 888	rep movsb	; copy the part at the start of the window
 889	mov  esi,edi
 890	sub  esi,ebx	; rest of the match comes from the output itself
 891	jmp  L_do_copy1_mmx
 892
 893L_contiguous_in_window_mmx:
 894; eax = window write index, ecx = bytes to take from the window (ecx <= eax), esi = window base, ebx = distance, edx = length.
 895	add  esi,eax
 896	sub  esi,ecx	; esi = window base + write index - ecx
 897
 898
 899	cmp  edx,ecx
 900	jbe  L_do_copy1_mmx	; whole match lies inside the window
 901
 902	sub  edx,ecx
 903	rep movsb	; copy the window part
 904	mov  esi,edi
 905	sub  esi,ebx	; rest of the match comes from the output itself
 906
 907L_do_copy1_mmx:
 908; Final copy step shared by all clipped-window MMX paths: copy edx bytes from esi to edi.
 909
 910	mov  ecx,edx
 911	rep movsb
 912
 913	mov  esi, [esp+44]	; restore the input pointer
 914	mov  ebx, [esp+8]	; restore the length-table base
 915	jmp  L_while_test_mmx
 916; 1174 "inffast.S"
 917L_invalid_distance_code:
 918; Exit-reason stubs shared by both decoder loops. Each one loads
 919;   ecx = address of an error message (0 for none) and
 920;   edx = value to store in state->mode,
 921; then jumps to L_update_stream_state.
 922
 923	mov  ecx, invalid_distance_code_msg
 924	mov  edx,INFLATE_MODE_BAD
 925	jmp  L_update_stream_state
 926
 927L_test_for_end_of_block:
 928; al = operation byte of a length entry that has bit 64 set.
 929; Bit 32 set means end of block; otherwise the code is invalid.
 930
 931
 932
 933	test  al,32
 934	jz  L_invalid_literal_length_code
 935
 936	mov  ecx,0	; end of block: no message
 937	mov  edx,INFLATE_MODE_TYPE
 938	jmp  L_update_stream_state
 939
 940L_invalid_literal_length_code:
 941
 942
 943
 944
 945
 946	mov  ecx, invalid_literal_length_code_msg
 947	mov  edx,INFLATE_MODE_BAD
 948	jmp  L_update_stream_state
 949
 950L_invalid_distance_too_far:
 951; Reached from the clip-window code, where esi was being used as a copy source.
 952
 953
 954	mov  esi, [esp+44]	; restore the saved input pointer
 955	mov  ecx, invalid_distance_too_far_msg
 956	mov  edx,INFLATE_MODE_BAD
 957	jmp  L_update_stream_state
 958
 959L_update_stream_state:
 960; In: ecx = message address or 0, edx = new mode value.
 961	mov  eax, [esp+88]	; eax = strm (first argument)
 962	test  ecx,ecx
 963	jz  L_skip_msg	; no message: leave [strm+24] untouched
 964	mov  [eax+24],ecx	; store the message pointer at strm+24
 965L_skip_msg:
 966	mov  eax, [eax+28]	; eax = inflate_state pointer
 967	mov  [eax+mode_state],edx	; state->mode = edx
 968	jmp  L_break_loop
 969
 970ALIGN 4
 971L_break_loop:
 972; 1243 "inffast.S"
; Common exit: return unused whole bytes to the input and write the stream
; fields back. esi = input pointer, edi = output pointer.
 973	cmp  dword ptr [inflate_fast_use_mmx],2
 974	jne  L_update_next_in
 975
 976
 977
 978	mov  ebx,ebp	; MMX path keeps the bit count in ebp; move it to ebx
 979
 980L_update_next_in:
 981; 1266 "inffast.S"
 982	mov  eax, [esp+88]	; eax = strm
 983	mov  ecx,ebx
 984	mov  edx, [eax+28]	; edx = inflate_state pointer
 985	shr  ecx,3	; ecx = number of whole unused bytes in the bit buffer
 986	sub  esi,ecx	; give those bytes back to the input
 987	shl  ecx,3
 988	sub  ebx,ecx	; ebx = bit count modulo 8
 989	mov  [eax+12],edi	; write the output pointer back to strm+12
 990	mov  [edx+bits_state],ebx	; state->bits = remaining bit count
 991	mov  ecx,ebx	; cl is used below to build the bit-buffer mask
 992
 993	lea  ebx, [esp+28]
 994	cmp  [esp+20],ebx
 995	jne  L_buf_not_used	; input limit is not the scratch area: input was read in place
 996
; The scratch area at [esp+28] was used: translate esi back into the caller's
; buffer and recompute the input limit from strm+0 / strm+4.
 997	sub  esi,ebx	; esi = bytes consumed from the scratch area
 998	mov  ebx, [eax+0]
 999	mov  [esp+20],ebx
1000	add  esi,ebx	; esi = [strm+0] + bytes consumed
1001	mov  ebx, [eax+4]
1002	sub  ebx,11
1003	add  [esp+20],ebx	; input limit = [strm+0] + [strm+4] - 11
1004
1005L_buf_not_used:
; eax = strm, edx = inflate_state pointer, cl = remaining bit count, esi = final input pointer.
1006	mov  [eax+0],esi	; write the input pointer back to strm+0
1007
1008	mov  ebx,1
1009	shl  ebx,cl
1010	dec  ebx	; ebx = (1 << remaining bits) - 1
1011
1012
1013
1014
1015
1016	cmp  dword ptr [inflate_fast_use_mmx],2
1017	jne  L_update_hold
1018
1019
1020
1021	psrlq mm0,mm1	; MMX path: apply the shift that is still pending
1022	movd ebp,mm0	; and bring the low 32 bits of the bit buffer back into ebp
1023
1024	emms	; leave MMX state before returning
1025
1026L_update_hold:
1027
1028
1029
1030	and  ebp,ebx	; keep only the remaining valid bits
1031	mov  [edx+hold_state],ebp	; state->hold = remaining bits
1032
1033
1034
1035
1036	mov  ebx, [esp+20]	; ebx = input limit
; Write back the remaining counts: eax = strm, esi = input pointer,
; edi = output pointer.
;   [strm+4]  = input limit  - esi + 11
;   [strm+16] = output limit - edi + 257
; The former code compared limit and pointer and took one of two branches, but
; both arms produced the same 32-bit result (a-b+k versus -(b-a)+k), so a
; single subtraction is sufficient. esi, edi and ebx are not read again before
; the epilogue restores them.
1040	sub  ebx,esi
1041	add  ebx,11
1042	mov  [eax+4],ebx
1053L_fixup_out:
1055	mov  ebx, [esp+16]	; ebx = output limit
1059	sub  ebx,edi
1060	add  ebx,257
1061	mov  [eax+16],ebx
1068
1069
1070
1071
1072
1073L_done:
; Epilogue: release the 64-byte frame and restore, in reverse order, everything
; the prologue pushed (flags, ebx, ebp, esi, edi). The arguments stay on the
; stack for the caller to remove (plain ret).
1074	add  esp,64
1075	popfd
1076	pop  ebx
1077	pop  ebp
1078	pop  esi
1079	pop  edi
1080	ret
1081
1082_TEXT	ends
1083end