
/arch/x86/crypto/salsa20-i586-asm_32.S

https://bitbucket.org/ndreys/linux-sunxi
# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
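
# Overview (reader's note, inferred from the code below and the published
# Salsa20 spec): this is a 32-bit x86 implementation of the Salsa20/20
# stream cipher, exported through eSTREAM ECRYPT-style entry points
# (ECRYPT_keysetup, ECRYPT_ivsetup, ECRYPT_encrypt_bytes). The cipher
# state is 16 little-endian uint32 words x[0..15]. Per the Salsa20 spec,
# each quarter-round updates four words (y0,y1,y2,y3) as:
#
#	y1 ^= (y0 + y3) <<< 7
#	y2 ^= (y1 + y0) <<< 9
#	y3 ^= (y2 + y1) <<< 13
#	y0 ^= (y3 + y2) <<< 18
#
# The add/rol/xor triples in the main loop below are these quarter-rounds,
# four of them interleaved at a time for instruction-level parallelism.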

# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
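	# The prologue reserves a 256-byte scratch frame and aligns %esp to
	# 32 bytes: %eax = (old %esp & 31) + 256 is subtracted from %esp, so
	# the original stack pointer is always %esp + %eax afterwards. That
	# is why the caller's arguments are read back below through
	# 4(%esp,%eax), 8(%esp,%eax), and so on.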
	# eax_stack = eax
	movl	%eax,80(%esp)
	# ebx_stack = ebx
	movl	%ebx,84(%esp)
	# esi_stack = esi
	movl	%esi,88(%esp)
	# edi_stack = edi
	movl	%edi,92(%esp)
	# ebp_stack = ebp
	movl	%ebp,96(%esp)
	# x = arg1
	movl	4(%esp,%eax),%edx
	# m = arg2
	movl	8(%esp,%eax),%esi
	# out = arg3
	movl	12(%esp,%eax),%edi
	# bytes = arg4
	movl	16(%esp,%eax),%ebx
	# bytes -= 0
	sub	$0,%ebx
	# goto done if unsigned<=
	jbe	._done
._start:
	# in0 = *(uint32 *) (x + 0)
	movl	0(%edx),%eax
	# in1 = *(uint32 *) (x + 4)
	movl	4(%edx),%ecx
	# in2 = *(uint32 *) (x + 8)
	movl	8(%edx),%ebp
	# j0 = in0
	movl	%eax,164(%esp)
	# in3 = *(uint32 *) (x + 12)
	movl	12(%edx),%eax
	# j1 = in1
	movl	%ecx,168(%esp)
	# in4 = *(uint32 *) (x + 16)
	movl	16(%edx),%ecx
	# j2 = in2
	movl	%ebp,172(%esp)
	# in5 = *(uint32 *) (x + 20)
	movl	20(%edx),%ebp
	# j3 = in3
	movl	%eax,176(%esp)
	# in6 = *(uint32 *) (x + 24)
	movl	24(%edx),%eax
	# j4 = in4
	movl	%ecx,180(%esp)
	# in7 = *(uint32 *) (x + 28)
	movl	28(%edx),%ecx
	# j5 = in5
	movl	%ebp,184(%esp)
	# in8 = *(uint32 *) (x + 32)
	movl	32(%edx),%ebp
	# j6 = in6
	movl	%eax,188(%esp)
	# in9 = *(uint32 *) (x + 36)
	movl	36(%edx),%eax
	# j7 = in7
	movl	%ecx,192(%esp)
	# in10 = *(uint32 *) (x + 40)
	movl	40(%edx),%ecx
	# j8 = in8
	movl	%ebp,196(%esp)
	# in11 = *(uint32 *) (x + 44)
	movl	44(%edx),%ebp
	# j9 = in9
	movl	%eax,200(%esp)
	# in12 = *(uint32 *) (x + 48)
	movl	48(%edx),%eax
	# j10 = in10
	movl	%ecx,204(%esp)
	# in13 = *(uint32 *) (x + 52)
	movl	52(%edx),%ecx
	# j11 = in11
	movl	%ebp,208(%esp)
	# in14 = *(uint32 *) (x + 56)
	movl	56(%edx),%ebp
	# j12 = in12
	movl	%eax,212(%esp)
	# in15 = *(uint32 *) (x + 60)
	movl	60(%edx),%eax
	# j13 = in13
	movl	%ecx,216(%esp)
	# j14 = in14
	movl	%ebp,220(%esp)
	# j15 = in15
	movl	%eax,224(%esp)
	# x_backup = x
	movl	%edx,64(%esp)
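	# Scratch-frame layout (byte offsets from the aligned %esp), as used
	# by the loads/stores in this function: 0..63 tmp block, 64 x_backup,
	# 68 m_backup, 72 out_backup, 76 bytes_backup, 80..96 saved
	# registers, 100..160 working words x0..x15, 164..224 input words
	# j0..j15, 228 ctarget.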
._bytesatleast1:
	#   bytes - 64
	cmp	$64,%ebx
	#   goto nocopy if unsigned>=
	jae	._nocopy
	#     ctarget = out
	movl	%edi,228(%esp)
	#     out = &tmp
	leal	0(%esp),%edi
	#     i = bytes
	mov	%ebx,%ecx
	#     while (i) { *out++ = *m++; --i }
	rep	movsb
	#     out = &tmp
	leal	0(%esp),%edi
	#     m = &tmp
	leal	0(%esp),%esi
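	#     Partial final block: fewer than 64 bytes remain, so the input
	#     is first copied into the 64-byte tmp area at 0(%esp) and both
	#     m and out are pointed at tmp; the real destination stays in
	#     ctarget, and only the valid prefix is copied out once the
	#     block has been processed (the rep movsb before
	#     ._bytesatleast64).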
._nocopy:
	#   out_backup = out
	movl	%edi,72(%esp)
	#   m_backup = m
	movl	%esi,68(%esp)
	#   bytes_backup = bytes
	movl	%ebx,76(%esp)
	#   in0 = j0
	movl	164(%esp),%eax
	#   in1 = j1
	movl	168(%esp),%ecx
	#   in2 = j2
	movl	172(%esp),%edx
	#   in3 = j3
	movl	176(%esp),%ebx
	#   x0 = in0
	movl	%eax,100(%esp)
	#   x1 = in1
	movl	%ecx,104(%esp)
	#   x2 = in2
	movl	%edx,108(%esp)
	#   x3 = in3
	movl	%ebx,112(%esp)
	#   in4 = j4
	movl	180(%esp),%eax
	#   in5 = j5
	movl	184(%esp),%ecx
	#   in6 = j6
	movl	188(%esp),%edx
	#   in7 = j7
	movl	192(%esp),%ebx
	#   x4 = in4
	movl	%eax,116(%esp)
	#   x5 = in5
	movl	%ecx,120(%esp)
	#   x6 = in6
	movl	%edx,124(%esp)
	#   x7 = in7
	movl	%ebx,128(%esp)
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in10 = j10
	movl	204(%esp),%edx
	#   in11 = j11
	movl	208(%esp),%ebx
	#   x8 = in8
	movl	%eax,132(%esp)
	#   x9 = in9
	movl	%ecx,136(%esp)
	#   x10 = in10
	movl	%edx,140(%esp)
	#   x11 = in11
	movl	%ebx,144(%esp)
	#   in12 = j12
	movl	212(%esp),%eax
	#   in13 = j13
	movl	216(%esp),%ecx
	#   in14 = j14
	movl	220(%esp),%edx
	#   in15 = j15
	movl	224(%esp),%ebx
	#   x12 = in12
	movl	%eax,148(%esp)
	#   x13 = in13
	movl	%ecx,152(%esp)
	#   x14 = in14
	movl	%edx,156(%esp)
	#   x15 = in15
	movl	%ebx,160(%esp)
	#   i = 20
	mov	$20,%ebp
	# p = x0
	movl	100(%esp),%eax
	# s = x5
	movl	120(%esp),%ecx
	# t = x10
	movl	140(%esp),%edx
	# w = x15
	movl	160(%esp),%ebx
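	# Register allocation for the rounds: the diagonal words travel in
	# registers (p = x0 in %eax, s = x5 in %ecx, t = x10 in %edx,
	# w = x15 in %ebx) while the other twelve words live in the frame;
	# %ebp counts down from 20 in steps of 4 because each loop
	# iteration performs two double-rounds (a column round followed by
	# a row round), i.e. 4 of the 20 Salsa20/20 rounds.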
._mainloop:
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# i -= 4
	sub	$4,%ebp
	# goto mainloop if unsigned >
	ja	._mainloop
	# x0 = p
	movl	%eax,100(%esp)
	# x5 = s
	movl	%ecx,120(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# x15 = w
	movl	%ebx,160(%esp)
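	#   Feedforward and output: each keystream word is x[i] + j[i] (the
	#   round output plus the original input state), which is then
	#   XORed into the message, 64 bytes per block.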
	#   out = out_backup
	movl	72(%esp),%edi
	#   m = m_backup
	movl	68(%esp),%esi
	#   in0 = x0
	movl	100(%esp),%eax
	#   in1 = x1
	movl	104(%esp),%ecx
	#   in0 += j0
	addl	164(%esp),%eax
	#   in1 += j1
	addl	168(%esp),%ecx
	#   in0 ^= *(uint32 *) (m + 0)
	xorl	0(%esi),%eax
	#   in1 ^= *(uint32 *) (m + 4)
	xorl	4(%esi),%ecx
	#   *(uint32 *) (out + 0) = in0
	movl	%eax,0(%edi)
	#   *(uint32 *) (out + 4) = in1
	movl	%ecx,4(%edi)
	#   in2 = x2
	movl	108(%esp),%eax
	#   in3 = x3
	movl	112(%esp),%ecx
	#   in2 += j2
	addl	172(%esp),%eax
	#   in3 += j3
	addl	176(%esp),%ecx
	#   in2 ^= *(uint32 *) (m + 8)
	xorl	8(%esi),%eax
	#   in3 ^= *(uint32 *) (m + 12)
	xorl	12(%esi),%ecx
	#   *(uint32 *) (out + 8) = in2
	movl	%eax,8(%edi)
	#   *(uint32 *) (out + 12) = in3
	movl	%ecx,12(%edi)
	#   in4 = x4
	movl	116(%esp),%eax
	#   in5 = x5
	movl	120(%esp),%ecx
	#   in4 += j4
	addl	180(%esp),%eax
	#   in5 += j5
	addl	184(%esp),%ecx
	#   in4 ^= *(uint32 *) (m + 16)
	xorl	16(%esi),%eax
	#   in5 ^= *(uint32 *) (m + 20)
	xorl	20(%esi),%ecx
	#   *(uint32 *) (out + 16) = in4
	movl	%eax,16(%edi)
	#   *(uint32 *) (out + 20) = in5
	movl	%ecx,20(%edi)
	#   in6 = x6
	movl	124(%esp),%eax
	#   in7 = x7
	movl	128(%esp),%ecx
	#   in6 += j6
	addl	188(%esp),%eax
	#   in7 += j7
	addl	192(%esp),%ecx
	#   in6 ^= *(uint32 *) (m + 24)
	xorl	24(%esi),%eax
	#   in7 ^= *(uint32 *) (m + 28)
	xorl	28(%esi),%ecx
	#   *(uint32 *) (out + 24) = in6
	movl	%eax,24(%edi)
	#   *(uint32 *) (out + 28) = in7
	movl	%ecx,28(%edi)
	#   in8 = x8
	movl	132(%esp),%eax
	#   in9 = x9
	movl	136(%esp),%ecx
	#   in8 += j8
	addl	196(%esp),%eax
	#   in9 += j9
	addl	200(%esp),%ecx
	#   in8 ^= *(uint32 *) (m + 32)
	xorl	32(%esi),%eax
	#   in9 ^= *(uint32 *) (m + 36)
	xorl	36(%esi),%ecx
	#   *(uint32 *) (out + 32) = in8
	movl	%eax,32(%edi)
	#   *(uint32 *) (out + 36) = in9
	movl	%ecx,36(%edi)
	#   in10 = x10
	movl	140(%esp),%eax
	#   in11 = x11
	movl	144(%esp),%ecx
	#   in10 += j10
	addl	204(%esp),%eax
	#   in11 += j11
	addl	208(%esp),%ecx
	#   in10 ^= *(uint32 *) (m + 40)
	xorl	40(%esi),%eax
	#   in11 ^= *(uint32 *) (m + 44)
	xorl	44(%esi),%ecx
	#   *(uint32 *) (out + 40) = in10
	movl	%eax,40(%edi)
	#   *(uint32 *) (out + 44) = in11
	movl	%ecx,44(%edi)
	#   in12 = x12
	movl	148(%esp),%eax
	#   in13 = x13
	movl	152(%esp),%ecx
	#   in12 += j12
	addl	212(%esp),%eax
	#   in13 += j13
	addl	216(%esp),%ecx
	#   in12 ^= *(uint32 *) (m + 48)
	xorl	48(%esi),%eax
	#   in13 ^= *(uint32 *) (m + 52)
	xorl	52(%esi),%ecx
	#   *(uint32 *) (out + 48) = in12
	movl	%eax,48(%edi)
	#   *(uint32 *) (out + 52) = in13
	movl	%ecx,52(%edi)
	#   in14 = x14
	movl	156(%esp),%eax
	#   in15 = x15
	movl	160(%esp),%ecx
	#   in14 += j14
	addl	220(%esp),%eax
	#   in15 += j15
	addl	224(%esp),%ecx
	#   in14 ^= *(uint32 *) (m + 56)
	xorl	56(%esi),%eax
	#   in15 ^= *(uint32 *) (m + 60)
	xorl	60(%esi),%ecx
	#   *(uint32 *) (out + 56) = in14
	movl	%eax,56(%edi)
	#   *(uint32 *) (out + 60) = in15
	movl	%ecx,60(%edi)
	#   bytes = bytes_backup
	movl	76(%esp),%ebx
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in8 += 1
	add	$1,%eax
	#   in9 += 0 + carry
	adc	$0,%ecx
	#   j8 = in8
	movl	%eax,196(%esp)
	#   j9 = in9
	movl	%ecx,200(%esp)
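	#   j8/j9 hold the 64-bit little-endian block counter (state words
	#   8 and 9); the add/adc pair above increments it once per
	#   64-byte block.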
	#   bytes - 64
	cmp	$64,%ebx
	#   goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	#     goto bytesatleast64 if unsigned>=
	jae	._bytesatleast64
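	#     Three-way dispatch on the comparison above: bytes > 64 keeps
	#     streaming (._bytesatleast65), bytes == 64 is done
	#     (._bytesatleast64), and bytes < 64 falls through to copy the
	#     valid prefix from tmp back to the destination saved in
	#     ctarget.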
	#       m = out
	mov	%edi,%esi
	#       out = ctarget
	movl	228(%esp),%edi
	#       i = bytes
	mov	%ebx,%ecx
	#       while (i) { *out++ = *m++; --i }
	rep	movsb
._bytesatleast64:
	#     x = x_backup
	movl	64(%esp),%eax
	#     in8 = j8
	movl	196(%esp),%ecx
	#     in9 = j9
	movl	200(%esp),%edx
	#     *(uint32 *) (x + 32) = in8
	movl	%ecx,32(%eax)
	#     *(uint32 *) (x + 36) = in9
	movl	%edx,36(%eax)
._done:
	#     eax = eax_stack
	movl	80(%esp),%eax
	#     ebx = ebx_stack
	movl	84(%esp),%ebx
	#     esi = esi_stack
	movl	88(%esp),%esi
	#     edi = edi_stack
	movl	92(%esp),%edi
	#     ebp = ebp_stack
	movl	96(%esp),%ebp
	#     leave
	add	%eax,%esp
	ret
._bytesatleast65:
	#   bytes -= 64
	sub	$64,%ebx
	#   out += 64
	add	$64,%edi
	#   m += 64
	add	$64,%esi
	# goto bytesatleast1
	jmp	._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
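	# Fills the context at arg1 from the key at arg2: key words go to
	# x[1..4] and x[11..14], and the four diagonal constants to x[0],
	# x[5], x[10] and x[15]. A 256-bit key supplies eight distinct
	# words; a 128-bit key is used twice.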
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   k = arg2
	movl	8(%esp,%eax),%ecx
	#   kbits = arg3
	movl	12(%esp,%eax),%edx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in1 = *(uint32 *) (k + 0)
	movl	0(%ecx),%ebx
	#   in2 = *(uint32 *) (k + 4)
	movl	4(%ecx),%esi
	#   in3 = *(uint32 *) (k + 8)
	movl	8(%ecx),%edi
	#   in4 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ebp
	#   *(uint32 *) (x + 4) = in1
	movl	%ebx,4(%eax)
	#   *(uint32 *) (x + 8) = in2
	movl	%esi,8(%eax)
	#   *(uint32 *) (x + 12) = in3
	movl	%edi,12(%eax)
	#   *(uint32 *) (x + 16) = in4
	movl	%ebp,16(%eax)
	#   kbits - 256
	cmp	$256,%edx
	#   goto kbits128 if unsigned<
	jb	._kbits128
._kbits256:
	#     in11 = *(uint32 *) (k + 16)
	movl	16(%ecx),%edx
	#     in12 = *(uint32 *) (k + 20)
	movl	20(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 24)
	movl	24(%ecx),%esi
	#     in14 = *(uint32 *) (k + 28)
	movl	28(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
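	#     The 256-bit-key constants below are the ASCII string
	#     "expand 32-byte k" as four little-endian words:
	#     0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.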
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 857760878
	mov	$857760878,%edx
	#     in10 = 2036477234
	mov	$2036477234,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
	#   goto keysetupdone
	jmp	._keysetupdone
._kbits128:
	#     in11 = *(uint32 *) (k + 0)
	movl	0(%ecx),%edx
	#     in12 = *(uint32 *) (k + 4)
	movl	4(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 8)
	movl	8(%ecx),%esi
	#     in14 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
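	#     The 128-bit-key constants below spell "expand 16-byte k":
	#     0x61707865, 0x3120646e, 0x79622d36, 0x6b206574.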
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 824206446
	mov	$824206446,%edx
	#     in10 = 2036477238
	mov	$2036477238,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
._keysetupdone:
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
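	# Stores the 8-byte IV at arg2 into state words x[6..7] of the
	# context at arg1, and resets the 64-bit block counter x[8..9] to
	# zero.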
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   iv = arg2
	movl	8(%esp,%eax),%ecx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in6 = *(uint32 *) (iv + 0)
	movl	0(%ecx),%edx
	#   in7 = *(uint32 *) (iv + 4)
	movl	4(%ecx),%ecx
	#   in8 = 0
	mov	$0,%ebx
	#   in9 = 0
	mov	$0,%esi
	#   *(uint32 *) (x + 24) = in6
	movl	%edx,24(%eax)
	#   *(uint32 *) (x + 28) = in7
	movl	%ecx,28(%eax)
	#   *(uint32 *) (x + 32) = in8
	movl	%ebx,32(%eax)
	#   *(uint32 *) (x + 36) = in9
	movl	%esi,36(%eax)
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret