# PageRenderTime 20ms CodeModel.GetById 16ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms
#
# /security/nss/lib/freebl/arcfour-amd64-gas.s
#
# http://github.com/zpao/v8monkey
# Assembly | 120 lines | 120 code | 0 blank | 0 comment | 1 complexity | e9c2fc312769e8ca96a1bc8f65c9efb9 MD5 | raw file
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source"
#
# The Initial Developer of the Original Code is
# Marc Bevand <bevand_m@epita.fr> .
# Portions created by the Initial Developer are
# Copyright (C) 2004 the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

 37# ** ARCFOUR implementation optimized for AMD64.
 38# **
 39# ** The throughput achieved by this code is about 320 MBytes/sec, on
 40# ** a 1.8 GHz AMD Opteron (rev C0) processor.
 41
 42.text
 43.align 16
 44.globl ARCFOUR
 45.type ARCFOUR,@function
 46ARCFOUR:
 47	pushq	%rbp
 48	pushq	%rbx
 49	movq	%rdi,		%rbp	# key = ARG(key)
 50	movq	%rsi,		%rbx	# rbx = ARG(len)
 51	movq	%rdx,		%rsi	# in = ARG(in)
 52	movq	%rcx,		%rdi	# out = ARG(out)
 53	movq	(%rbp),		%rcx	# x = key->x
 54	movq	8(%rbp),	%rdx	# y = key->y
 55	addq	$16,		%rbp	# d = key->data
 56	incq	%rcx			# x++
 57	andq	$255,		%rcx	# x &= 0xff
 58	leaq	-8(%rbx,%rsi),	%rbx	# rbx = in+len-8
 59	movq	%rbx,		%r9	# tmp = in+len-8
 60	movq	0(%rbp,%rcx,8),	%rax	# tx = d[x]
 61	cmpq	%rsi,		%rbx	# cmp in with in+len-8
 62	jl	.Lend			# jump if (in+len-8 < in)
 63
 64.Lstart:
 65	addq	$8,		%rsi		# increment in
 66	addq	$8,		%rdi		# increment out
 67
 68	# generate the next 8 bytes of the rc4 stream into %r8
 69	movq	$8,		%r11		# byte counter
 701:	addb	%al,		%dl		# y += tx
 71	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
 72	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
 73	addb	%al,		%bl		# val = ty + tx
 74	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
 75	incb	%cl				# x++		(NEXT ROUND)
 76	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
 77	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
 78	decb	%r11b
 79	rorq	$8,		%r8		# (ror does not change ZF)
 80	jnz 	1b
 81
 82	# xor 8 bytes
 83	xorq	-8(%rsi),	%r8
 84	cmpq	%r9,		%rsi		# cmp in+len-8 with in
 85	movq	%r8,		-8(%rdi)
 86	jle	.Lstart				# jump if (in <= in+len-8)
 87
 88.Lend:
 89	addq	$8,		%r9		# tmp = in+len
 90
 91	# handle the last bytes, one by one
 921:	cmpq	%rsi,		%r9		# cmp in with in+len
 93	jle	.Lfinished			# jump if (in+len <= in)
 94	addb	%al,		%dl		# y += tx
 95	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
 96	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
 97	addb	%al,		%bl		# val = ty + tx
 98	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
 99	incb	%cl				# x++		(NEXT ROUND)
100	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x]	(NEXT ROUND)
101	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
102	xorb	(%rsi),		%r8b		# xor 1 byte
103	movb	%r8b,		(%rdi)
104	incq	%rsi				# in++
105	incq	%rdi				# out++
106	jmp 1b
107
108.Lfinished:
109	decq	%rcx				# x--
110	movb	%dl,		-8(%rbp)	# key->y = y
111	movb	%cl,		-16(%rbp)	# key->x = x
112	popq	%rbx
113	popq	%rbp
114	ret
115.L_ARCFOUR_end:
116.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
117
# Magic indicating no need for an executable stack
# (emits an empty .note.GNU-stack section, then switches back to the
# section that was active before it)
.section .note.GNU-stack,"",@progbits
.previous