/security/nss/lib/freebl/arcfour-amd64-sun.s
http://github.com/zpao/v8monkey · Assembly · 116 lines · 116 code · 0 blank · 0 comment · 1 complexity · 92636f144fbb8750c15c422c31484ae6 MD5 · raw file
- / ***** BEGIN LICENSE BLOCK *****
- / Version: MPL 1.1/GPL 2.0/LGPL 2.1
- /
- / The contents of this file are subject to the Mozilla Public License Version
- / 1.1 (the "License"); you may not use this file except in compliance with
- / the License. You may obtain a copy of the License at
- / http://www.mozilla.org/MPL/
- /
- / Software distributed under the License is distributed on an "AS IS" basis,
- / WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- / for the specific language governing rights and limitations under the
- / License.
- /
- / The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source"
- /
- / The Initial Developer of the Original Code is
- / Marc Bevand <bevand_m@epita.fr> .
- / Portions created by the Initial Developer are
- / Copyright (C) 2004 the Initial Developer. All Rights Reserved.
- /
- / Contributor(s):
- /
- / Alternatively, the contents of this file may be used under the terms of
- / either the GNU General Public License Version 2 or later (the "GPL"), or
- / the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- / in which case the provisions of the GPL or the LGPL are applicable instead
- / of those above. If you wish to allow use of your version of this file only
- / under the terms of either the GPL or the LGPL, and not to allow others to
- / use your version of this file under the terms of the MPL, indicate your
- / decision by deleting the provisions above and replace them with the notice
- / and other provisions required by the GPL or the LGPL. If you do not delete
- / the provisions above, a recipient may use your version of this file under
- / the terms of any one of the MPL, the GPL or the LGPL.
- /
- / ***** END LICENSE BLOCK *****
- / ** ARCFOUR implementation optimized for AMD64.
- / **
- / ** The throughput achieved by this code is about 320 MBytes/sec, on
- / ** a 1.8 GHz AMD Opteron (rev C0) processor.
/ ---------------------------------------------------------------------
/ ARCFOUR(key, len, in, out)
/
/ Arguments as they are read on entry:
/   %rdi = key   state block: 8-byte x at +0, 8-byte y at +8, followed
/                at +16 by the table d[], indexed with a stride of 8
/   %rsi = len   number of bytes to process
/   %rdx = in    input buffer
/   %rcx = out   output buffer
/
/ Working registers after set-up:
/   %rbp = d (key + 16)      %rcx = x          %rdx = y
/   %rsi = input cursor      %rdi = output cursor
/   %rax = tx, i.e. d[x] already loaded for the upcoming round
/   %rbx = ty, then the table index of the keystream byte
/   %r8  = keystream accumulator    %r11 = bytes left in the block
/   %r9  = in+len-8 in the block loop, in+len in the tail loop
/
/ %rbp and %rbx are pushed and popped. %rax, %rcx, %rdx, %rsi, %rdi,
/ %r8, %r9 and %r11 are overwritten. No value is set up for return.
/
/ NOTE(review): only the low byte of y and of each loaded table entry
/ is wrapped by the byte-wide adds, and only the low byte of x and y
/ is written back. This presumably relies on x, y and every d[] entry
/ holding values in 0..255 - confirm against the key set-up code.
/ ---------------------------------------------------------------------
	.text
	.align	16
	.globl	ARCFOUR
	.type	ARCFOUR,@function
ARCFOUR:
	/ Save the two callee-saved registers this routine uses.
	pushq	%rbp
	pushq	%rbx

	/ Move the arguments into their working registers.
	movq	%rdi, %rbp		/ rbp = key
	movq	%rsi, %rbx		/ rbx = len
	movq	%rdx, %rsi		/ rsi = in
	movq	%rcx, %rdi		/ rdi = out
	movq	(%rbp), %rcx		/ rcx = key->x
	movq	8(%rbp), %rdx		/ rdx = key->y
	addq	$16, %rbp		/ rbp = d, the table behind x and y
	incq	%rcx			/ step x for the first round ...
	andq	$255, %rcx		/ ... and wrap it into 0..255
	leaq	-8(%rbx,%rsi), %rbx	/ rbx = in + len - 8
	movq	%rbx, %r9		/ keep the bound in r9, rbx is reused
	movq	0(%rbp,%rcx,8), %rax	/ preload tx = d[x]
	cmpq	%rsi, %rbx
	jl	.Ltail			/ in+len-8 < in (signed): no full block

	/ Main loop: one 8-byte block per pass.
.Lblock8:
	addq	$8, %rsi		/ move both cursors past the block,
	addq	$8, %rdi		/ it is addressed as -8(cursor) below

	/ Gather eight keystream bytes in %r8.
	movq	$8, %r11		/ bytes still to generate
.Lks_byte:
	addb	%al, %dl		/ y = (y + tx) mod 256
	movl	0(%rbp,%rdx,8), %ebx	/ ty = d[y]
	movl	%ebx, 0(%rbp,%rcx,8)	/ d[x] = ty
	addb	%al, %bl		/ bl = (ty + tx) mod 256
	movl	%eax, 0(%rbp,%rdx,8)	/ d[y] = tx, swap is complete
	incb	%cl			/ x = (x + 1) mod 256 for the next round
	movl	0(%rbp,%rcx,8), %eax	/ tx = d[x] for the next round
	movb	0(%rbp,%rbx,8), %r8b	/ low byte of r8 = d[ty + tx]
	decb	%r11b			/ ZF is set once the 8th byte is in
	rorq	$8, %r8			/ rotate the byte into place, ZF is kept
	jnz	.Lks_byte		/ branch on the ZF left by decb

	/ Combine keystream and input, then test for another full block.
	xorq	-8(%rsi), %r8		/ r8 ^= the 8 input bytes
	cmpq	%r9, %rsi		/ flags consumed by the jle below
	movq	%r8, -8(%rdi)		/ write 8 output bytes, flags are kept
	jle	.Lblock8		/ in <= in+len-8 (signed): go again

	/ Tail: whatever is left, one byte per pass.
.Ltail:
	addq	$8, %r9			/ r9 = in + len, the end of the input
.Ltail_byte:
	cmpq	%rsi, %r9
	jle	.Ldone			/ end <= in (signed): nothing left
	addb	%al, %dl		/ y = (y + tx) mod 256
	movl	0(%rbp,%rdx,8), %ebx	/ ty = d[y]
	movl	%ebx, 0(%rbp,%rcx,8)	/ d[x] = ty
	addb	%al, %bl		/ bl = (ty + tx) mod 256
	movl	%eax, 0(%rbp,%rdx,8)	/ d[y] = tx, swap is complete
	incb	%cl			/ x = (x + 1) mod 256 for the next round
	movl	0(%rbp,%rcx,8), %eax	/ tx = d[x] for the next round
	movb	0(%rbp,%rbx,8), %r8b	/ keystream byte = d[ty + tx]
	xorb	(%rsi), %r8b		/ combine with one input byte
	movb	%r8b, (%rdi)		/ write one output byte
	incq	%rsi			/ advance the input cursor
	incq	%rdi			/ advance the output cursor
	jmp	.Ltail_byte

	/ Write the state back and return.
.Ldone:
	decq	%rcx			/ take back the look-ahead step of x
	movb	%dl, -8(%rbp)		/ low byte of key->y = y
	movb	%cl, -16(%rbp)		/ low byte of key->x = x
	popq	%rbx
	popq	%rbp
	ret
.L_ARCFOUR_end:
	.size	ARCFOUR,.L_ARCFOUR_end-ARCFOUR