/security/nss/lib/freebl/arcfour-amd64-gas.s
http://github.com/zpao/v8monkey · Assembly · 120 lines · 120 code · 0 blank · 0 comment · 1 complexity · e9c2fc312769e8ca96a1bc8f65c9efb9 MD5 · raw file
- # ***** BEGIN LICENSE BLOCK *****
- # Version: MPL 1.1/GPL 2.0/LGPL 2.1
- #
- # The contents of this file are subject to the Mozilla Public License Version
- # 1.1 (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- # http://www.mozilla.org/MPL/
- #
- # Software distributed under the License is distributed on an "AS IS" basis,
- # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- # for the specific language governing rights and limitations under the
- # License.
- #
- # The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source"
- #
- # The Initial Developer of the Original Code is
- # Marc Bevand <bevand_m@epita.fr> .
- # Portions created by the Initial Developer are
- # Copyright (C) 2004 the Initial Developer. All Rights Reserved.
- #
- # Contributor(s):
- #
- # Alternatively, the contents of this file may be used under the terms of
- # either the GNU General Public License Version 2 or later (the "GPL"), or
- # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- # in which case the provisions of the GPL or the LGPL are applicable instead
- # of those above. If you wish to allow use of your version of this file only
- # under the terms of either the GPL or the LGPL, and not to allow others to
- # use your version of this file under the terms of the MPL, indicate your
- # decision by deleting the provisions above and replace them with the notice
- # and other provisions required by the GPL or the LGPL. If you do not delete
- # the provisions above, a recipient may use your version of this file under
- # the terms of any one of the MPL, the GPL or the LGPL.
- #
- # ***** END LICENSE BLOCK *****
- # ** ARCFOUR implementation optimized for AMD64.
- # **
- # ** The throughput achieved by this code is about 320 MBytes/sec, on
- # ** a 1.8 GHz AMD Opteron (rev C0) processor.
# ----------------------------------------------------------------------
# ARCFOUR(key, len, in, out)
#
# Syntax: GNU as, AT&T operand order.  Arguments are taken from
# rdi, rsi, rdx, rcx in that order.
#
#   rdi = key   state block, accessed as:
#                   +0    x        (loaded as a qword, stored back as a byte)
#                   +8    y        (loaded as a qword, stored back as a byte)
#                   +16   d[256]   one table entry every 8 bytes
#   rsi = len   number of bytes to process
#   rdx = in    source buffer
#   rcx = out   destination buffer
#
# Each output byte is  in[i] ^ d[(d[x] + d[y]) & 0xff]  after the usual
# x/y update and swap of d[x] and d[y].  Input is consumed in 8-byte
# groups while at least 8 bytes remain, then one byte at a time.
#
# Register roles inside the routine:
#   rbp = &d[0]          rcx = x               rdx = y
#   rax = tx = d[x], always pre-loaded for the upcoming round
#   rbx = in+len-8 during setup, then scratch (ty / table index)
#   rsi = in cursor      rdi = out cursor
#   r8  = keystream accumulator     r9 = end marker     r11 = byte counter
#
# Saved/restored: rbx, rbp.
# Overwritten:    rax, rcx, rdx, rsi, rdi, r8, r9, r11, flags.
# No return value is set up (rax holds the last pre-loaded tx on exit).
#
# NOTE(review): y is not masked after the 64-bit load, and the upper
# bytes of a 32-bit table load feed directly into an index, so the code
# relies on key->y and every table entry being < 256 -- confirm against
# the code that initialises the state block.
# ----------------------------------------------------------------------
        .text
        .p2align 4
        .globl  ARCFOUR
        .type   ARCFOUR,@function
ARCFOUR:
        pushq   %rbp
        pushq   %rbx

        # Move the arguments into their working registers.
        movq    %rdi, %rbp                  # rbp = key
        movq    %rsi, %rbx                  # rbx = len
        movq    %rdx, %rsi                  # rsi = in
        movq    %rcx, %rdi                  # rdi = out

        # Load the saved indices and point rbp at the table itself.
        movq    (%rbp), %rcx                # x = key->x
        movq    8(%rbp), %rdx               # y = key->y
        addq    $16, %rbp                   # rbp = &d[0]
        incq    %rcx                        # advance x for the first round
        andq    $255, %rcx                  # ... modulo 256

        leaq    -8(%rbx,%rsi), %rbx         # rbx = in + len - 8
        movq    %rbx, %r9                   # r9 keeps it; rbx becomes scratch
        movq    (%rbp,%rcx,8), %rax         # tx = d[x] for the first round
        cmpq    %rsi, %rbx
        jl      .Ltail_setup                # fewer than 8 bytes: skip the wide loop

        # ---- wide loop: 8 bytes per iteration ------------------------
.Lblock_loop:
        addq    $8, %rsi                    # cursors now point past this group,
        addq    $8, %rdi                    # so the group lives at -8(cursor)
        movq    $8, %r11                    # bytes still to generate

        # Build 8 keystream bytes in r8: each round drops a byte into
        # r8b and rotates right by 8, so after eight rounds the first
        # byte generated sits in the lowest byte of r8.
.Lblock_round:
        addb    %al, %dl                    # y += tx
        movl    (%rbp,%rdx,8), %ebx         # ty = d[y]
        movl    %ebx, (%rbp,%rcx,8)         # d[x] = ty
        addb    %al, %bl                    # index = ty + tx (mod 256)
        movl    %eax, (%rbp,%rdx,8)         # d[y] = tx
        incb    %cl                         # x for the following round
        movl    (%rbp,%rcx,8), %eax         # tx for the following round
        movb    (%rbp,%rbx,8), %r8b         # keystream byte = d[index]
        decb    %r11b                       # sets ZF when the group is done
        rorq    $8, %r8                     # rotate leaves ZF untouched
        jnz     .Lblock_round

        xorq    -8(%rsi), %r8               # combine with 8 input bytes
        cmpq    %r9, %rsi                   # flags consumed by jle below
        movq    %r8, -8(%rdi)               # store 8 output bytes (mov keeps flags)
        jle     .Lblock_loop                # loop while in <= in + len - 8

        # ---- tail: remaining bytes, one at a time --------------------
.Ltail_setup:
        addq    $8, %r9                     # r9 = in + len (end of input)

.Ltail_loop:
        cmpq    %rsi, %r9
        jle     .Lstore_state               # stop once end <= cursor
        addb    %al, %dl                    # y += tx
        movl    (%rbp,%rdx,8), %ebx         # ty = d[y]
        movl    %ebx, (%rbp,%rcx,8)         # d[x] = ty
        addb    %al, %bl                    # index = ty + tx (mod 256)
        movl    %eax, (%rbp,%rdx,8)         # d[y] = tx
        incb    %cl                         # x for the following round
        movl    (%rbp,%rcx,8), %eax         # tx for the following round
        movb    (%rbp,%rbx,8), %r8b         # keystream byte = d[index]
        xorb    (%rsi), %r8b                # combine with one input byte
        movb    %r8b, (%rdi)
        incq    %rsi
        incq    %rdi
        jmp     .Ltail_loop

        # ---- write the indices back and return -----------------------
.Lstore_state:
        decq    %rcx                        # undo the look-ahead increment of x
        movb    %dl, -8(%rbp)               # key->y = y  (rbp is key + 16)
        movb    %cl, -16(%rbp)              # key->x = x
        popq    %rbx
        popq    %rbp
        ret
.LARCFOUR_end:
        .size   ARCFOUR,.LARCFOUR_end-ARCFOUR

        # Mark the object as not needing an executable stack.
        .section .note.GNU-stack,"",@progbits
        .previous