/security/nss/lib/freebl/arcfour-amd64-gas.s

http://github.com/zpao/v8monkey · Assembly · 120 lines · 120 code · 0 blank · 0 comment · 1 complexity · e9c2fc312769e8ca96a1bc8f65c9efb9 MD5 · raw file

  1. # ***** BEGIN LICENSE BLOCK *****
  2. # Version: MPL 1.1/GPL 2.0/LGPL 2.1
  3. #
  4. # The contents of this file are subject to the Mozilla Public License Version
  5. # 1.1 (the "License"); you may not use this file except in compliance with
  6. # the License. You may obtain a copy of the License at
  7. # http://www.mozilla.org/MPL/
  8. #
  9. # Software distributed under the License is distributed on an "AS IS" basis,
  10. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. # for the specific language governing rights and limitations under the
  12. # License.
  13. #
  14. # The Original Code is "Marc Bevand's fast AMD64 ARCFOUR source"
  15. #
  16. # The Initial Developer of the Original Code is
  17. # Marc Bevand <bevand_m@epita.fr> .
  18. # Portions created by the Initial Developer are
  19. # Copyright (C) 2004 the Initial Developer. All Rights Reserved.
  20. #
  21. # Contributor(s):
  22. #
  23. # Alternatively, the contents of this file may be used under the terms of
  24. # either the GNU General Public License Version 2 or later (the "GPL"), or
  25. # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  26. # in which case the provisions of the GPL or the LGPL are applicable instead
  27. # of those above. If you wish to allow use of your version of this file only
  28. # under the terms of either the GPL or the LGPL, and not to allow others to
  29. # use your version of this file under the terms of the MPL, indicate your
  30. # decision by deleting the provisions above and replace them with the notice
  31. # and other provisions required by the GPL or the LGPL. If you do not delete
  32. # the provisions above, a recipient may use your version of this file under
  33. # the terms of any one of the MPL, the GPL or the LGPL.
  34. #
  35. # ***** END LICENSE BLOCK *****
  36. # ** ARCFOUR implementation optimized for AMD64.
  37. # **
  38. # ** The throughput achieved by this code is about 320 MBytes/sec, on
  39. # ** a 1.8 GHz AMD Opteron (rev C0) processor.
  # ---------------------------------------------------------------------
  # ARCFOUR(key, len, in, out)
  #
  # XORs `len` bytes read from `in` with the RC4 keystream generated from
  # the state at `key`, stores the result at `out`, and writes the
  # advanced x/y indices back into the state.
  #
  # Syntax: GNU as, AT&T operand order (source, destination).
  # In:     rdi = key, rsi = len, rdx = in, rcx = out
  #         (the System V AMD64 integer-argument registers, in order).
  #         NOTE(review): the C prototype is not visible from this file;
  #         confirm the parameter types against the caller.
  # Out:    no return value is set up; rax is left holding the table
  #         entry that was prefetched for the next round.
  # Saved:  rbx, rbp (pushed on entry, popped before ret).
  # Clobb:  rax, rcx, rdx, rsi, rdi, r8, r9, r11, flags.
  #
  # State layout, as accessed below:
  #   key+0    x    loaded as 8 bytes; only the low byte is written back
  #   key+8    y    loaded as 8 bytes; only the low byte is written back
  #   key+16   d[]  table addressed with an 8-byte stride; indices are
  #                 formed with 8-bit arithmetic, so they stay in 0..255
  #
  # Invariants the code relies on but never enforces:
  #   * bits 8..63 of key->y are zero (rdx is used as an index unmasked);
  #   * bits 8..31 of every table entry are zero (ebx is loaded from the
  #     table and then used as an index after a byte-wide add).
  #
  # Register roles once setup is complete:
  #   rbp = d (table base)      rcx = x          rdx = y
  #   rax = tx = d[x], already fetched for the upcoming round
  #   rsi = in cursor           rdi = out cursor
  #   r9  = in+len-8 (later in+len), the loop bound
  #   rbx = ty / output index scratch inside a round
  #   r8  = keystream accumulator        r11 = per-block byte counter
  #
  # NOTE(review): the pointer comparisons use the signed condition codes
  # (jl / jle). They are reproduced unchanged here.
  # ---------------------------------------------------------------------

  # One RC4 round. Consumes tx in al, leaves the keystream byte in r8b,
  # and finishes with x advanced and rax reloaded with the next tx, so
  # that load is already issued before the following round needs it.
  .macro rc4_round
          addb    %al, %dl                # y += tx
          movl    (%rbp,%rdx,8), %ebx     # ty = d[y]
          movl    %ebx, (%rbp,%rcx,8)     # d[x] = ty
          addb    %al, %bl                # bl = ty + tx (index of output byte)
          movl    %eax, (%rbp,%rdx,8)     # d[y] = tx  (swap complete)
          incb    %cl                     # x++ for the following round
          movl    (%rbp,%rcx,8), %eax     # tx = d[x] for the following round
          movb    (%rbp,%rbx,8), %r8b     # keystream byte = d[ty + tx]
  .endm

          .text
          .p2align 4
          .globl  ARCFOUR
          .type   ARCFOUR,@function
  ARCFOUR:
          pushq   %rbp
          pushq   %rbx

          # Move the arguments out of the registers the loops reuse.
          movq    %rdi, %rbp              # rbp = key
          movq    %rsi, %rbx              # rbx = len
          movq    %rdx, %rsi              # rsi = in
          movq    %rcx, %rdi              # rdi = out

          # Load the saved indices and point rbp at the table.
          movq    (%rbp), %rcx            # x = key->x
          movq    8(%rbp), %rdx           # y = key->y
          addq    $16, %rbp               # rbp = d = key->data
          incq    %rcx                    # pre-advance x for the first round
          andq    $255, %rcx              # wrap x to 0..255

          # r9 marks the last position at which a full 8-byte block fits.
          leaq    -8(%rbx,%rsi), %rbx     # rbx = in + len - 8
          movq    %rbx, %r9               # keep it; rbx is scratch from here on
          movq    (%rbp,%rcx,8), %rax     # tx = d[x] (64-bit load, first fetch only)
          cmpq    %rsi, %rbx
          jl      .Ltail_setup            # fewer than 8 bytes: skip the block loop

  # ---- whole 8-byte blocks ---------------------------------------------
  .Lblock_loop:
          addq    $8, %rsi                # cursors now point just past this block,
          addq    $8, %rdi                # so the block itself is at -8(cursor)
          movq    $8, %r11                # bytes still to generate for this block
  .Lnext_stream_byte:
          rc4_round
          decb    %r11b                   # sets ZF once all 8 bytes are generated
          rorq    $8, %r8                 # make room for the next byte; ror leaves
                                          # ZF alone, so jnz still tests the decb
          jnz     .Lnext_stream_byte
          # Eight rotations by 8 bits bring the first generated byte back to
          # the low byte of r8, matching the lowest-addressed input byte.
          xorq    -8(%rsi), %r8           # combine with 8 input bytes
          cmpq    %r9, %rsi               # is there room for another full block?
          movq    %r8, -8(%rdi)           # store result (mov does not touch flags)
          jle     .Lblock_loop            # yes while in <= in + len - 8

  # ---- remaining bytes, one at a time ----------------------------------
  .Ltail_setup:
          addq    $8, %r9                 # r9 = in + len (one past the last byte)
  .Ltail_loop:
          cmpq    %rsi, %r9
          jle     .Ldone                  # stop when in + len <= in cursor
          rc4_round
          xorb    (%rsi), %r8b            # combine with one input byte
          movb    %r8b, (%rdi)
          incq    %rsi                    # in++
          incq    %rdi                    # out++
          jmp     .Ltail_loop

  # ---- write the indices back and return -------------------------------
  .Ldone:
          decq    %rcx                    # undo the advance made for a round that
                                          # never ran
          movb    %dl, -8(%rbp)           # key->y = y (rbp is key+16 here)
          movb    %cl, -16(%rbp)          # key->x = x
          popq    %rbx
          popq    %rbp
          ret
  .L_ARCFOUR_end:
          .size   ARCFOUR,.L_ARCFOUR_end-ARCFOUR

          # Mark the object as not needing an executable stack.
          .section .note.GNU-stack,"",@progbits
          .previous