
/arch/x86/lib/memset_64.S

https://bitbucket.org/cresqo/cm7-p500-kernel
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
 * ISO C memset - set a memory block to a byte value.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination
 */
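Both versions below lean on the same byte-expansion trick: multiplying the zero-extended fill byte by 0x0101010101010101 replicates it into all eight byte lanes of a 64-bit register. A minimal standalone C sketch of that identity (illustrative only, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	/* Same value the movabs/mul pairs below leave in %rax. */
	static uint64_t expand_byte(uint8_t c)
	{
		return (uint64_t)c * 0x0101010101010101ULL;
	}

	int main(void)
	{
		assert(expand_byte(0xab) == 0xababababababababULL);
		return 0;
	}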
	/*
	 * Fast rep-string version, patched over memset at boot on CPUs
	 * with X86_FEATURE_REP_GOOD (see .altinstructions below).
	 */
	.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
	movq %rdi,%r9		/* save destination for the return value */
	movl %edx,%r8d
	andl $7,%r8d		/* r8 = count % 8, before mulq clobbers rdx */
	movl %edx,%ecx
	shrl $3,%ecx		/* ecx = count / 8 qwords */
	/* expand byte value */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	mulq %rsi		/* with rax, clobbers rdx */
	rep stosq
	movl %r8d,%ecx
	rep stosb
	movq %r9,%rax
	ret
.Lmemset_e:
	.previous
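A hedged C model of the rep-string version above (memset_rep is a hypothetical name; the real code performs the two splits with rep stosq / rep stosb and, as written, takes the count from the 32-bit %edx):

	#include <stddef.h>
	#include <stdint.h>

	void *memset_rep(void *dest, int c, size_t count)
	{
		uint64_t pattern = (uint64_t)(uint8_t)c * 0x0101010101010101ULL;
		uint64_t *q = dest;
		uint8_t *b;
		size_t i;

		for (i = 0; i < count / 8; i++)	/* rep stosq */
			*q++ = pattern;
		b = (uint8_t *)q;
		for (i = 0; i < count % 8; i++)	/* rep stosb */
			*b++ = (uint8_t)c;
		return dest;			/* saved in %r9 above */
	}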
ENTRY(memset)
ENTRY(__memset)
	CFI_STARTPROC
	movq %rdi,%r10		/* save destination for the return value */
	movq %rdx,%r11		/* save count; mul below clobbers rdx */

	/* expand byte value */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	mul %rcx		/* with rax, clobbers rdx */

	/* align dst */
	movl %edi,%r9d
	andl $7,%r9d
	jnz .Lbad_alignment
	CFI_REMEMBER_STATE
.Lafter_bad_alignment:

	movl %r11d,%ecx
	shrl $6,%ecx		/* ecx = number of 64-byte blocks */
	jz .Lhandle_tail
	.p2align 4
.Lloop_64:
	decl %ecx
	movq %rax,(%rdi)
	movq %rax,8(%rdi)
	movq %rax,16(%rdi)
	movq %rax,24(%rdi)
	movq %rax,32(%rdi)
	movq %rax,40(%rdi)
	movq %rax,48(%rdi)
	movq %rax,56(%rdi)
	leaq 64(%rdi),%rdi
	jnz .Lloop_64
	/* Handle tail in loops. The loops should be faster than
	   hard to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl %r11d,%ecx
	andl $63&(~7),%ecx	/* whole qwords left in the last 63 bytes */
	jz .Lhandle_7
	shrl $3,%ecx
	.p2align 4
.Lloop_8:
	decl %ecx
	movq %rax,(%rdi)
	leaq 8(%rdi),%rdi
	jnz .Lloop_8
.Lhandle_7:
	movl %r11d,%ecx
	andl $7,%ecx		/* trailing bytes (count % 8) */
	jz .Lende
	.p2align 4
.Lloop_1:
	decl %ecx
	movb %al,(%rdi)
	leaq 1(%rdi),%rdi
	jnz .Lloop_1

.Lende:
	movq %r10,%rax		/* return original destination */
	ret
	CFI_RESTORE_STATE
.Lbad_alignment:
	cmpq $7,%r11
	jbe .Lhandle_7		/* tiny buffer: the byte loop covers it all */
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8		/* r8 = bytes up to the next 8-byte boundary */
	addq %r8,%rdi
	subq %r8,%r11
	jmp .Lafter_bad_alignment

.Lfinal:
	CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
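The fallback path above, restated as a hedged C sketch (memset_unrolled is a hypothetical name; the asm handles a misaligned head with a single unaligned qword store rather than a byte loop, but the block structure is the same):

	#include <stddef.h>
	#include <stdint.h>

	void *memset_unrolled(void *dest, int c, size_t count)
	{
		uint64_t pattern = (uint64_t)(uint8_t)c * 0x0101010101010101ULL;
		uint8_t *p = dest;
		int i;

		if (count > 7)				/* .Lbad_alignment */
			while ((uintptr_t)p & 7) {
				*p++ = (uint8_t)c;
				count--;
			}
		for (; count >= 64; count -= 64)	/* .Lloop_64 */
			for (i = 0; i < 8; i++, p += 8)
				*(uint64_t *)p = pattern;
		for (; count >= 8; count -= 8, p += 8)	/* .Lloop_8 */
			*(uint64_t *)p = pattern;
		for (; count; count--)			/* .Lloop_1 */
			*p++ = (uint8_t)c;
		return dest;
	}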
	/* Some CPUs run faster using the string instructions.
	   It is also a lot simpler. Use this when possible. */

#include <asm/cpufeature.h>

	.section .altinstructions,"a"
	.align 8
	.quad memset			/* original code to patch */
	.quad .Lmemset_c		/* replacement code */
	.byte X86_FEATURE_REP_GOOD	/* CPU feature that enables it */
	.byte .Lfinal - memset		/* length of the original */
	.byte .Lmemset_e - .Lmemset_c	/* length of the replacement */
	.previous
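For reference, each record emitted above lines up with the kernel's struct alt_instr of this era; the field names below follow my reading of 2.6.3x-era <asm/alternative.h> and should be treated as an assumption. At boot, apply_alternatives() copies the .Lmemset_c body over memset whenever the CPU advertises X86_FEATURE_REP_GOOD.

	#include <stdint.h>

	/* Assumed layout of one .altinstructions record (x86-64). */
	struct alt_instr {
		uint8_t *instr;		/* .quad memset: code to patch */
		uint8_t *replacement;	/* .quad .Lmemset_c */
		uint8_t  cpuid;		/* .byte X86_FEATURE_REP_GOOD */
		uint8_t  instrlen;	/* .byte .Lfinal - memset */
		uint8_t  replacementlen; /* .byte .Lmemset_e - .Lmemset_c */
		uint8_t  pad[5];	/* padding implied by .align 8 */
	};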