PageRenderTime 27ms CodeModel.GetById 18ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 1ms

/arch/x86_64/lib/copy_page.S

https://bitbucket.org/evzijst/gittest
Assembly | 101 lines | 82 code | 19 blank | 0 comment | 0 complexity | 37d6474207b19db73bda041962164650 MD5 | raw file
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

/* Don't use streaming stores, because it's better when the target
   ends up in the cache. */

/* Could vary the prefetch distance based on SMP/UP */
  7
  8	.globl copy_page
  9	.p2align 4
 10copy_page:
 11	subq	$3*8,%rsp
 12	movq	%rbx,(%rsp)
 13	movq	%r12,1*8(%rsp)
 14	movq	%r13,2*8(%rsp)
 15			
 16	movl	$(4096/64)-5,%ecx
 17	.p2align 4
 18.Loop64:	
 19  	dec     %rcx
 20
 21	movq        (%rsi), %rax
 22	movq      8 (%rsi), %rbx
 23	movq     16 (%rsi), %rdx
 24	movq     24 (%rsi), %r8
 25	movq     32 (%rsi), %r9
 26	movq     40 (%rsi), %r10
 27	movq     48 (%rsi), %r11
 28	movq     56 (%rsi), %r12
 29
 30	prefetcht0 5*64(%rsi)
 31
 32	movq     %rax,    (%rdi)
 33	movq     %rbx,  8 (%rdi)
 34	movq     %rdx, 16 (%rdi)
 35	movq     %r8,  24 (%rdi)
 36	movq     %r9,  32 (%rdi)
 37	movq     %r10, 40 (%rdi)
 38	movq     %r11, 48 (%rdi)
 39	movq     %r12, 56 (%rdi)
 40
 41	leaq    64 (%rsi), %rsi
 42	leaq    64 (%rdi), %rdi
 43
 44	jnz     .Loop64
 45
 46	movl	$5,%ecx
 47	.p2align 4
 48.Loop2:	
 49	decl   %ecx
 50
 51	movq        (%rsi), %rax
 52	movq      8 (%rsi), %rbx
 53	movq     16 (%rsi), %rdx
 54	movq     24 (%rsi), %r8
 55	movq     32 (%rsi), %r9
 56	movq     40 (%rsi), %r10
 57	movq     48 (%rsi), %r11
 58	movq     56 (%rsi), %r12
 59
 60	movq     %rax,    (%rdi)
 61	movq     %rbx,  8 (%rdi)
 62	movq     %rdx, 16 (%rdi)
 63	movq     %r8,  24 (%rdi)
 64	movq     %r9,  32 (%rdi)
 65	movq     %r10, 40 (%rdi)
 66	movq     %r11, 48 (%rdi)
 67	movq     %r12, 56 (%rdi)
 68	
 69	leaq	64(%rdi),%rdi			
 70	leaq	64(%rsi),%rsi			
 71	
 72	jnz	.Loop2		
 73	
 74	movq	(%rsp),%rbx
 75	movq	1*8(%rsp),%r12
 76	movq	2*8(%rsp),%r13
 77	addq	$3*8,%rsp
 78	ret
 79	
	/* C-stepping K8 CPUs run faster using the string copy instructions.
	   That version is also a lot simpler. Use it when possible. */
 82
 83#include <asm/cpufeature.h>		
 84		
	/*
	 * Alternative-instruction record for copy_page.  It names the
	 * code to patch (copy_page), the replacement (copy_page_c), a
	 * CPU feature number, and two one-byte lengths, both set to the
	 * size of the replacement.
	 * NOTE(review): presumably this matches the kernel's
	 * struct alt_instr (original length, then replacement length) and
	 * X86_FEATURE_K8_C comes from <asm/cpufeature.h> -- confirm
	 * against those definitions before changing the layout.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad  copy_page			/* code to be patched */
	.quad  copy_page_c			/* replacement code */
	.byte  X86_FEATURE_K8_C			/* feature that selects it */
	.byte  copy_page_c_end-copy_page_c	/* length (see note above) */
	.byte  copy_page_c_end-copy_page_c	/* length (see note above) */
	.previous

	/*
	 * Replacement body: copy 4096/8 quadwords from (%rsi) to (%rdi)
	 * with a single rep movsq, then return.  Its size is taken from
	 * the two labels, so nothing may be placed between them that is
	 * not part of the replacement.
	 */
	.section .altinstr_replacement,"ax"
copy_page_c:
	movl	$4096/8, %ecx
	rep
	movsq
	ret
copy_page_c_end:
	.previous