PageRenderTime 21ms CodeModel.GetById 14ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/arch/frv/lib/memset.S

https://bitbucket.org/evzijst/gittest
Assembly | 182 lines | 179 code | 3 blank | 0 comment | 0 complexity | 0218cda1ae64a6b03ceaea691aecc3be MD5 | raw file
  1/* memset.S: optimised assembly memset
  2 *
  3 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
  4 * Written by David Howells (dhowells@redhat.com)
  5 *
  6 * This program is free software; you can redistribute it and/or
  7 * modify it under the terms of the GNU General Public License
  8 * as published by the Free Software Foundation; either version
  9 * 2 of the License, or (at your option) any later version.
 10 */
 11
 12
 13        .text
# align what follows to a 16-byte (2^4) boundary
 14        .p2align	4
 15
 16###############################################################################
 17#
 18# void *memset(void *p, char ch, size_t count)
 19#
 20# - NOTE: must not use any stack. exception detection performs function return
 21#         to caller's fixup routine, aborting the remainder of the set
 22#         GR4, GR7, GR8, and GR11 must be managed
#
# Register usage, as read from the code below:
#   GR8  - p on entry; never written here, so it still holds p on return
#   GR9  - ch on entry; reduced to its low 8 bits
#   GR10 - count on entry; only read
#   GR4  - address cursor (starts as a copy of GR8)
#   GR5  - bytes still to be written (starts as a copy of GR10)
#   GR6  - step size subtracted from GR5 (GR7 takes over this role from the
#          8-byte remnant onwards)
#   GR7  - index register for the store-with-update instructions
#   GR12 - fill byte replicated to 2 and then 4 bytes; GR13 is a copy of GR12
#   ICC0, ICC1, ICC3, CC5 and CC7 are overwritten; GR11 is not touched
#
# Outline: align the cursor to 8 bytes with at most one 1-, 2- and 4-byte
# store, write 64-byte chunks in a loop, then mop up with 32-, 16-, 8-, 4-,
# 2- and 1-byte steps. Every step is predicated on a condition register, and
# the routine returns as soon as the remaining count reaches zero.
#
# NOTE(review): the .p suffix is understood to be the FR-V packing flag
#         (issue together with the following instruction), so instruction
#         order in this routine must not be rearranged - confirm against
#         the architecture manual before editing.
 23#
 24###############################################################################
 25        .globl		memset,__memset_end
 26        .type		memset,@function
 27memset:
	# Take working copies of the arguments and return at once when count is
	# zero. ICC3 is set from count here and, per the comments below, again by
	# each executed csubcc, so every "beqlr icc3" returns to the caller as
	# soon as nothing is left to write.
 28	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
 29	andi		gr9,#0xff,gr9
 30	or.p		gr8,gr0,gr4			; GR4 = address
 31	beqlr		icc3,#0
 32
 33	# conditionally write a byte to 2b-align the address
	# CC7 is true when bit 0 of the address is set; the store, the count
	# decrement and the address increment all execute only in that case.
 34	setlos.p	#1,gr6
 35	andicc		gr4,#1,gr0,icc0
 36	ckne		icc0,cc7
 37	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
 38	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
 39	cadd.p		gr4,gr6,gr4		,cc7,#1
 40	beqlr		icc3,#0
 41
 42	# conditionally write a halfword (2 bytes) to 4b-align the address
	# CC7 = (bit 1 of the address is set) AND CC5, where CC5 is true when
	# GR5 - 2 produced no carry, that is when at least 2 bytes remain.
	# GR12 is built unconditionally because later steps need the pattern too.
 43	andicc.p	gr4,#2,gr0,icc0
 44	subicc		gr5,#2,gr0,icc1
 45	setlos.p	#2,gr6
 46	ckne		icc0,cc7
 47	slli.p		gr9,#8,gr12			; need to double up the pattern
 48	cknc		icc1,cc5
 49	or.p		gr9,gr12,gr12
 50	andcr		cc7,cc5,cc7
 51
 52	csth.p		gr12,@(gr4,gr0)		,cc7,#1
 53	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
 54	cadd.p		gr4,gr6,gr4		,cc7,#1
 55	beqlr		icc3,#0
 56
 57	# conditionally write a word (4 bytes) to 8b-align the address
	# Same scheme as above: CC7 = (bit 2 of the address is set) AND
	# (at least 4 bytes remain). GR12 is widened to four pattern bytes.
 58	andicc.p	gr4,#4,gr0,icc0
 59	subicc		gr5,#4,gr0,icc1
 60	setlos.p	#4,gr6
 61	ckne		icc0,cc7
 62	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
 63	cknc		icc1,cc5
 64	or.p		gr13,gr12,gr12
 65	andcr		cc7,cc5,cc7
 66
 67	cst.p		gr12,@(gr4,gr0)		,cc7,#1
 68	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
 69	cadd.p		gr4,gr6,gr4		,cc7,#1
 70	beqlr		icc3,#0
 71
	# GR13 becomes a copy of GR12 so that the GR12/GR13 pair carries eight
	# pattern bytes for the cstdu stores that follow.
 72	or.p		gr12,gr12,gr13			; need to octuple-up the pattern
 73
 74	# the address is now 8b-aligned - loop around writing 64b chunks
	# GR4 is biased down by 8 and GR7 holds 8 because each cstdu addresses
	# GR4+GR7; judging by the eight back-to-back stores per 64-byte pass (and
	# by the fault handler adding GR7 to GR4) that sum is also written back
	# to GR4 by every executed store.
 75	setlos		#8,gr7
 76	subi.p		gr4,#8,gr4			; store with update index does weird stuff
 77	setlos		#64,gr6
 78
	# CC7 is true while GR5 - 64 gives no carry, that is while at least 64
	# bytes remain. One pass issues eight 8-byte stores and takes 64 off GR5;
	# ICC0 is recomputed from the new GR5 before the loop-back test.
	# NOTE(review): the digits at the start of each line in this listing look
	# like line numbers fused in by text extraction; on the next line the
	# number 80 appears to be fused with the local label 0: that the
	# bnc ...,0b at the bottom of the loop targets - confirm against upstream.
 79	subicc		gr5,#64,gr0,icc0
 800:	cknc		icc0,cc7
 81	cstdu		gr12,@(gr4,gr7)		,cc7,#1
 82	cstdu		gr12,@(gr4,gr7)		,cc7,#1
 83	cstdu		gr12,@(gr4,gr7)		,cc7,#1
 84	cstdu		gr12,@(gr4,gr7)		,cc7,#1
 85	cstdu		gr12,@(gr4,gr7)		,cc7,#1
 86	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
 87	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
 88	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
 89	subicc		gr5,#64,gr0,icc0
 90	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
 91	beqlr		icc3,#0
 92	bnc		icc0,#2,0b
 93
 94	# now do 32-byte remnant
	# Four 8-byte stores, executed only when at least 32 bytes remain. GR6 is
	# reloaded with 16 and ICC0 recomputed against 16 on the way through,
	# ready for the next step.
 95	subicc.p	gr5,#32,gr0,icc0
 96	setlos		#32,gr6
 97	cknc		icc0,cc7
 98	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
 99	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
100	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
101	setlos		#16,gr6
102	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
103	subicc		gr5,#16,gr0,icc0
104	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
105	beqlr		icc3,#0
106
107	# now do 16-byte remnant
	# Two 8-byte stores, executed only when at least 16 bytes remain.
108	cknc		icc0,cc7
109	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
110	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
111	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
112	beqlr		icc3,#0
113
114	# now do 8-byte remnant
	# GR7 (still 8) doubles as the amount taken off GR5, and is then set to 4
	# for the word store in the next step.
115	subicc		gr5,#8,gr0,icc1
116	cknc		icc1,cc7
117	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
118	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
119	setlos.p	#4,gr7
120	beqlr		icc3,#0
121
122	# now do 4-byte remnant
	# GR7 is now 4, so GR4 is moved up by 4 first: GR4+GR7 then lands on the
	# same next-unwritten address that GR4+8 denoted before.
123	subicc		gr5,#4,gr0,icc0
124	addi.p		gr4,#4,gr4
125	cknc		icc0,cc7
126	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
127	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
128	subicc.p	gr5,#2,gr0,icc1
129	beqlr		icc3,#0
130
131	# now do 2-byte remnant
	# Same re-biasing with a step of 2: GR7 = 2 and GR4 moves up by 2.
132	setlos		#2,gr7
133	addi.p		gr4,#2,gr4
134	cknc		icc1,cc7
135	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
136	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
137	subicc.p	gr5,#1,gr0,icc0
138	beqlr		icc3,#0
139
140	# now do 1-byte remnant
	# GR4 is moved up to the byte address itself and GR7 is zeroed, so
	# GR4+GR7 still names the store target; the store indexes with GR0 and
	# has no update form. bralr then returns to the caller unconditionally.
141	setlos		#0,gr7
142	addi.p		gr4,#2,gr4
143	cknc		icc0,cc7
144	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
145	bralr
	# exported end-of-routine marker, also used to compute the symbol size
146__memset_end:
147
148	.size		memset, __memset_end-memset
149
150###############################################################################
151#
152# clear memory in userspace
153# - return the number of bytes that could not be cleared (0 on complete success)
154#
155# long __memset_user(void *p, size_t count)
156#
# Register usage, as read from the code below:
#   GR8  - p on entry, left in place as the first argument to memset;
#          holds the result on exit
#   GR9  - count on entry; copied to GR10 and then zeroed to act as the
#          fill byte handed to memset
#   GR11 - copy of LR taken before the call; both exits jump through it
#
157###############################################################################
158        .globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
159        .type		__memset_user,@function
160__memset_user:
	# keep the return address in GR11, since the call below overwrites LR
161	movsg		lr,gr11
162
163	# abuse memset to do the dirty work
	# i.e. call memset(p, 0, count): count moves from GR9 to GR10 and GR9
	# becomes the zero fill byte; GR8 already holds p
164	or.p		gr9,gr9,gr10
165	setlos		#0,gr9
166	call		memset
	# this label is the address memset returns to; on that normal path the
	# result is 0 (set by the instruction packed with the jump back)
167__memset_user_error_lr:
168	jmpl.p		@(gr11,gr0)
169	setlos		#0,gr8
170
171	# deal with any exception generated by memset
172	# GR4  - memset's address tracking pointer
173	# GR7  - memset's step value (index register for store insns)
174	# GR8  - memset's original start address
175	# GR10 - memset's original count
	#
	# GR4+GR7 is taken as the address the interrupted store was aimed at and
	# GR8+GR10 is one past the end of the region, so their difference is the
	# number of bytes left uncleared. The final sub is packed with the jump
	# back to the caller.
	# NOTE(review): how control reaches this label is not visible in this
	# file; presumably the fault fixup code keys on __memset_user_error_lr
	# being in LR - confirm against the exception handling code.
176__memset_user_error_handler:
177	add.p		gr4,gr7,gr4
178	add		gr8,gr10,gr8
179	jmpl.p		@(gr11,gr0)
180	sub		gr8,gr4,gr8		; we return the amount left uncleared
181
182	.size		__memset_user, .-__memset_user