PageRenderTime 15ms CodeModel.GetById 1ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/arch/sh/lib/checksum.S

http://github.com/mirrors/linux
Assembly | 414 lines | 407 code | 7 blank | 0 comment | 15 complexity | 69f672ae17917e3f6c56d2ab2143f92d MD5 | raw file
  1/* SPDX-License-Identifier: GPL-2.0+
  2 *
  3 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
  4 *
  5 * INET		An implementation of the TCP/IP protocol suite for the LINUX
  6 *		operating system.  INET is implemented using the  BSD Socket
  7 *		interface as the means of communication with the user level.
  8 *
  9 *		IP/TCP/UDP checksumming routines
 10 *
 11 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 12 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 13 *		Tom May, <ftom@netcom.com>
 14 *              Pentium Pro/II routines:
 15 *              Alexander Kjeldaas <astor@guardian.no>
 16 *              Finn Arne Gangstad <finnag@guardian.no>
 17 *		Lots of code moved from tcp.c and ip.c; see those files
 18 *		for more names.
 19 *
 20 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 21 *			     handling.
 22 *		Andi Kleen,  add zeroing on error
 23 *                   converted to pure assembler
 24 *
 25 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 26 */
 27
 28#include <asm/errno.h>
 29#include <linux/linkage.h>
 30
 31/*
 32 * computes a partial checksum, e.g. for TCP/UDP fragments
 33 */
 34
 35/*	
 36 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 37 */
 38
 39.text
 /*
  * csum_partial: add the bytes at buf into the running 32-bit sum and
  * return the updated sum in r0.
  *
  * Register use, as read from the code below:
  *   r4     buf, advanced by the post-increment loads
  *   r5     len, reduced as the alignment prologue consumes bytes
  *   r6     sum accumulator (copied to r0 in the rts delay slot)
  *   r7     copy of the original buf, tested at 9: for an odd start
  *   r0-r3  scratch
  *
  * Odd buf: the first byte is added, the sum is rotated left by 8 bits,
  * the remainder is summed from the now even address, and the sum is
  * rotated by 8 bits once more at 9:.
  *
  * The loops park the carry in r0 with movt before dt (which overwrites
  * T) and restore it with "cmp/eq #1, r0" in the branch delay slot.
  *
  * NOTE(review): the 486/Pentium remark in the comment below is
  * presumably inherited from the x86 implementation - confirm.
  */
 40ENTRY(csum_partial)
 41	  /*
 42	   * Experiments with Ethernet and SLIP connections show that buff
 43	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
 44	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 45	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 46	   * alignment for the unrolled loop.
 47	   */
 48	mov	r4, r0
 49	tst	#3, r0		! Check alignment.
 50	bt/s	2f		! Jump if alignment is ok.
 51	 mov	r4, r7		! Keep a copy to check for alignment
 52	!
 53	tst	#1, r0		! Check alignment.
 54	bt	21f		! Jump if alignment is boundary of 2bytes.
 55
 56	! buf is odd
 57	tst	r5, r5
 58	add	#-1, r5
	/*
	 * len == 0: nothing to add, return sum untouched.  Branch past the
	 * rotate at 9: - the sum has not been rotated on this path yet, so
	 * going through 9: (r7 is odd here) would return a byte-rotated sum.
	 */
 59	bt	10f
 60	mov.b	@r4+, r0
 61	extu.b	r0, r0
 62	addc	r0, r6		! t=0 from previous tst
	/* Rotate r6 left by 8: r6 = (r6 << 8) | (r6 >> 24). */
 63	mov	r6, r0
 64	shll8	r6
 65	shlr16	r0
 66	shlr8	r0
 67	or	r0, r6
 68	mov	r4, r0
 69	tst	#2, r0
 70	bt	2f
 7121:
 72	! buf is 2 byte aligned (len could be 0)
 73	add	#-2, r5		! Alignment uses up two bytes.
 74	cmp/pz	r5		!
 75	bt/s	1f		! Jump if we had at least two bytes.
 76	 clrt
 77	bra	6f
 78	 add	#2, r5		! r5 was < 2.  Deal with it.
 791:
	/* Consume one 16-bit word so that r4 becomes 4-byte aligned. */
 80	mov.w	@r4+, r0
 81	extu.w	r0, r0
 82	addc	r0, r6
 83	bf	2f
 84	add	#1, r6
 852:
 86	! buf is 4 byte aligned (len could be 0)
	/* r1 = r5 >> 5: number of 32-byte chunks for the unrolled loop. */
 87	mov	r5, r1
 88	mov	#-5, r0
 89	shld	r0, r1
 90	tst	r1, r1
 91	bt/s	4f		! if it's =0, go to 4f
 92	 clrt
 93	.align	2
 943:
	/* Eight longwords per pass, carry chained through addc. */
 95	mov.l	@r4+, r0
 96	mov.l	@r4+, r2
 97	mov.l	@r4+, r3
 98	addc	r0, r6
 99	mov.l	@r4+, r0
100	addc	r2, r6
101	mov.l	@r4+, r2
102	addc	r3, r6
103	mov.l	@r4+, r3
104	addc	r0, r6
105	mov.l	@r4+, r0
106	addc	r2, r6
107	mov.l	@r4+, r2
108	addc	r3, r6
109	addc	r0, r6
110	addc	r2, r6
	/* Save the carry in r0; the delay slot below puts it back into T. */
111	movt	r0
112	dt	r1
113	bf/s	3b
114	 cmp/eq	#1, r0
115	! here, we know r1==0
116	addc	r1, r6			! add carry to r6
1174:
	/* r0 = r5 & 0x1c: bytes left that still form whole longwords. */
118	mov	r5, r0
119	and	#0x1c, r0
120	tst	r0, r0
121	bt	6f
122	! 4 bytes or more remaining
123	mov	r0, r1
124	shlr2	r1
125	mov	#0, r2
1265:
	/*
	 * Each pass adds the longword loaded by the previous pass (r2 is 0
	 * the first time); the last one is added after the loop.
	 */
127	addc	r2, r6
128	mov.l	@r4+, r2
129	movt	r0
130	dt	r1
131	bf/s	5b
132	 cmp/eq	#1, r0
133	addc	r2, r6
134	addc	r1, r6		! r1==0 here, so it means add carry-bit
1356:
136	! 3 bytes or less remaining
137	mov	#3, r0
138	and	r0, r5
139	tst	r5, r5
140	bt	9f		! if it's =0 go to 9f
141	mov	#2, r1
142	cmp/hs  r1, r5
143	bf	7f
	/*
	 * 2 or 3 bytes left: load a word.  With exactly 2 it is added at 8:;
	 * with 3 it is added in the upper half and the last byte follows.
	 */
144	mov.w	@r4+, r0
145	extu.w	r0, r0
146	cmp/eq	r1, r5
147	bt/s	8f
148	 clrt
149	shll16	r0
150	addc	r0, r6
1517:
152	mov.b	@r4+, r0
153	extu.b	r0, r0
154#ifndef	__LITTLE_ENDIAN__
155	shll8	r0
156#endif
1578:
158	addc	r0, r6
159	mov	#0, r0
160	addc	r0, r6
1619:
162	! Check if the buffer was misaligned, if so realign sum
163	mov	r7, r0
164	tst	#1, r0
165	bt	10f
	/* Second 8-bit left rotate, pairing with the one on the odd path. */
166	mov	r6, r0
167	shll8	r6
168	shlr16	r0
169	shlr8	r0
170	or	r0, r6
17110:
172	rts
173	 mov	r6, r0
174
175/*
176unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, 
177					int sum, int *src_err_ptr, int *dst_err_ptr)
178 */ 
179
180/*
 181 * Copy from src to dst while checksumming, otherwise like csum_partial
182 *
183 * The macros SRC and DST specify the type of access for the instruction.
184 * thus we can call a custom exception handler for all access types.
185 *
186 * FIXME: could someone double-check whether I haven't mixed up some SRC and
187 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
188 *	  them all but there's no guarantee.
189 */
190
/*
 * SRC(insn) / DST(insn): emit insn at local label 9999 and append an
 * __ex_table entry (.long 9999b, <handler>) that pairs the address of
 * that instruction with a fixup label: 6001 for SRC(), 6002 for DST().
 * .previous then switches back to the section that was active before.
 * Both handler labels are referenced with the "f" suffix, so they have
 * to be defined after every use of these macros.
 */
191#define SRC(...)			\
192	9999: __VA_ARGS__ ;		\
193	.section __ex_table, "a";	\
194	.long 9999b, 6001f	;	\
195	.previous
196
197#define DST(...)			\
198	9999: __VA_ARGS__ ;		\
199	.section __ex_table, "a";	\
200	.long 9999b, 6002f	;	\
201	.previous
202
203!
204! r4:	const char *SRC
205! r5:	char *DST
206! r6:	int LEN
207! r7:	int SUM
208!
209! on stack:
210! int *SRC_ERR_PTR
211! int *DST_ERR_PTR
212!
/*
 * csum_partial_copy_generic: copy bytes from r4 to r5 while adding them
 * into the running sum in r7; the sum is returned in r0.
 *
 * r5 (dst) and r6 (len) are pushed on entry so the fixup code can reload
 * them.  With those two words on the stack the fixup code reads
 * src_err_ptr at @(8,r15) and dst_err_ptr at @(12,r15).  The pushes are
 * undone by "add #8,r15" in the exit sequence at the end.
 *
 * Every load is wrapped in SRC() and every store in DST(), which tie the
 * access to the fixup labels 6001: and 6002: through __ex_table entries.
 *
 * The loops park the carry in r0 with movt before dt (which overwrites
 * T) and restore it with "cmp/eq #1,r0" in the branch delay slot.
 */
213ENTRY(csum_partial_copy_generic)
214	mov.l	r5,@-r15
215	mov.l	r6,@-r15
216
	/*
	 * The word/longword paths need (src & 3) == (dst & 3) and an even
	 * dst; anything else goes to the byte loop at 3:.
	 */
217	mov	#3,r0		! Check src and dest are equally aligned
218	mov	r4,r1
219	and	r0,r1
220	and	r5,r0
221	cmp/eq	r1,r0
222	bf	3f		! Different alignments, use slow version
223	tst	#1,r0		! Check dest word aligned
224	bf	3f		! If not, do it the slow way
225
	/*
	 * dst is even here.  If bit 1 is set, one 16-bit word is copied at
	 * 1: first so the longword loops start 4-byte aligned.
	 */
226	mov	#2,r0
227	tst	r0,r5		! Check dest alignment. 
228	bt	2f		! Jump if alignment is ok.
229	add	#-2,r6		! Alignment uses up two bytes.
230	cmp/pz	r6		! Jump if we had at least two bytes.
231	bt/s	1f
232	 clrt
233	add	#2,r6		! r6 was < 2.	Deal with it.
234	bra	4f
235	 mov	r6,r2
236
2373:	! Handle different src and dest alignments.
238	! This is not common, so simple byte by byte copy will do.
	/* r2 keeps the full length; r6 = len >> 1 counts byte pairs. */
239	mov	r6,r2
240	shlr	r6
241	tst	r6,r6
242	bt	4f
243	clrt
244	.align	2
2455:
	/*
	 * Copy two bytes and combine them into the same 16-bit value the
	 * word paths get from mov.w + extu.w, then add it to the sum.
	 */
246SRC(	mov.b	@r4+,r1 	)
247SRC(	mov.b	@r4+,r0		)
248	extu.b	r1,r1
249DST(	mov.b	r1,@r5		)
250DST(	mov.b	r0,@(1,r5)	)
251	extu.b	r0,r0
252	add	#2,r5
253
254#ifdef	__LITTLE_ENDIAN__
255	shll8	r0
256#else
257	shll8	r1
258#endif
259	or	r1,r0
260
261	addc	r0,r7
262	movt	r0
263	dt	r6
264	bf/s	5b
265	 cmp/eq	#1,r0
266	mov	#0,r0
267	addc	r0, r7
268
	/* Odd length: one byte is left and is handled at the later 5:. */
269	mov	r2, r0
270	tst	#1, r0
271	bt	7f
272	bra	5f
273	 clrt
274
275	! src and dest equally aligned, but to a two byte boundary.
276	! Handle first two bytes as a special case
277	.align	2
2781:	
279SRC(	mov.w	@r4+,r0		)
280DST(	mov.w	r0,@r5		)
281	add	#2,r5
282	extu.w	r0,r0
283	addc	r0,r7
284	mov	#0,r0
285	addc	r0,r7
2862:
	/* r2 = bytes left; r6 = r2 >> 5 = number of 32-byte blocks. */
287	mov	r6,r2
288	mov	#-5,r0
289	shld	r0,r6
290	tst	r6,r6
291	bt/s	2f
292	 clrt
293	.align	2
2941:	
	/*
	 * 32 bytes per pass: eight longwords loaded through SRC(), stored
	 * through DST() at offsets 0..28 from r5, carry chained by addc.
	 */
295SRC(	mov.l	@r4+,r0		)
296SRC(	mov.l	@r4+,r1		)
297	addc	r0,r7
298DST(	mov.l	r0,@r5		)
299DST(	mov.l	r1,@(4,r5)	)
300	addc	r1,r7
301
302SRC(	mov.l	@r4+,r0		)
303SRC(	mov.l	@r4+,r1		)
304	addc	r0,r7
305DST(	mov.l	r0,@(8,r5)	)
306DST(	mov.l	r1,@(12,r5)	)
307	addc	r1,r7
308
309SRC(	mov.l	@r4+,r0 	)
310SRC(	mov.l	@r4+,r1		)
311	addc	r0,r7
312DST(	mov.l	r0,@(16,r5)	)
313DST(	mov.l	r1,@(20,r5)	)
314	addc	r1,r7
315
316SRC(	mov.l	@r4+,r0		)
317SRC(	mov.l	@r4+,r1		)
318	addc	r0,r7
319DST(	mov.l	r0,@(24,r5)	)
320DST(	mov.l	r1,@(28,r5)	)
321	addc	r1,r7
322	add	#32,r5
323	movt	r0
324	dt	r6
325	bf/s	1b
326	 cmp/eq	#1,r0
327	mov	#0,r0
328	addc	r0,r7
329
	/*
	 * r6 = r2 & 0x1c: bytes left that form whole longwords; shlr2 turns
	 * that into the longword count for the loop at 3:.
	 */
3302:	mov	r2,r6
331	mov	#0x1c,r0
332	and	r0,r6
333	cmp/pl	r6
334	bf/s	4f
335	 clrt
336	shlr2	r6
3373:	
338SRC(	mov.l	@r4+,r0	)
339	addc	r0,r7
340DST(	mov.l	r0,@r5	)
341	add	#4,r5
342	movt	r0
343	dt	r6
344	bf/s	3b
345	 cmp/eq	#1,r0
346	mov	#0,r0
347	addc	r0,r7
	/*
	 * r6 = r2 & 3: trailing bytes.  With 2 or 3 a word is copied; for
	 * exactly 2 it is added at 6:, for 3 it is added in the upper half
	 * and the last byte is copied and added at 5:/6:.
	 */
3484:	mov	r2,r6
349	mov	#3,r0
350	and	r0,r6
351	cmp/pl	r6
352	bf	7f
353	mov	#2,r1
354	cmp/hs	r1,r6
355	bf	5f
356SRC(	mov.w	@r4+,r0	)
357DST(	mov.w	r0,@r5	)
358	extu.w	r0,r0
359	add	#2,r5
360	cmp/eq	r1,r6
361	bt/s	6f
362	 clrt
363	shll16	r0
364	addc	r0,r7
3655:	
366SRC(	mov.b	@r4+,r0	)
367DST(	mov.b	r0,@r5	)
368	extu.b	r0,r0
369#ifndef	__LITTLE_ENDIAN__
370	shll8	r0
371#endif
3726:	addc	r0,r7
373	mov	#0,r0
374	addc	r0,r7
	/*
	 * 7: is the normal end of the copy; 5000: is the address the fixup
	 * handlers jump back to (via the literals at 8000: and 8001:).  The
	 * exit sequence itself follows the .previous further down.
	 */
3757:
3765000:
377
378# Exception handler:
379.section .fixup, "ax"							
380
	/*
	 * 6001: handler named by SRC().  Stores -EFAULT through
	 * src_err_ptr, zeroes the destination using dst and len reloaded
	 * from the stack, and resumes at 5000:.  r7 supplies the zero byte,
	 * so the value returned on this path is 0.
	 * NOTE(review): the zeroing loop decrements before testing, so it
	 * presumably relies on len being non-zero here - confirm.
	 */
3816001:
382	mov.l	@(8,r15),r0			! src_err_ptr
383	mov	#-EFAULT,r1
384	mov.l	r1,@r0
385
386	! zero the complete destination - computing the rest
387	! is too much work 
388	mov.l	@(4,r15),r5		! dst
389	mov.l	@r15,r6			! len
390	mov	#0,r7
3911:	mov.b	r7,@r5
392	dt	r6
393	bf/s	1b
394	 add	#1,r5
395	mov.l	8000f,r0
396	jmp	@r0
397	 nop
398	.align	2
3998000:	.long	5000b
400
	/*
	 * 6002: handler named by DST().  Stores -EFAULT through
	 * dst_err_ptr and resumes at 5000:; r7 is left as accumulated.
	 */
4016002:
402	mov.l	@(12,r15),r0			! dst_err_ptr
403	mov	#-EFAULT,r1
404	mov.l	r1,@r0
405	mov.l	8001f,r0
406	jmp	@r0
407	 nop
408	.align	2
4098001:	.long	5000b
410
411.previous
	/* Common exit: drop the two words pushed on entry, return r7. */
412	add	#8,r15
413	rts
414	 mov	r7,r0