/arch/sh/lib/checksum.S

http://github.com/mirrors/linux

/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen, add zeroing on error
 *              converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */
#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */
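/*
 * For reference only (not part of the build): a minimal C sketch of the
 * 32-bit ones'-complement partial sum this routine computes, assuming a
 * 4-byte-aligned buffer; the helper name csum_partial_ref is illustrative,
 * not a kernel API.  The raw 32-bit value may differ from the assembler's,
 * but both are equivalent once folded down to 16 bits.
 *
 *      static unsigned int csum_partial_ref(const unsigned char *buf,
 *                                           int len, unsigned int sum)
 *      {
 *              unsigned long long acc = sum;
 *
 *              while (len >= 4) {              // whole 32-bit words
 *                      acc += *(const unsigned int *)buf;
 *                      buf += 4;
 *                      len -= 4;
 *              }
 *              if (len >= 2) {                 // trailing 16-bit word
 *                      acc += *(const unsigned short *)buf;
 *                      buf += 2;
 *                      len -= 2;
 *              }
 *              if (len)                        // trailing byte; the asm shifts
 *                      acc += *buf;            // it left by 8 on big-endian
 *              while (acc >> 32)               // fold carries back in
 *                      acc = (acc & 0xffffffffULL) + (acc >> 32);
 *              return (unsigned int)acc;
 *      }
 */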
.text
ENTRY(csum_partial)
        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
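        /*
         * Illustration only: when the buffer starts at an odd address, the
         * entry fixup just below and the exit fixup at label 9: both apply
         * the same five-instruction sequence (mov/shll8/shlr16/shlr8/or),
         * which rotates the 32-bit accumulator left by 8 bits so the byte
         * lanes stay consistent with an aligned start.  A C sketch of that
         * rotate (the name rotl8 is illustrative):
         *
         *      static unsigned int rotl8(unsigned int sum)
         *      {
         *              return (sum << 8) | (sum >> 24);
         *      }
         */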
        mov     r4, r0
        tst     #3, r0          ! Check alignment.
        bt/s    2f              ! Jump if alignment is ok.
        mov     r4, r7          ! Keep a copy to check for alignment
        !
        tst     #1, r0          ! Check alignment.
        bt      21f             ! Jump if alignment is boundary of 2bytes.

        ! buf is odd
        tst     r5, r5
        add     #-1, r5
        bt      9f
        mov.b   @r4+, r0
        extu.b  r0, r0
        addc    r0, r6          ! t=0 from previous tst
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
        mov     r4, r0
        tst     #2, r0
        bt      2f
21:
        ! buf is 2 byte aligned (len could be 0)
        add     #-2, r5         ! Alignment uses up two bytes.
        cmp/pz  r5              !
        bt/s    1f              ! Jump if we had at least two bytes.
        clrt
        bra     6f
        add     #2, r5          ! r5 was < 2.  Deal with it.
1:
        mov.w   @r4+, r0
        extu.w  r0, r0
        addc    r0, r6
        bf      2f
        add     #1, r6
2:
        ! buf is 4 byte aligned (len could be 0)
        mov     r5, r1
        mov     #-5, r0
        shld    r0, r1
        tst     r1, r1
        bt/s    4f              ! if it's =0, go to 4f
        clrt
        .align  2
3:
        mov.l   @r4+, r0
        mov.l   @r4+, r2
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        addc    r0, r6
        addc    r2, r6
        movt    r0
        dt      r1
        bf/s    3b
        cmp/eq  #1, r0
        ! here, we know r1==0
        addc    r1, r6          ! add carry to r6
4:
        mov     r5, r0
        and     #0x1c, r0
        tst     r0, r0
        bt      6f
        ! 4 bytes or more remaining
        mov     r0, r1
        shlr2   r1
        mov     #0, r2
5:
        addc    r2, r6
        mov.l   @r4+, r2
        movt    r0
        dt      r1
        bf/s    5b
        cmp/eq  #1, r0
        addc    r2, r6
        addc    r1, r6          ! r1==0 here, so it means add carry-bit
6:
        ! 3 bytes or less remaining
        mov     #3, r0
        and     r0, r5
        tst     r5, r5
        bt      9f              ! if it's =0 go to 9f
        mov     #2, r1
        cmp/hs  r1, r5
        bf      7f
        mov.w   @r4+, r0
        extu.w  r0, r0
        cmp/eq  r1, r5
        bt/s    8f
        clrt
        shll16  r0
        addc    r0, r6
7:
        mov.b   @r4+, r0
        extu.b  r0, r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
8:
        addc    r0, r6
        mov     #0, r0
        addc    r0, r6
9:
        ! Check if the buffer was misaligned, if so realign sum
        mov     r7, r0
        tst     #1, r0
        bt      10f
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
10:
        rts
        mov     r6, r0
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *                                         int sum, int *src_err_ptr,
 *                                         int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST mark the type of access performed by an
 * instruction, so a custom exception handler can be installed for each
 * access type (see the sketch after the macro definitions below).
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */
#define SRC(...)                        \
9999:   __VA_ARGS__ ;                   \
        .section __ex_table, "a";       \
        .long 9999b, 6001f      ;       \
        .previous

#define DST(...)                        \
9999:   __VA_ARGS__ ;                   \
        .section __ex_table, "a";       \
        .long 9999b, 6002f      ;       \
        .previous
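/*
 * Sketch of the fixup mechanism the SRC/DST macros rely on (illustration
 * only; the exact struct layout lives in the arch headers): each macro
 * emits a pair of addresses into the __ex_table section.  If the load or
 * store at the 9999: address faults, the page-fault handler looks the
 * faulting PC up in that table and resumes execution at the recorded fixup
 * label (6001: for source faults, 6002: for destination faults).
 *
 *      struct exception_table_entry {
 *              unsigned long insn;     // address of the access that may fault
 *              unsigned long fixup;    // where execution resumes after a fault
 *      };
 */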
!
! r4:   const char *SRC
! r5:   char *DST
! r6:   int LEN
! r7:   int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
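!
! For illustration only: a C-level view of how a caller might use this
! routine (a sketch, not the kernel's actual wrapper).  The fixup handlers
! near the end of this file store -EFAULT through *SRC_ERR_PTR or
! *DST_ERR_PTR when an access faults, so callers check those afterwards:
!
!       int src_err = 0, dst_err = 0;
!       unsigned int csum;
!
!       csum = csum_partial_copy_generic(src, dst, len, 0,
!                                        &src_err, &dst_err);
!       if (src_err || dst_err)
!               return -EFAULT;
!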
ENTRY(csum_partial_copy_generic)
        mov.l   r5,@-r15
        mov.l   r6,@-r15

        mov     #3,r0           ! Check src and dest are equally aligned
        mov     r4,r1
        and     r0,r1
        and     r5,r0
        cmp/eq  r1,r0
        bf      3f              ! Different alignments, use slow version
        tst     #1,r0           ! Check dest word aligned
        bf      3f              ! If not, do it the slow way

        mov     #2,r0
        tst     r0,r5           ! Check dest alignment.
        bt      2f              ! Jump if alignment is ok.
        add     #-2,r6          ! Alignment uses up two bytes.
        cmp/pz  r6              ! Jump if we had at least two bytes.
        bt/s    1f
        clrt
        add     #2,r6           ! r6 was < 2.  Deal with it.
        bra     4f
        mov     r6,r2

3:      ! Handle different src and dest alignments.
        ! This is not common, so simple byte by byte copy will do.
        mov     r6,r2
        shlr    r6
        tst     r6,r6
        bt      4f
        clrt
        .align  2
5:
SRC(    mov.b   @r4+,r1         )
SRC(    mov.b   @r4+,r0         )
        extu.b  r1,r1
DST(    mov.b   r1,@r5          )
DST(    mov.b   r0,@(1,r5)      )
        extu.b  r0,r0
        add     #2,r5

#ifdef __LITTLE_ENDIAN__
        shll8   r0
#else
        shll8   r1
#endif
        or      r1,r0

        addc    r0,r7
        movt    r0
        dt      r6
        bf/s    5b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0, r7

        mov     r2, r0
        tst     #1, r0
        bt      7f
        bra     5f
        clrt

        ! src and dest equally aligned, but to a two byte boundary.
        ! Handle first two bytes as a special case
        .align  2
1:
SRC(    mov.w   @r4+,r0         )
DST(    mov.w   r0,@r5          )
        add     #2,r5
        extu.w  r0,r0
        addc    r0,r7
        mov     #0,r0
        addc    r0,r7
2:
        mov     r6,r2
        mov     #-5,r0
        shld    r0,r6
        tst     r6,r6
        bt/s    2f
        clrt
        .align  2
1:
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@r5          )
DST(    mov.l   r1,@(4,r5)      )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(8,r5)      )
DST(    mov.l   r1,@(12,r5)     )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(16,r5)     )
DST(    mov.l   r1,@(20,r5)     )
        addc    r1,r7

SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(24,r5)     )
DST(    mov.l   r1,@(28,r5)     )
        addc    r1,r7
        add     #32,r5
        movt    r0
        dt      r6
        bf/s    1b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7

2:      mov     r2,r6
        mov     #0x1c,r0
        and     r0,r6
        cmp/pl  r6
        bf/s    4f
        clrt
        shlr2   r6
3:
SRC(    mov.l   @r4+,r0         )
        addc    r0,r7
DST(    mov.l   r0,@r5          )
        add     #4,r5
        movt    r0
        dt      r6
        bf/s    3b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7

4:      mov     r2,r6
        mov     #3,r0
        and     r0,r6
        cmp/pl  r6
        bf      7f
        mov     #2,r1
        cmp/hs  r1,r6
        bf      5f
SRC(    mov.w   @r4+,r0         )
DST(    mov.w   r0,@r5          )
        extu.w  r0,r0
        add     #2,r5
        cmp/eq  r1,r6
        bt/s    6f
        clrt
        shll16  r0
        addc    r0,r7
5:
SRC(    mov.b   @r4+,r0         )
DST(    mov.b   r0,@r5          )
        extu.b  r0,r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
6:      addc    r0,r7
        mov     #0,r0
        addc    r0,r7
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
        mov.l   @(8,r15),r0     ! src_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0

        ! zero the complete destination - computing the rest
        ! is too much work
        mov.l   @(4,r15),r5     ! dst
        mov.l   @r15,r6         ! len
        mov     #0,r7
1:      mov.b   r7,@r5
        dt      r6
        bf/s    1b
        add     #1,r5
        mov.l   8000f,r0
        jmp     @r0
        nop
        .align  2
8000:   .long   5000b

6002:
        mov.l   @(12,r15),r0    ! dst_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0
        mov.l   8001f,r0
        jmp     @r0
        nop
        .align  2
8001:   .long   5000b

.previous
        add     #8,r15
        rts
        mov     r7,r0