net/ipv6/netfilter/nf_conntrack_reasm.c
Source: http://github.com/mirrors/linux (raw file) · C · 554 lines total (409 code, 88 blank, 57 comment; cyclomatic complexity 62) · MD5 f5099007ee6230856e9608a25ac097b2
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * IPv6 fragment reassembly for connection tracking
  4. *
  5. * Copyright (C)2004 USAGI/WIDE Project
  6. *
  7. * Author:
  8. * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
  9. *
  10. * Based on: net/ipv6/reassembly.c
  11. */
  12. #define pr_fmt(fmt) "IPv6-nf: " fmt
  13. #include <linux/errno.h>
  14. #include <linux/types.h>
  15. #include <linux/string.h>
  16. #include <linux/socket.h>
  17. #include <linux/sockios.h>
  18. #include <linux/jiffies.h>
  19. #include <linux/net.h>
  20. #include <linux/list.h>
  21. #include <linux/netdevice.h>
  22. #include <linux/in6.h>
  23. #include <linux/ipv6.h>
  24. #include <linux/icmpv6.h>
  25. #include <linux/random.h>
  26. #include <linux/slab.h>
  27. #include <net/sock.h>
  28. #include <net/snmp.h>
  29. #include <net/ipv6_frag.h>
  30. #include <net/protocol.h>
  31. #include <net/transp_v6.h>
  32. #include <net/rawv6.h>
  33. #include <net/ndisc.h>
  34. #include <net/addrconf.h>
  35. #include <net/inet_ecn.h>
  36. #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
  37. #include <linux/sysctl.h>
  38. #include <linux/netfilter.h>
  39. #include <linux/netfilter_ipv6.h>
  40. #include <linux/kernel.h>
  41. #include <linux/module.h>
  42. #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
/* Name of the slab cache backing netfilter IPv6 fragment-queue objects. */
static const char nf_frags_cache_name[] = "nf-frags";

/* Global descriptor for this defrag engine's inet_frags subsystem;
 * populated in nf_ct_frag6_init(). */
static struct inet_frags nf_frags;
  45. #ifdef CONFIG_SYSCTL
/* Template for the per-netns nf_conntrack_frag6_* sysctls.  The .data
 * (and extra1/extra2 clamp) pointers are filled in at registration time
 * by nf_ct_frag6_sysctl_register(). */
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_frag6_timeout",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_frag6_low_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "nf_conntrack_frag6_high_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{ }	/* sentinel */
};
/*
 * Register the nf_conntrack_frag6_* sysctls for @net.
 *
 * The initial namespace uses the static template directly; every other
 * namespace gets a kmemdup()'d private copy so that the .data pointers
 * can target its own fqdir fields.
 *
 * Returns 0 on success, -ENOMEM on allocation or registration failure.
 */
static int nf_ct_frag6_sysctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = nf_ct_frag6_sysctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
				GFP_KERNEL);
		if (table == NULL)
			goto err_alloc;
	}

	/* Wire each entry to this namespace's fqdir settings. */
	table[0].data = &net->nf_frag.fqdir->timeout;
	table[1].data = &net->nf_frag.fqdir->low_thresh;
	table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
	table[2].data = &net->nf_frag.fqdir->high_thresh;
	table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
	/* Child namespaces may not raise high_thresh above init_net's. */
	table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;

	hdr = register_net_sysctl(net, "net/netfilter", table);
	if (hdr == NULL)
		goto err_reg;

	net->nf_frag_frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}
  95. static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
  96. {
  97. struct ctl_table *table;
  98. table = net->nf_frag_frags_hdr->ctl_table_arg;
  99. unregister_net_sysctl_table(net->nf_frag_frags_hdr);
  100. if (!net_eq(net, &init_net))
  101. kfree(table);
  102. }
  103. #else
/* CONFIG_SYSCTL=n: provide no-op stubs so callers need no #ifdefs. */
static int nf_ct_frag6_sysctl_register(struct net *net)
{
	return 0;
}

static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
}
  111. #endif
/* Forward declaration; defined below, after nf_ct_frag6_queue(). */
static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
			     struct sk_buff *prev_tail, struct net_device *dev);
  114. static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
  115. {
  116. return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
  117. }
  118. static void nf_ct_frag6_expire(struct timer_list *t)
  119. {
  120. struct inet_frag_queue *frag = from_timer(frag, t, timer);
  121. struct frag_queue *fq;
  122. fq = container_of(frag, struct frag_queue, q);
  123. ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
  124. }
  125. /* Creation primitives. */
  126. static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
  127. const struct ipv6hdr *hdr, int iif)
  128. {
  129. struct frag_v6_compare_key key = {
  130. .id = id,
  131. .saddr = hdr->saddr,
  132. .daddr = hdr->daddr,
  133. .user = user,
  134. .iif = iif,
  135. };
  136. struct inet_frag_queue *q;
  137. q = inet_frag_find(net->nf_frag.fqdir, &key);
  138. if (!q)
  139. return NULL;
  140. return container_of(q, struct frag_queue, q);
  141. }
/*
 * Add one fragment to the reassembly queue @fq.
 *
 * @fq:    fragment queue (caller holds fq->q.lock)
 * @skb:   the fragment
 * @fhdr:  pointer to the fragment header inside @skb
 * @nhoff: offset of the "next header" field preceding the frag header
 *
 * Returns 0 when this fragment completed the datagram (@skb now holds
 * the reassembled packet), -EINPROGRESS when it was queued or was a
 * duplicate, -EPROTO for a non-final fragment whose length is not a
 * multiple of 8, and -EINVAL for other malformed input.
 */
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
			     const struct frag_hdr *fhdr, int nhoff)
{
	unsigned int payload_len;
	struct net_device *dev;
	struct sk_buff *prev;
	int offset, end, err;
	u8 ecn;

	if (fq->q.flags & INET_FRAG_COMPLETE) {
		pr_debug("Already completed\n");
		goto err;
	}

	payload_len = ntohs(ipv6_hdr(skb)->payload_len);

	/* Fragment offset is stored in units of 8 bytes; low 3 bits are
	 * flags, masked off here. */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	/* end = offset just past this fragment's payload within the
	 * original datagram. */
	end = offset + (payload_len -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	if ((unsigned int)end > IPV6_MAXPLEN) {
		pr_debug("offset is too large.\n");
		return -EINVAL;
	}

	ecn = ip6_frag_ecn(ipv6_hdr(skb));

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);

		/* The headers up to and including the frag header will be
		 * pulled below; subtract their checksum contribution. */
		skb->csum = csum_sub(skb->csum,
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
			pr_debug("already received last fragment\n");
			goto err;
		}
		fq->q.flags |= INET_FRAG_LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			pr_debug("end of fragment not rounded to 8 bytes.\n");
			inet_frag_kill(&fq->q);
			return -EPROTO;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.flags & INET_FRAG_LAST_IN) {
				pr_debug("last packet already reached.\n");
				goto err;
			}
			fq->q.len = end;
		}
	}

	/* Fragment carries no payload — reject it. */
	if (end == offset)
		goto err;

	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
		pr_debug("queue: message is too short.\n");
		goto err;
	}
	if (pskb_trim_rcsum(skb, end - offset)) {
		pr_debug("Can't trim\n");
		goto err;
	}

	/* Note : skb->rbnode and skb->dev share the same location. */
	dev = skb->dev;
	/* Makes sure compiler wont do silly aliasing games */
	barrier();

	/* Remember the old tail: it becomes prev_tail for reassembly. */
	prev = fq->q.fragments_tail;
	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
	if (err) {
		if (err == IPFRAG_DUP) {
			/* No error for duplicates, pretend they got queued. */
			kfree_skb(skb);
			return -EINPROGRESS;
		}
		goto insert_error;
	}

	if (dev)
		fq->iif = dev->ifindex;

	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;	/* bytes accumulated so far */
	fq->ecn |= ecn;
	if (payload_len > fq->q.max_size)
		fq->q.max_size = payload_len;
	add_frag_mem_limit(fq->q.fqdir, skb->truesize);

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.flags |= INET_FRAG_FIRST_IN;
	}

	/* First and last fragment seen and no holes: reassemble now. */
	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		skb->_skb_refdst = 0UL;
		err = nf_ct_frag6_reasm(fq, skb, prev, dev);
		skb->_skb_refdst = orefdst;

		/* After queue has assumed skb ownership, only 0 or
		 * -EINPROGRESS must be returned.
		 */
		return err ? -EINPROGRESS : 0;
	}

	skb_dst_drop(skb);
	return -EINPROGRESS;

insert_error:
	inet_frag_kill(&fq->q);
err:
	skb_dst_drop(skb);
	return -EINVAL;
}
/*
 * Check if this packet is complete.
 *
 * It is called with locked fq, and caller must check that
 * queue is eligible for reassembly i.e. it is not COMPLETE,
 * the last and the first frames arrived and all the bits are here.
 *
 * Returns 0 on success (@skb now carries the whole datagram) or
 * -EINVAL on failure; the queue is killed in either case.
 */
static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
			     struct sk_buff *prev_tail, struct net_device *dev)
{
	void *reasm_data;
	int payload_len;
	u8 ecn;

	inet_frag_kill(&fq->q);

	/* 0xff in the table marks an invalid ECN combination across
	 * fragments — drop the whole datagram. */
	ecn = ip_frag_ecn_table[fq->ecn];
	if (unlikely(ecn == 0xff))
		goto err;

	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
	if (!reasm_data)
		goto err;

	/* Payload length of the reassembled packet, accounting for the
	 * fragment header that is about to be removed. */
	payload_len = ((skb->data - skb_network_header(skb)) -
		       sizeof(struct ipv6hdr) + fq->q.len -
		       sizeof(struct frag_hdr));
	if (payload_len > IPV6_MAXPLEN) {
		net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
				    payload_len);
		goto err;
	}

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
		(skb->data - skb->head) - sizeof(struct frag_hdr));
	skb->mac_header += sizeof(struct frag_hdr);
	skb->network_header += sizeof(struct frag_hdr);

	skb_reset_transport_header(skb);

	inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);

	skb->ignore_df = 1;
	skb->dev = dev;
	ipv6_hdr(skb)->payload_len = htons(payload_len);
	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
	IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;

	/* Yes, and fold redundant checksum back. 8) */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_partial(skb_network_header(skb),
					 skb_network_header_len(skb),
					 skb->csum);

	/* Detach the (now consumed) fragment list from the queue. */
	fq->q.rb_fragments = RB_ROOT;
	fq->q.fragments_tail = NULL;
	fq->q.last_run_head = NULL;
	return 0;

err:
	inet_frag_kill(&fq->q);
	return -EINVAL;
}
  316. /*
  317. * find the header just before Fragment Header.
  318. *
  319. * if success return 0 and set ...
  320. * (*prevhdrp): the value of "Next Header Field" in the header
  321. * just before Fragment Header.
  322. * (*prevhoff): the offset of "Next Header Field" in the header
  323. * just before Fragment Header.
  324. * (*fhoff) : the offset of Fragment Header.
  325. *
  326. * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
  327. *
  328. */
  329. static int
  330. find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
  331. {
  332. u8 nexthdr = ipv6_hdr(skb)->nexthdr;
  333. const int netoff = skb_network_offset(skb);
  334. u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
  335. int start = netoff + sizeof(struct ipv6hdr);
  336. int len = skb->len - start;
  337. u8 prevhdr = NEXTHDR_IPV6;
  338. while (nexthdr != NEXTHDR_FRAGMENT) {
  339. struct ipv6_opt_hdr hdr;
  340. int hdrlen;
  341. if (!ipv6_ext_hdr(nexthdr)) {
  342. return -1;
  343. }
  344. if (nexthdr == NEXTHDR_NONE) {
  345. pr_debug("next header is none\n");
  346. return -1;
  347. }
  348. if (len < (int)sizeof(struct ipv6_opt_hdr)) {
  349. pr_debug("too short\n");
  350. return -1;
  351. }
  352. if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
  353. BUG();
  354. if (nexthdr == NEXTHDR_AUTH)
  355. hdrlen = ipv6_authlen(&hdr);
  356. else
  357. hdrlen = ipv6_optlen(&hdr);
  358. prevhdr = nexthdr;
  359. prev_nhoff = start;
  360. nexthdr = hdr.nexthdr;
  361. len -= hdrlen;
  362. start += hdrlen;
  363. }
  364. if (len < 0)
  365. return -1;
  366. *prevhdrp = prevhdr;
  367. *prevhoff = prev_nhoff;
  368. *fhoff = start;
  369. return 0;
  370. }
/*
 * Entry point: feed a possibly-fragmented IPv6 packet to the conntrack
 * defragmenter.
 *
 * @net:  network namespace
 * @skb:  the packet
 * @user: reassembly context id (used in the queue lookup key, keeping
 *        independent defrag users apart)
 *
 * Returns 0 when @skb is not a fragment (or must pass through as-is),
 * -EINPROGRESS when it was queued awaiting more fragments, and a
 * negative error on failure.  On a 0 return after reassembly, @skb
 * holds the complete datagram.
 */
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
	u16 savethdr = skb->transport_header;
	int fhoff, nhoff, ret;
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
	struct ipv6hdr *hdr;
	u8 prevhdr;

	/* Jumbo payload inhibits frag. header */
	if (ipv6_hdr(skb)->payload_len == 0) {
		pr_debug("payload len = 0\n");
		return 0;
	}

	/* Not a fragment at all: nothing for us to do. */
	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
		return 0;

	if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
		return -ENOMEM;

	skb_set_transport_header(skb, fhoff);
	hdr = ipv6_hdr(skb);
	fhdr = (struct frag_hdr *)skb_transport_header(skb);

	skb_orphan(skb);
	fq = fq_find(net, fhdr->identification, user, hdr,
		     skb->dev ? skb->dev->ifindex : 0);
	if (fq == NULL) {
		pr_debug("Can't find and can't create new queue\n");
		return -ENOMEM;
	}

	spin_lock_bh(&fq->q.lock);

	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
	if (ret == -EPROTO) {
		/* Length not a multiple of 8: restore the transport header
		 * and let the packet continue unreassembled — presumably so
		 * the stack can emit the RFC2460-mandated parameter problem
		 * (see the note in nf_ct_frag6_queue). */
		skb->transport_header = savethdr;
		ret = 0;
	}

	spin_unlock_bh(&fq->q.lock);
	inet_frag_put(&fq->q);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
  409. static int nf_ct_net_init(struct net *net)
  410. {
  411. int res;
  412. res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
  413. if (res < 0)
  414. return res;
  415. net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
  416. net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
  417. net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
  418. res = nf_ct_frag6_sysctl_register(net);
  419. if (res < 0)
  420. fqdir_exit(net->nf_frag.fqdir);
  421. return res;
  422. }
/* Early per-netns teardown phase: start shutting the fqdir down ahead
 * of the .exit callback (see fqdir_pre_exit for exact semantics). */
static void nf_ct_net_pre_exit(struct net *net)
{
	fqdir_pre_exit(net->nf_frag.fqdir);
}
/* Final per-netns teardown: drop the sysctls first, then release the
 * fqdir they pointed into. */
static void nf_ct_net_exit(struct net *net)
{
	nf_ct_frags6_sysctl_unregister(net);
	fqdir_exit(net->nf_frag.fqdir);
}
/* Per-network-namespace lifecycle hooks for the defrag engine. */
static struct pernet_operations nf_ct_net_ops = {
	.init		= nf_ct_net_init,
	.pre_exit	= nf_ct_net_pre_exit,
	.exit		= nf_ct_net_exit,
};
/* rhashtable parameters for fragment-queue lookup; hash/compare helpers
 * are the ip6frag_* ones shared with core IPv6 reassembly. */
static const struct rhashtable_params nfct_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.hashfn			= ip6frag_key_hashfn,
	.obj_hashfn		= ip6frag_obj_hashfn,
	.obj_cmpfn		= ip6frag_obj_cmpfn,
	.automatic_shrinking	= true,
};
  444. int nf_ct_frag6_init(void)
  445. {
  446. int ret = 0;
  447. nf_frags.constructor = ip6frag_init;
  448. nf_frags.destructor = NULL;
  449. nf_frags.qsize = sizeof(struct frag_queue);
  450. nf_frags.frag_expire = nf_ct_frag6_expire;
  451. nf_frags.frags_cache_name = nf_frags_cache_name;
  452. nf_frags.rhash_params = nfct_rhash_params;
  453. ret = inet_frags_init(&nf_frags);
  454. if (ret)
  455. goto out;
  456. ret = register_pernet_subsys(&nf_ct_net_ops);
  457. if (ret)
  458. inet_frags_fini(&nf_frags);
  459. out:
  460. return ret;
  461. }
/* Module exit: undo nf_ct_frag6_init() in reverse order. */
void nf_ct_frag6_cleanup(void)
{
	unregister_pernet_subsys(&nf_ct_net_ops);
	inet_frags_fini(&nf_frags);
}