/net/ipv6/reassembly.c

http://github.com/mirrors/linux · C · 592 lines · 439 code · 96 blank · 57 comment · 52 complexity · 0ba7bdf8570dbad344cf8f971157e73b MD5 · raw file

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * IPv6 fragment reassembly
  4. * Linux INET6 implementation
  5. *
  6. * Authors:
  7. * Pedro Roque <roque@di.fc.ul.pt>
  8. *
  9. * Based on: net/ipv4/ip_fragment.c
  10. */
  11. /*
  12. * Fixes:
  13. * Andi Kleen Make it work with multiple hosts.
  14. * More RFC compliance.
  15. *
  16. * Horst von Brand Add missing #include <linux/string.h>
  17. * Alexey Kuznetsov SMP races, threading, cleanup.
  18. * Patrick McHardy LRU queue of frag heads for evictor.
  19. * Mitsuru KANDA @USAGI Register inet6_protocol{}.
  20. * David Stevens and
  21. * YOSHIFUJI,H. @USAGI Always remove fragment header to
  22. * calculate ICV correctly.
  23. */
  24. #define pr_fmt(fmt) "IPv6: " fmt
  25. #include <linux/errno.h>
  26. #include <linux/types.h>
  27. #include <linux/string.h>
  28. #include <linux/socket.h>
  29. #include <linux/sockios.h>
  30. #include <linux/jiffies.h>
  31. #include <linux/net.h>
  32. #include <linux/list.h>
  33. #include <linux/netdevice.h>
  34. #include <linux/in6.h>
  35. #include <linux/ipv6.h>
  36. #include <linux/icmpv6.h>
  37. #include <linux/random.h>
  38. #include <linux/jhash.h>
  39. #include <linux/skbuff.h>
  40. #include <linux/slab.h>
  41. #include <linux/export.h>
  42. #include <net/sock.h>
  43. #include <net/snmp.h>
  44. #include <net/ipv6.h>
  45. #include <net/ip6_route.h>
  46. #include <net/protocol.h>
  47. #include <net/transp_v6.h>
  48. #include <net/rawv6.h>
  49. #include <net/ndisc.h>
  50. #include <net/addrconf.h>
  51. #include <net/ipv6_frag.h>
  52. #include <net/inet_ecn.h>
/* Name of the slab cache used for struct frag_queue allocations. */
static const char ip6_frag_cache_name[] = "ip6-frags";
  54. static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
  55. {
  56. return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
  57. }
/* Shared inet_frags descriptor; registered with the core fragment
 * infrastructure in ipv6_frag_init().
 */
static struct inet_frags ip6_frags;

static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
			  struct sk_buff *prev_tail, struct net_device *dev);
  61. static void ip6_frag_expire(struct timer_list *t)
  62. {
  63. struct inet_frag_queue *frag = from_timer(frag, t, timer);
  64. struct frag_queue *fq;
  65. fq = container_of(frag, struct frag_queue, q);
  66. ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
  67. }
/* Find (or create) the reassembly queue for a fragment.
 *
 * The lookup key is the fragment identification plus the source and
 * destination addresses.  The incoming interface is part of the key only
 * for multicast and link-local destinations, where the same address can
 * legitimately appear on several interfaces; for other destinations iif
 * is cleared so fragments arriving on different interfaces still match.
 *
 * Returns a queue the caller must release with inet_frag_put(), or NULL
 * if no queue could be found or allocated.
 */
static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
	struct frag_v6_compare_key key = {
		.id = id,
		.saddr = hdr->saddr,
		.daddr = hdr->daddr,
		.user = IP6_DEFRAG_LOCAL_DELIVER,
		.iif = iif,
	};
	struct inet_frag_queue *q;

	if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
					     IPV6_ADDR_LINKLOCAL)))
		key.iif = 0;

	q = inet_frag_find(net->ipv6.fqdir, &key);
	if (!q)
		return NULL;

	return container_of(q, struct frag_queue, q);
}
/* Add one fragment to a reassembly queue.
 *
 * Called with fq->q.lock held.  @nhoff is the offset of the Next Header
 * byte that precedes the fragment header; it is recorded from the first
 * fragment for use when the header is stripped during reassembly.
 *
 * Return values:
 *   1            - this fragment completed the datagram; @skb now holds
 *                  the reassembled packet (via ip6_frag_reasm()).
 *  -EINPROGRESS  - fragment queued, more fragments are expected.
 *  -1 with *@prob_offset set - malformed fragment; the caller must send
 *                  an ICMPv6 Parameter Problem and free the skb itself.
 *  other negative errno - fragment discarded (skb freed here).
 */
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
			  struct frag_hdr *fhdr, int nhoff,
			  u32 *prob_offset)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	int offset, end, fragsize;
	struct sk_buff *prev_tail;
	struct net_device *dev;
	int err = -ENOENT;
	u8 ecn;

	/* Queue already finished (reassembled or killed): drop. */
	if (fq->q.flags & INET_FRAG_COMPLETE)
		goto err;

	err = -EINVAL;
	/* Low 3 bits of frag_off are flags, mask them to get the offset. */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	/* end = offset of the first byte after this fragment's payload,
	 * i.e. offset plus the payload remaining after the frag header.
	 */
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	if ((unsigned int)end > IPV6_MAXPLEN) {
		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
		/* note that if prob_offset is set, the skb is freed elsewhere,
		 * we do not free it here.
		 */
		return -1;
	}

	ecn = ip6_frag_ecn(ipv6_hdr(skb));

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);

		/* The headers up to and including the fragment header are
		 * about to be pulled; remove them from the hw checksum.
		 */
		skb->csum = csum_sub(skb->csum,
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
			goto discard_fq;
		fq->q.flags |= INET_FRAG_LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			*prob_offset = offsetof(struct ipv6hdr, payload_len);
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.flags & INET_FRAG_LAST_IN)
				goto discard_fq;
			fq->q.len = end;
		}
	}

	/* Zero-length non-final fragment: nothing to queue. */
	if (end == offset)
		goto discard_fq;

	err = -ENOMEM;
	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto discard_fq;

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
		goto discard_fq;

	/* Note : skb->rbnode and skb->dev share the same location.
	 * Save dev before inet_frag_queue_insert() overwrites it via
	 * the rbnode.
	 */
	dev = skb->dev;
	/* Makes sure compiler wont do silly aliasing games */
	barrier();

	prev_tail = fq->q.fragments_tail;
	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
	if (err)
		goto insert_error;

	if (dev)
		fq->iif = dev->ifindex;

	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;
	fq->ecn |= ecn;
	add_frag_mem_limit(fq->q.fqdir, skb->truesize);

	/* Track the largest on-wire fragment size seen, for frag_max_size. */
	fragsize = -skb_network_offset(skb) + skb->len;
	if (fragsize > fq->q.max_size)
		fq->q.max_size = fragsize;

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.flags |= INET_FRAG_FIRST_IN;
	}

	/* Exactly FIRST_IN|LAST_IN set (== comparison) and every byte
	 * accounted for: the datagram is complete, reassemble it now.
	 */
	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

		/* Temporarily clear the dst ref so reassembly does not
		 * drop it; restore afterwards for the caller.
		 */
		skb->_skb_refdst = 0UL;
		err = ip6_frag_reasm(fq, skb, prev_tail, dev);
		skb->_skb_refdst = orefdst;
		return err;
	}

	skb_dst_drop(skb);
	return -EINPROGRESS;

insert_error:
	if (err == IPFRAG_DUP) {
		/* Exact duplicate: drop silently, no stats bump. */
		kfree_skb(skb);
		return -EINVAL;
	}
	err = -EINVAL;
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
	inet_frag_kill(&fq->q);
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_REASMFAILS);
err:
	kfree_skb(skb);
	return err;
}
/*
 * Check if this packet is complete.
 *
 * It is called with locked fq, and caller must check that
 * queue is eligible for reassembly i.e. it is not COMPLETE,
 * the last and the first frames arrived and all the bits are here.
 *
 * On success the reassembled datagram is left in @skb with the fragment
 * header removed and IP6CB filled in; returns 1.  On failure returns -1
 * and the queue is killed.
 */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
			  struct sk_buff *prev_tail, struct net_device *dev)
{
	struct net *net = fq->q.fqdir->net;
	unsigned int nhoff;
	void *reasm_data;
	int payload_len;
	u8 ecn;

	inet_frag_kill(&fq->q);

	/* Merge the per-fragment ECN bits; 0xff marks an invalid mix. */
	ecn = ip_frag_ecn_table[fq->ecn];
	if (unlikely(ecn == 0xff))
		goto out_fail;

	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
	if (!reasm_data)
		goto out_oom;

	/* New payload length: extension headers still present in skb,
	 * plus the full datagram length, minus the fragment header that
	 * is about to be removed.
	 */
	payload_len = ((skb->data - skb_network_header(skb)) -
		       sizeof(struct ipv6hdr) + fq->q.len -
		       sizeof(struct frag_hdr));
	if (payload_len > IPV6_MAXPLEN)
		goto out_oversize;

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	nhoff = fq->nhoffset;
	/* Splice the frag header's Next Header value into its predecessor. */
	skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
	/* Slide all headers forward over the fragment header. */
	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
		(skb->data - skb->head) - sizeof(struct frag_hdr));
	if (skb_mac_header_was_set(skb))
		skb->mac_header += sizeof(struct frag_hdr);
	skb->network_header += sizeof(struct frag_hdr);

	skb_reset_transport_header(skb);

	inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);

	skb->dev = dev;
	ipv6_hdr(skb)->payload_len = htons(payload_len);
	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
	IP6CB(skb)->nhoff = nhoff;
	IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
	IP6CB(skb)->frag_max_size = fq->q.max_size;

	/* Yes, and fold redundant checksum back. 8) */
	skb_postpush_rcsum(skb, skb_network_header(skb),
			   skb_network_header_len(skb));

	rcu_read_lock();
	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
	rcu_read_unlock();
	fq->q.rb_fragments = RB_ROOT;
	fq->q.fragments_tail = NULL;
	fq->q.last_run_head = NULL;
	return 1;

out_oversize:
	net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
	goto out_fail;
out_oom:
	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
	rcu_read_lock();
	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
	rcu_read_unlock();
	inet_frag_kill(&fq->q);
	return -1;
}
/* Protocol handler for IPPROTO_FRAGMENT (extension header).
 *
 * Returns 1 when processing of the packet should continue (either the
 * "fragment" carried the whole datagram, or reassembly just completed
 * and @skb now holds the full packet).  Returns -1 when the skb was
 * consumed (queued for reassembly, dropped, or answered with an ICMPv6
 * error).
 */
static int ipv6_frag_rcv(struct sk_buff *skb)
{
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
	const struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct net *net = dev_net(skb_dst(skb)->dev);
	int iif;

	/* Nested fragment headers after reassembly are invalid. */
	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
		goto fail_hdr;

	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);

	/* Jumbo payload inhibits frag. header */
	if (hdr->payload_len == 0)
		goto fail_hdr;

	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
				 sizeof(struct frag_hdr))))
		goto fail_hdr;

	/* pskb_may_pull() may have reallocated; refetch header pointers. */
	hdr = ipv6_hdr(skb);
	fhdr = (struct frag_hdr *)skb_transport_header(skb);

	/* 0xFFF9 covers the offset bits and the M flag: if all are zero
	 * this "fragment" is the entire datagram (atomic fragment).
	 */
	if (!(fhdr->frag_off & htons(0xFFF9))) {
		/* It is not a fragmented frame */
		skb->transport_header += sizeof(struct frag_hdr);
		__IP6_INC_STATS(net,
				ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);

		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
		return 1;
	}

	iif = skb->dev ? skb->dev->ifindex : 0;
	fq = fq_find(net, fhdr->identification, hdr, iif);
	if (fq) {
		u32 prob_offset = 0;
		int ret;

		spin_lock(&fq->q.lock);

		fq->iif = iif;
		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
				     &prob_offset);

		spin_unlock(&fq->q.lock);
		inet_frag_put(&fq->q);
		/* Nonzero prob_offset means ip6_frag_queue() left the skb
		 * alive for us to answer with a Parameter Problem.
		 */
		if (prob_offset) {
			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
					IPSTATS_MIB_INHDRERRORS);
			/* icmpv6_param_prob() calls kfree_skb(skb) */
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
		}
		return ret;
	}

	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;

fail_hdr:
	__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
			IPSTATS_MIB_INHDRERRORS);
	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
	return -1;
}
/* Registered for IPPROTO_FRAGMENT in ipv6_frag_init(). */
static const struct inet6_protocol frag_protocol = {
	.handler	= ipv6_frag_rcv,
	.flags		= INET6_PROTO_NOPOLICY,
};
#ifdef CONFIG_SYSCTL

/* Per-netns sysctls; .data pointers are filled in at registration time
 * (see ip6_frags_ns_sysctl_register()) from the netns fqdir.
 */
static struct ctl_table ip6_frags_ns_ctl_table[] = {
	{
		.procname	= "ip6frag_high_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ip6frag_low_thresh",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "ip6frag_time",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};

/* secret interval has been deprecated */
static int ip6_frags_secret_interval_unused;
/* Global (init_net only) sysctls; the secret-interval knob is kept as a
 * writable no-op for backward compatibility.
 */
static struct ctl_table ip6_frags_ctl_table[] = {
	{
		.procname	= "ip6frag_secret_interval",
		.data		= &ip6_frags_secret_interval_unused,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};
/* Register the per-netns reassembly sysctls.
 *
 * init_net uses the static table directly; other namespaces get a
 * kmemdup'ed copy so their .data pointers can target their own fqdir.
 * Returns 0 on success, -ENOMEM on allocation or registration failure.
 */
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip6_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;
	}
	/* Wire each entry to this namespace's fqdir fields; extra1/extra2
	 * enforce low_thresh <= high_thresh.
	 */
	table[0].data	= &net->ipv6.fqdir->high_thresh;
	table[0].extra1	= &net->ipv6.fqdir->low_thresh;
	table[1].data	= &net->ipv6.fqdir->low_thresh;
	table[1].extra2	= &net->ipv6.fqdir->high_thresh;
	table[2].data	= &net->ipv6.fqdir->timeout;

	hdr = register_net_sysctl(net, "net/ipv6", table);
	if (!hdr)
		goto err_reg;

	net->ipv6.sysctl.frags_hdr = hdr;
	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}
  389. static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
  390. {
  391. struct ctl_table *table;
  392. table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
  393. unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
  394. if (!net_eq(net, &init_net))
  395. kfree(table);
  396. }
/* Header for the global (init_net) sysctl table, kept for unregister. */
static struct ctl_table_header *ip6_ctl_header;

/* Register the global reassembly sysctls; returns 0 or -ENOMEM. */
static int ip6_frags_sysctl_register(void)
{
	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
					     ip6_frags_ctl_table);
	return ip6_ctl_header == NULL ? -ENOMEM : 0;
}

static void ip6_frags_sysctl_unregister(void)
{
	unregister_net_sysctl_table(ip6_ctl_header);
}
#else
/* CONFIG_SYSCTL=n: no-op stubs so callers need no #ifdefs. */
static int ip6_frags_ns_sysctl_register(struct net *net)
{
	return 0;
}

static void ip6_frags_ns_sysctl_unregister(struct net *net)
{
}

static int ip6_frags_sysctl_register(void)
{
	return 0;
}

static void ip6_frags_sysctl_unregister(void)
{
}
#endif
/* Per-netns init: allocate the fqdir, apply default thresholds/timeout,
 * then expose them through sysctl.  On sysctl failure the fqdir is torn
 * down again so the netns is left clean.
 */
static int __net_init ipv6_frags_init_net(struct net *net)
{
	int res;

	res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
	if (res < 0)
		return res;

	net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
	net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
	net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;

	res = ip6_frags_ns_sysctl_register(net);
	if (res < 0)
		fqdir_exit(net->ipv6.fqdir);
	return res;
}
/* First stage of netns teardown: stop accepting new queues. */
static void __net_exit ipv6_frags_pre_exit_net(struct net *net)
{
	fqdir_pre_exit(net->ipv6.fqdir);
}
/* Final netns teardown: remove sysctls, then free the fqdir. */
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
	ip6_frags_ns_sysctl_unregister(net);
	fqdir_exit(net->ipv6.fqdir);
}
/* Per-network-namespace lifecycle hooks for IPv6 reassembly state. */
static struct pernet_operations ip6_frags_ops = {
	.init		= ipv6_frags_init_net,
	.pre_exit	= ipv6_frags_pre_exit_net,
	.exit		= ipv6_frags_exit_net,
};
/* rhashtable parameters for the fragment queue lookup table; the hash
 * and compare helpers come from <net/ipv6_frag.h>.
 */
static const struct rhashtable_params ip6_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.hashfn			= ip6frag_key_hashfn,
	.obj_hashfn		= ip6frag_obj_hashfn,
	.obj_cmpfn		= ip6frag_obj_cmpfn,
	.automatic_shrinking	= true,
};
/* Module init: set up the shared inet_frags descriptor, register the
 * fragment protocol handler, global sysctls, and the per-netns ops.
 * On any failure the steps already completed are unwound in reverse
 * order via the label chain.  Returns 0 on success or a negative errno.
 */
int __init ipv6_frag_init(void)
{
	int ret;

	ip6_frags.constructor = ip6frag_init;
	ip6_frags.destructor = NULL;
	ip6_frags.qsize = sizeof(struct frag_queue);
	ip6_frags.frag_expire = ip6_frag_expire;
	ip6_frags.frags_cache_name = ip6_frag_cache_name;
	ip6_frags.rhash_params = ip6_rhash_params;
	ret = inet_frags_init(&ip6_frags);
	if (ret)
		goto out;

	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
	if (ret)
		goto err_protocol;

	ret = ip6_frags_sysctl_register();
	if (ret)
		goto err_sysctl;

	ret = register_pernet_subsys(&ip6_frags_ops);
	if (ret)
		goto err_pernet;
out:
	return ret;

err_pernet:
	ip6_frags_sysctl_unregister();
err_sysctl:
	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
err_protocol:
	inet_frags_fini(&ip6_frags);
	goto out;
}
/* Module exit: undo ipv6_frag_init() in reverse registration order. */
void ipv6_frag_exit(void)
{
	ip6_frags_sysctl_unregister();
	unregister_pernet_subsys(&ip6_frags_ops);
	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
	inet_frags_fini(&ip6_frags);
}