/net/ipv4/netfilter/ip_queue.c

https://bitbucket.org/abioy/linux

/*
 * This is a module which is used for queueing IPv4 packets and
 * communicating with userspace via netlink.
 *
 * A sketch of a minimal userspace consumer of this interface appears at
 * the end of this listing.
 *
 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/net.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/netfilter/nf_queue.h>
#include <net/ip.h>

#define IPQ_QMAX_DEFAULT 1024
#define IPQ_PROC_FS_NAME "ip_queue"
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"

typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);

static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_RWLOCK(queue_lock);
static int peer_pid __read_mostly;
static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);

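/*
 * queue_lock protects the bookkeeping above (queue_list, queue_total, the
 * drop counters, copy_mode/copy_range and peer_pid).  The __ipq_* queue
 * helpers (__ipq_enqueue_entry, __ipq_set_mode, __ipq_reset, __ipq_flush)
 * expect the caller to already hold it.  ipqnl_mutex serialises processing
 * of netlink messages received from the userspace peer.
 */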
static inline void
__ipq_enqueue_entry(struct nf_queue_entry *entry)
{
	list_add_tail(&entry->list, &queue_list);
	queue_total++;
}

static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
	int status = 0;

	switch (mode) {
	case IPQ_COPY_NONE:
	case IPQ_COPY_META:
		copy_mode = mode;
		copy_range = 0;
		break;

	case IPQ_COPY_PACKET:
		copy_mode = mode;
		copy_range = range;
		if (copy_range > 0xFFFF)
			copy_range = 0xFFFF;
		break;

	default:
		status = -EINVAL;
	}
	return status;
}

static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);

static inline void
__ipq_reset(void)
{
	peer_pid = 0;
	net_disable_timestamp();
	__ipq_set_mode(IPQ_COPY_NONE, 0);
	__ipq_flush(NULL, 0);
}

/*
 * Find the queued entry whose id matches and unlink it.  The id handed to
 * userspace (pmsg->packet_id) is the kernel address of the entry itself,
 * hence the pointer comparison below.
 */
static struct nf_queue_entry *
ipq_find_dequeue_entry(unsigned long id)
{
	struct nf_queue_entry *entry = NULL, *i;

	write_lock_bh(&queue_lock);

	list_for_each_entry(i, &queue_list, list) {
		if ((unsigned long)i == id) {
			entry = i;
			break;
		}
	}

	if (entry) {
		list_del(&entry->list);
		queue_total--;
	}

	write_unlock_bh(&queue_lock);
	return entry;
}

static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
	struct nf_queue_entry *entry, *next;

	list_for_each_entry_safe(entry, next, &queue_list, list) {
		if (!cmpfn || cmpfn(entry, data)) {
			list_del(&entry->list);
			queue_total--;
			nf_reinject(entry, NF_DROP);
		}
	}
}

static void
ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
	write_lock_bh(&queue_lock);
	__ipq_flush(cmpfn, data);
	write_unlock_bh(&queue_lock);
}

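/*
 * Kernel-to-userspace direction: build an IPQM_PACKET netlink message for a
 * queued entry.  Depending on copy_mode the message carries either packet
 * metadata only or metadata plus up to copy_range bytes of payload.
 */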
static struct sk_buff *
ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
{
	sk_buff_data_t old_tail;
	size_t size = 0;
	size_t data_len = 0;
	struct sk_buff *skb;
	struct ipq_packet_msg *pmsg;
	struct nlmsghdr *nlh;
	struct timeval tv;

	read_lock_bh(&queue_lock);

	switch (copy_mode) {
	case IPQ_COPY_META:
	case IPQ_COPY_NONE:
		size = NLMSG_SPACE(sizeof(*pmsg));
		break;

	case IPQ_COPY_PACKET:
		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
		    (*errp = skb_checksum_help(entry->skb))) {
			read_unlock_bh(&queue_lock);
			return NULL;
		}
		if (copy_range == 0 || copy_range > entry->skb->len)
			data_len = entry->skb->len;
		else
			data_len = copy_range;

		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
		break;

	default:
		*errp = -EINVAL;
		read_unlock_bh(&queue_lock);
		return NULL;
	}

	read_unlock_bh(&queue_lock);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		goto nlmsg_failure;

	old_tail = skb->tail;
	/* NLMSG_PUT() jumps to the nlmsg_failure label if the skb lacks room. */
	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
	pmsg = NLMSG_DATA(nlh);
	memset(pmsg, 0, sizeof(*pmsg));

	pmsg->packet_id       = (unsigned long )entry;
	pmsg->data_len        = data_len;
	tv = ktime_to_timeval(entry->skb->tstamp);
	pmsg->timestamp_sec   = tv.tv_sec;
	pmsg->timestamp_usec  = tv.tv_usec;
	pmsg->mark            = entry->skb->mark;
	pmsg->hook            = entry->hook;
	pmsg->hw_protocol     = entry->skb->protocol;

	if (entry->indev)
		strcpy(pmsg->indev_name, entry->indev->name);
	else
		pmsg->indev_name[0] = '\0';

	if (entry->outdev)
		strcpy(pmsg->outdev_name, entry->outdev->name);
	else
		pmsg->outdev_name[0] = '\0';

	if (entry->indev && entry->skb->dev) {
		pmsg->hw_type = entry->skb->dev->type;
		pmsg->hw_addrlen = dev_parse_header(entry->skb,
						    pmsg->hw_addr);
	}

	if (data_len)
		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
			BUG();

	nlh->nlmsg_len = skb->tail - old_tail;
	return skb;

nlmsg_failure:
	*errp = -EINVAL;
	printk(KERN_ERR "ip_queue: error creating packet message\n");
	return NULL;
}

static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
	int status = -EINVAL;
	struct sk_buff *nskb;

	if (copy_mode == IPQ_COPY_NONE)
		return -EAGAIN;

	nskb = ipq_build_packet_message(entry, &status);
	if (nskb == NULL)
		return status;

	write_lock_bh(&queue_lock);

	if (!peer_pid)
		goto err_out_free_nskb;

	if (queue_total >= queue_maxlen) {
		queue_dropped++;
		status = -ENOSPC;
		if (net_ratelimit())
			printk(KERN_WARNING "ip_queue: full at %d entries, "
			       "dropping packet(s). Dropped: %d\n", queue_total,
			       queue_dropped);
		goto err_out_free_nskb;
	}

	/* netlink_unicast will either free the nskb or attach it to a socket */
	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
	if (status < 0) {
		queue_user_dropped++;
		goto err_out_unlock;
	}

	__ipq_enqueue_entry(entry);

	write_unlock_bh(&queue_lock);
	return status;

err_out_free_nskb:
	kfree_skb(nskb);

err_out_unlock:
	write_unlock_bh(&queue_lock);
	return status;
}

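/*
 * Userspace-to-kernel direction: IPQM_VERDICT messages carry a verdict for a
 * previously queued packet and may optionally carry a replacement payload,
 * which ipq_mangle_ipv4() copies over the original skb data before the packet
 * is reinjected.
 */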
static int
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
{
	int diff;
	struct iphdr *user_iph = (struct iphdr *)v->payload;
	struct sk_buff *nskb;

	if (v->data_len < sizeof(*user_iph))
		return 0;
	diff = v->data_len - e->skb->len;
	if (diff < 0) {
		if (pskb_trim(e->skb, v->data_len))
			return -ENOMEM;
	} else if (diff > 0) {
		if (v->data_len > 0xFFFF)
			return -EINVAL;
		if (diff > skb_tailroom(e->skb)) {
			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
					       diff, GFP_ATOMIC);
			if (!nskb) {
				printk(KERN_WARNING "ip_queue: error "
				       "in mangle, dropping packet\n");
				return -ENOMEM;
			}
			kfree_skb(e->skb);
			e->skb = nskb;
		}
		skb_put(e->skb, diff);
	}
	if (!skb_make_writable(e->skb, v->data_len))
		return -ENOMEM;
	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
	e->skb->ip_summed = CHECKSUM_NONE;

	return 0;
}

static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
	struct nf_queue_entry *entry;

	if (vmsg->value > NF_MAX_VERDICT)
		return -EINVAL;

	entry = ipq_find_dequeue_entry(vmsg->id);
	if (entry == NULL)
		return -ENOENT;
	else {
		int verdict = vmsg->value;

		if (vmsg->data_len && vmsg->data_len == len)
			if (ipq_mangle_ipv4(vmsg, entry) < 0)
				verdict = NF_DROP;

		nf_reinject(entry, verdict);
		return 0;
	}
}

static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
	int status;

	write_lock_bh(&queue_lock);
	status = __ipq_set_mode(mode, range);
	write_unlock_bh(&queue_lock);
	return status;
}

static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
		 unsigned char type, unsigned int len)
{
	int status = 0;

	if (len < sizeof(*pmsg))
		return -EINVAL;

	switch (type) {
	case IPQM_MODE:
		status = ipq_set_mode(pmsg->msg.mode.value,
				      pmsg->msg.mode.range);
		break;

	case IPQM_VERDICT:
		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
			status = -EINVAL;
		else
			status = ipq_set_verdict(&pmsg->msg.verdict,
						 len - sizeof(*pmsg));
		break;
	default:
		status = -EINVAL;
	}
	return status;
}

static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
	if (entry->indev)
		if (entry->indev->ifindex == ifindex)
			return 1;
	if (entry->outdev)
		if (entry->outdev->ifindex == ifindex)
			return 1;
#ifdef CONFIG_BRIDGE_NETFILTER
	if (entry->skb->nf_bridge) {
		if (entry->skb->nf_bridge->physindev &&
		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
			return 1;
		if (entry->skb->nf_bridge->physoutdev &&
		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
			return 1;
	}
#endif
	return 0;
}

static void
ipq_dev_drop(int ifindex)
{
	ipq_flush(dev_cmp, ifindex);
}

#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)

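/*
 * Netlink receive path.  Messages are validated (length, NLM_F_REQUEST,
 * message type, CAP_NET_ADMIN) before being handed to ipq_receive_peer().
 * The first process to send a valid request becomes the single peer
 * (peer_pid); requests from any other pid are rejected with -EBUSY until
 * the peer's socket is released.
 */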
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
	int status, type, pid, flags, nlmsglen, skblen;
	struct nlmsghdr *nlh;

	skblen = skb->len;
	if (skblen < sizeof(*nlh))
		return;

	nlh = nlmsg_hdr(skb);
	nlmsglen = nlh->nlmsg_len;
	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
		return;

	pid = nlh->nlmsg_pid;
	flags = nlh->nlmsg_flags;

	if (pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
		RCV_SKB_FAIL(-EINVAL);

	if (flags & MSG_TRUNC)
		RCV_SKB_FAIL(-ECOMM);

	type = nlh->nlmsg_type;
	if (type < NLMSG_NOOP || type >= IPQM_MAX)
		RCV_SKB_FAIL(-EINVAL);

	if (type <= IPQM_BASE)
		return;

	if (security_netlink_recv(skb, CAP_NET_ADMIN))
		RCV_SKB_FAIL(-EPERM);

	write_lock_bh(&queue_lock);

	if (peer_pid) {
		if (peer_pid != pid) {
			write_unlock_bh(&queue_lock);
			RCV_SKB_FAIL(-EBUSY);
		}
	} else {
		net_enable_timestamp();
		peer_pid = pid;
	}

	write_unlock_bh(&queue_lock);

	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
				  nlmsglen - NLMSG_LENGTH(0));
	if (status < 0)
		RCV_SKB_FAIL(status);

	if (flags & NLM_F_ACK)
		netlink_ack(skb, nlh, 0);
	return;
}

static void
ipq_rcv_skb(struct sk_buff *skb)
{
	mutex_lock(&ipqnl_mutex);
	__ipq_rcv_skb(skb);
	mutex_unlock(&ipqnl_mutex);
}

static int
ipq_rcv_dev_event(struct notifier_block *this,
		  unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	/* Drop any packets associated with the downed device */
	if (event == NETDEV_DOWN)
		ipq_dev_drop(dev->ifindex);
	return NOTIFY_DONE;
}

static struct notifier_block ipq_dev_notifier = {
	.notifier_call	= ipq_rcv_dev_event,
};

static int
ipq_rcv_nl_event(struct notifier_block *this,
		 unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
		write_lock_bh(&queue_lock);
		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
			__ipq_reset();
		write_unlock_bh(&queue_lock);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ipq_nl_notifier = {
	.notifier_call	= ipq_rcv_nl_event,
};

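/*
 * Tuning and introspection: queue_maxlen is exported as
 * /proc/sys/net/ipv4/ip_queue_maxlen, and /proc/net/ip_queue shows the
 * current peer, copy mode and queue/drop counters.
 */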
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipq_sysctl_header;

static ctl_table ipq_table[] = {
	{
		.procname	= NET_IPQ_QMAX_NAME,
		.data		= &queue_maxlen,
		.maxlen		= sizeof(queue_maxlen),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }
};
#endif

#ifdef CONFIG_PROC_FS
static int ip_queue_show(struct seq_file *m, void *v)
{
	read_lock_bh(&queue_lock);

	seq_printf(m,
		   "Peer PID          : %d\n"
		   "Copy mode         : %hu\n"
		   "Copy range        : %u\n"
		   "Queue length      : %u\n"
		   "Queue max. length : %u\n"
		   "Queue dropped     : %u\n"
		   "Netlink dropped   : %u\n",
		   peer_pid,
		   copy_mode,
		   copy_range,
		   queue_total,
		   queue_maxlen,
		   queue_dropped,
		   queue_user_dropped);

	read_unlock_bh(&queue_lock);
	return 0;
}

static int ip_queue_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_queue_show, NULL);
}

static const struct file_operations ip_queue_proc_fops = {
	.open		= ip_queue_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.owner		= THIS_MODULE,
};
#endif

static const struct nf_queue_handler nfqh = {
	.name	= "ip_queue",
	.outfn	= &ipq_enqueue_packet,
};

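/*
 * Module init: register the netlink notifier, create the NETLINK_FIREWALL
 * kernel socket, set up the /proc and sysctl entries, then register nfqh as
 * the NFPROTO_IPV4 queue handler so that packets given an NF_QUEUE verdict
 * are passed to ipq_enqueue_packet().
 */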
static int __init ip_queue_init(void)
{
	int status = -ENOMEM;
	struct proc_dir_entry *proc __maybe_unused;

	netlink_register_notifier(&ipq_nl_notifier);
	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
				      ipq_rcv_skb, NULL, THIS_MODULE);
	if (ipqnl == NULL) {
		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

#ifdef CONFIG_PROC_FS
	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
			   &ip_queue_proc_fops);
	if (!proc) {
		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
		goto cleanup_ipqnl;
	}
#endif
	register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
	ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
#endif
	status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
	if (status < 0) {
		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
		goto cleanup_sysctl;
	}
	return status;

cleanup_sysctl:
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

cleanup_ipqnl: __maybe_unused
	netlink_kernel_release(ipqnl);
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

cleanup_netlink_notifier:
	netlink_unregister_notifier(&ipq_nl_notifier);
	return status;
}

static void __exit ip_queue_fini(void)
{
	nf_unregister_queue_handlers(&nfqh);

	ipq_flush(NULL, 0);

#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

	netlink_kernel_release(ipqnl);
	/* Wait for any ipq_rcv_skb() still holding ipqnl_mutex to finish. */
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

	netlink_unregister_notifier(&ipq_nl_notifier);
}

MODULE_DESCRIPTION("IPv4 packet queue handler");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);

module_init(ip_queue_init);
module_exit(ip_queue_fini);
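
/*
 * Usage sketch (not part of this module's source): a minimal userspace
 * consumer of the interface above, written against the libipq API described
 * in libipq(3).  It selects IPQ_COPY_PACKET mode and accepts every packet it
 * is handed.  Treat it as an illustrative sketch under those assumptions, not
 * a tested program; error handling is reduced to the bare minimum.
 */
#if 0	/* userspace example, excluded from the kernel build */
#include <stdio.h>
#include <stdlib.h>
#include <linux/netfilter.h>
#include <libipq.h>

#define BUFSIZE 2048

static void die(struct ipq_handle *h)
{
	ipq_perror("ip_queue example");
	ipq_destroy_handle(h);
	exit(1);
}

int main(void)
{
	unsigned char buf[BUFSIZE];
	struct ipq_handle *h;
	int status;

	h = ipq_create_handle(0, PF_INET);	/* talk NETLINK_FIREWALL for IPv4 */
	if (!h)
		die(h);

	/* Ask the kernel (IPQM_MODE -> ipq_set_mode above) for full packet copies. */
	if (ipq_set_mode(h, IPQ_COPY_PACKET, BUFSIZE) < 0)
		die(h);

	for (;;) {
		status = ipq_read(h, buf, BUFSIZE, 0);
		if (status < 0)
			die(h);

		switch (ipq_message_type(buf)) {
		case NLMSG_ERROR:
			fprintf(stderr, "error message: %d\n", ipq_get_msgerr(buf));
			break;
		case IPQM_PACKET: {
			ipq_packet_msg_t *m = ipq_get_packet(buf);

			/* Return an IPQM_VERDICT; packet_id is the kernel's entry id. */
			if (ipq_set_verdict(h, m->packet_id, NF_ACCEPT, 0, NULL) < 0)
				die(h);
			break;
		}
		default:
			break;
		}
	}

	ipq_destroy_handle(h);
	return 0;
}
#endif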