PageRenderTime 147ms CodeModel.GetById 23ms app.highlight 96ms RepoModel.GetById 1ms app.codeStats 0ms

/net/ipv4/netfilter/ip_queue.c

https://bitbucket.org/abioy/linux
C | 646 lines | 528 code | 105 blank | 13 comment | 91 complexity | 2954222d8ba2539c35afea0de569a438 MD5 | raw file
Possible License(s): CC-BY-SA-3.0, GPL-2.0, LGPL-2.0, AGPL-1.0
  1/*
  2 * This is a module which is used for queueing IPv4 packets and
  3 * communicating with userspace via netlink.
  4 *
  5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
  6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
  7 *
  8 * This program is free software; you can redistribute it and/or modify
  9 * it under the terms of the GNU General Public License version 2 as
 10 * published by the Free Software Foundation.
 11 */
 12#include <linux/module.h>
 13#include <linux/skbuff.h>
 14#include <linux/init.h>
 15#include <linux/ip.h>
 16#include <linux/notifier.h>
 17#include <linux/netdevice.h>
 18#include <linux/netfilter.h>
 19#include <linux/netfilter_ipv4/ip_queue.h>
 20#include <linux/netfilter_ipv4/ip_tables.h>
 21#include <linux/netlink.h>
 22#include <linux/spinlock.h>
 23#include <linux/sysctl.h>
 24#include <linux/proc_fs.h>
 25#include <linux/seq_file.h>
 26#include <linux/security.h>
 27#include <linux/net.h>
 28#include <linux/mutex.h>
 29#include <linux/slab.h>
 30#include <net/net_namespace.h>
 31#include <net/sock.h>
 32#include <net/route.h>
 33#include <net/netfilter/nf_queue.h>
 34#include <net/ip.h>
 35
 36#define IPQ_QMAX_DEFAULT 1024
 37#define IPQ_PROC_FS_NAME "ip_queue"
 38#define NET_IPQ_QMAX 2088
 39#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
 40
 41typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 42
 43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 45static DEFINE_RWLOCK(queue_lock);
 46static int peer_pid __read_mostly;
 47static unsigned int copy_range __read_mostly;
 48static unsigned int queue_total;
 49static unsigned int queue_dropped = 0;
 50static unsigned int queue_user_dropped = 0;
 51static struct sock *ipqnl __read_mostly;
 52static LIST_HEAD(queue_list);
 53static DEFINE_MUTEX(ipqnl_mutex);
 54
 55static inline void
 56__ipq_enqueue_entry(struct nf_queue_entry *entry)
 57{
 58       list_add_tail(&entry->list, &queue_list);
 59       queue_total++;
 60}
 61
 62static inline int
 63__ipq_set_mode(unsigned char mode, unsigned int range)
 64{
 65	int status = 0;
 66
 67	switch(mode) {
 68	case IPQ_COPY_NONE:
 69	case IPQ_COPY_META:
 70		copy_mode = mode;
 71		copy_range = 0;
 72		break;
 73
 74	case IPQ_COPY_PACKET:
 75		copy_mode = mode;
 76		copy_range = range;
 77		if (copy_range > 0xFFFF)
 78			copy_range = 0xFFFF;
 79		break;
 80
 81	default:
 82		status = -EINVAL;
 83
 84	}
 85	return status;
 86}
 87
 88static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
 89
 90static inline void
 91__ipq_reset(void)
 92{
 93	peer_pid = 0;
 94	net_disable_timestamp();
 95	__ipq_set_mode(IPQ_COPY_NONE, 0);
 96	__ipq_flush(NULL, 0);
 97}
 98
 99static struct nf_queue_entry *
100ipq_find_dequeue_entry(unsigned long id)
101{
102	struct nf_queue_entry *entry = NULL, *i;
103
104	write_lock_bh(&queue_lock);
105
106	list_for_each_entry(i, &queue_list, list) {
107		if ((unsigned long)i == id) {
108			entry = i;
109			break;
110		}
111	}
112
113	if (entry) {
114		list_del(&entry->list);
115		queue_total--;
116	}
117
118	write_unlock_bh(&queue_lock);
119	return entry;
120}
121
122static void
123__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
124{
125	struct nf_queue_entry *entry, *next;
126
127	list_for_each_entry_safe(entry, next, &queue_list, list) {
128		if (!cmpfn || cmpfn(entry, data)) {
129			list_del(&entry->list);
130			queue_total--;
131			nf_reinject(entry, NF_DROP);
132		}
133	}
134}
135
136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{
139	write_lock_bh(&queue_lock);
140	__ipq_flush(cmpfn, data);
141	write_unlock_bh(&queue_lock);
142}
143
144static struct sk_buff *
145ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
146{
147	sk_buff_data_t old_tail;
148	size_t size = 0;
149	size_t data_len = 0;
150	struct sk_buff *skb;
151	struct ipq_packet_msg *pmsg;
152	struct nlmsghdr *nlh;
153	struct timeval tv;
154
155	read_lock_bh(&queue_lock);
156
157	switch (copy_mode) {
158	case IPQ_COPY_META:
159	case IPQ_COPY_NONE:
160		size = NLMSG_SPACE(sizeof(*pmsg));
161		break;
162
163	case IPQ_COPY_PACKET:
164		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
165		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
166		    (*errp = skb_checksum_help(entry->skb))) {
167			read_unlock_bh(&queue_lock);
168			return NULL;
169		}
170		if (copy_range == 0 || copy_range > entry->skb->len)
171			data_len = entry->skb->len;
172		else
173			data_len = copy_range;
174
175		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
176		break;
177
178	default:
179		*errp = -EINVAL;
180		read_unlock_bh(&queue_lock);
181		return NULL;
182	}
183
184	read_unlock_bh(&queue_lock);
185
186	skb = alloc_skb(size, GFP_ATOMIC);
187	if (!skb)
188		goto nlmsg_failure;
189
190	old_tail = skb->tail;
191	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
192	pmsg = NLMSG_DATA(nlh);
193	memset(pmsg, 0, sizeof(*pmsg));
194
195	pmsg->packet_id       = (unsigned long )entry;
196	pmsg->data_len        = data_len;
197	tv = ktime_to_timeval(entry->skb->tstamp);
198	pmsg->timestamp_sec   = tv.tv_sec;
199	pmsg->timestamp_usec  = tv.tv_usec;
200	pmsg->mark            = entry->skb->mark;
201	pmsg->hook            = entry->hook;
202	pmsg->hw_protocol     = entry->skb->protocol;
203
204	if (entry->indev)
205		strcpy(pmsg->indev_name, entry->indev->name);
206	else
207		pmsg->indev_name[0] = '\0';
208
209	if (entry->outdev)
210		strcpy(pmsg->outdev_name, entry->outdev->name);
211	else
212		pmsg->outdev_name[0] = '\0';
213
214	if (entry->indev && entry->skb->dev) {
215		pmsg->hw_type = entry->skb->dev->type;
216		pmsg->hw_addrlen = dev_parse_header(entry->skb,
217						    pmsg->hw_addr);
218	}
219
220	if (data_len)
221		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
222			BUG();
223
224	nlh->nlmsg_len = skb->tail - old_tail;
225	return skb;
226
227nlmsg_failure:
228	*errp = -EINVAL;
229	printk(KERN_ERR "ip_queue: error creating packet message\n");
230	return NULL;
231}
232
233static int
234ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
235{
236	int status = -EINVAL;
237	struct sk_buff *nskb;
238
239	if (copy_mode == IPQ_COPY_NONE)
240		return -EAGAIN;
241
242	nskb = ipq_build_packet_message(entry, &status);
243	if (nskb == NULL)
244		return status;
245
246	write_lock_bh(&queue_lock);
247
248	if (!peer_pid)
249		goto err_out_free_nskb;
250
251	if (queue_total >= queue_maxlen) {
252		queue_dropped++;
253		status = -ENOSPC;
254		if (net_ratelimit())
255			  printk (KERN_WARNING "ip_queue: full at %d entries, "
256				  "dropping packets(s). Dropped: %d\n", queue_total,
257				  queue_dropped);
258		goto err_out_free_nskb;
259	}
260
261	/* netlink_unicast will either free the nskb or attach it to a socket */
262	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
263	if (status < 0) {
264		queue_user_dropped++;
265		goto err_out_unlock;
266	}
267
268	__ipq_enqueue_entry(entry);
269
270	write_unlock_bh(&queue_lock);
271	return status;
272
273err_out_free_nskb:
274	kfree_skb(nskb);
275
276err_out_unlock:
277	write_unlock_bh(&queue_lock);
278	return status;
279}
280
281static int
282ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
283{
284	int diff;
285	struct iphdr *user_iph = (struct iphdr *)v->payload;
286	struct sk_buff *nskb;
287
288	if (v->data_len < sizeof(*user_iph))
289		return 0;
290	diff = v->data_len - e->skb->len;
291	if (diff < 0) {
292		if (pskb_trim(e->skb, v->data_len))
293			return -ENOMEM;
294	} else if (diff > 0) {
295		if (v->data_len > 0xFFFF)
296			return -EINVAL;
297		if (diff > skb_tailroom(e->skb)) {
298			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
299					       diff, GFP_ATOMIC);
300			if (!nskb) {
301				printk(KERN_WARNING "ip_queue: error "
302				      "in mangle, dropping packet\n");
303				return -ENOMEM;
304			}
305			kfree_skb(e->skb);
306			e->skb = nskb;
307		}
308		skb_put(e->skb, diff);
309	}
310	if (!skb_make_writable(e->skb, v->data_len))
311		return -ENOMEM;
312	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
313	e->skb->ip_summed = CHECKSUM_NONE;
314
315	return 0;
316}
317
318static int
319ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
320{
321	struct nf_queue_entry *entry;
322
323	if (vmsg->value > NF_MAX_VERDICT)
324		return -EINVAL;
325
326	entry = ipq_find_dequeue_entry(vmsg->id);
327	if (entry == NULL)
328		return -ENOENT;
329	else {
330		int verdict = vmsg->value;
331
332		if (vmsg->data_len && vmsg->data_len == len)
333			if (ipq_mangle_ipv4(vmsg, entry) < 0)
334				verdict = NF_DROP;
335
336		nf_reinject(entry, verdict);
337		return 0;
338	}
339}
340
341static int
342ipq_set_mode(unsigned char mode, unsigned int range)
343{
344	int status;
345
346	write_lock_bh(&queue_lock);
347	status = __ipq_set_mode(mode, range);
348	write_unlock_bh(&queue_lock);
349	return status;
350}
351
352static int
353ipq_receive_peer(struct ipq_peer_msg *pmsg,
354		 unsigned char type, unsigned int len)
355{
356	int status = 0;
357
358	if (len < sizeof(*pmsg))
359		return -EINVAL;
360
361	switch (type) {
362	case IPQM_MODE:
363		status = ipq_set_mode(pmsg->msg.mode.value,
364				      pmsg->msg.mode.range);
365		break;
366
367	case IPQM_VERDICT:
368		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
369			status = -EINVAL;
370		else
371			status = ipq_set_verdict(&pmsg->msg.verdict,
372						 len - sizeof(*pmsg));
373			break;
374	default:
375		status = -EINVAL;
376	}
377	return status;
378}
379
380static int
381dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
382{
383	if (entry->indev)
384		if (entry->indev->ifindex == ifindex)
385			return 1;
386	if (entry->outdev)
387		if (entry->outdev->ifindex == ifindex)
388			return 1;
389#ifdef CONFIG_BRIDGE_NETFILTER
390	if (entry->skb->nf_bridge) {
391		if (entry->skb->nf_bridge->physindev &&
392		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
393			return 1;
394		if (entry->skb->nf_bridge->physoutdev &&
395		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
396			return 1;
397	}
398#endif
399	return 0;
400}
401
402static void
403ipq_dev_drop(int ifindex)
404{
405	ipq_flush(dev_cmp, ifindex);
406}
407
408#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
409
410static inline void
411__ipq_rcv_skb(struct sk_buff *skb)
412{
413	int status, type, pid, flags, nlmsglen, skblen;
414	struct nlmsghdr *nlh;
415
416	skblen = skb->len;
417	if (skblen < sizeof(*nlh))
418		return;
419
420	nlh = nlmsg_hdr(skb);
421	nlmsglen = nlh->nlmsg_len;
422	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
423		return;
424
425	pid = nlh->nlmsg_pid;
426	flags = nlh->nlmsg_flags;
427
428	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
429		RCV_SKB_FAIL(-EINVAL);
430
431	if (flags & MSG_TRUNC)
432		RCV_SKB_FAIL(-ECOMM);
433
434	type = nlh->nlmsg_type;
435	if (type < NLMSG_NOOP || type >= IPQM_MAX)
436		RCV_SKB_FAIL(-EINVAL);
437
438	if (type <= IPQM_BASE)
439		return;
440
441	if (security_netlink_recv(skb, CAP_NET_ADMIN))
442		RCV_SKB_FAIL(-EPERM);
443
444	write_lock_bh(&queue_lock);
445
446	if (peer_pid) {
447		if (peer_pid != pid) {
448			write_unlock_bh(&queue_lock);
449			RCV_SKB_FAIL(-EBUSY);
450		}
451	} else {
452		net_enable_timestamp();
453		peer_pid = pid;
454	}
455
456	write_unlock_bh(&queue_lock);
457
458	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
459				  nlmsglen - NLMSG_LENGTH(0));
460	if (status < 0)
461		RCV_SKB_FAIL(status);
462
463	if (flags & NLM_F_ACK)
464		netlink_ack(skb, nlh, 0);
465	return;
466}
467
468static void
469ipq_rcv_skb(struct sk_buff *skb)
470{
471	mutex_lock(&ipqnl_mutex);
472	__ipq_rcv_skb(skb);
473	mutex_unlock(&ipqnl_mutex);
474}
475
476static int
477ipq_rcv_dev_event(struct notifier_block *this,
478		  unsigned long event, void *ptr)
479{
480	struct net_device *dev = ptr;
481
482	if (!net_eq(dev_net(dev), &init_net))
483		return NOTIFY_DONE;
484
485	/* Drop any packets associated with the downed device */
486	if (event == NETDEV_DOWN)
487		ipq_dev_drop(dev->ifindex);
488	return NOTIFY_DONE;
489}
490
491static struct notifier_block ipq_dev_notifier = {
492	.notifier_call	= ipq_rcv_dev_event,
493};
494
495static int
496ipq_rcv_nl_event(struct notifier_block *this,
497		 unsigned long event, void *ptr)
498{
499	struct netlink_notify *n = ptr;
500
501	if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
502		write_lock_bh(&queue_lock);
503		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
504			__ipq_reset();
505		write_unlock_bh(&queue_lock);
506	}
507	return NOTIFY_DONE;
508}
509
510static struct notifier_block ipq_nl_notifier = {
511	.notifier_call	= ipq_rcv_nl_event,
512};
513
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipq_sysctl_header;

/*
 * Sysctl knob exposing queue_maxlen as "ip_queue_maxlen".
 * NOTE(review): proc_dointvec handles a signed int while queue_maxlen is
 * declared unsigned -- confirm negative writes are acceptable.
 */
static ctl_table ipq_table[] = {
	{
		.procname     = NET_IPQ_QMAX_NAME,
		.data         = &queue_maxlen,
		.maxlen       = sizeof(queue_maxlen),
		.mode         = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};
#endif
528
529#ifdef CONFIG_PROC_FS
530static int ip_queue_show(struct seq_file *m, void *v)
531{
532	read_lock_bh(&queue_lock);
533
534	seq_printf(m,
535		      "Peer PID          : %d\n"
536		      "Copy mode         : %hu\n"
537		      "Copy range        : %u\n"
538		      "Queue length      : %u\n"
539		      "Queue max. length : %u\n"
540		      "Queue dropped     : %u\n"
541		      "Netlink dropped   : %u\n",
542		      peer_pid,
543		      copy_mode,
544		      copy_range,
545		      queue_total,
546		      queue_maxlen,
547		      queue_dropped,
548		      queue_user_dropped);
549
550	read_unlock_bh(&queue_lock);
551	return 0;
552}
553
554static int ip_queue_open(struct inode *inode, struct file *file)
555{
556	return single_open(file, ip_queue_show, NULL);
557}
558
559static const struct file_operations ip_queue_proc_fops = {
560	.open		= ip_queue_open,
561	.read		= seq_read,
562	.llseek		= seq_lseek,
563	.release	= single_release,
564	.owner		= THIS_MODULE,
565};
566#endif
567
568static const struct nf_queue_handler nfqh = {
569	.name	= "ip_queue",
570	.outfn	= &ipq_enqueue_packet,
571};
572
573static int __init ip_queue_init(void)
574{
575	int status = -ENOMEM;
576	struct proc_dir_entry *proc __maybe_unused;
577
578	netlink_register_notifier(&ipq_nl_notifier);
579	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
580				      ipq_rcv_skb, NULL, THIS_MODULE);
581	if (ipqnl == NULL) {
582		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
583		goto cleanup_netlink_notifier;
584	}
585
586#ifdef CONFIG_PROC_FS
587	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
588			   &ip_queue_proc_fops);
589	if (!proc) {
590		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
591		goto cleanup_ipqnl;
592	}
593#endif
594	register_netdevice_notifier(&ipq_dev_notifier);
595#ifdef CONFIG_SYSCTL
596	ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
597#endif
598	status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
599	if (status < 0) {
600		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
601		goto cleanup_sysctl;
602	}
603	return status;
604
605cleanup_sysctl:
606#ifdef CONFIG_SYSCTL
607	unregister_sysctl_table(ipq_sysctl_header);
608#endif
609	unregister_netdevice_notifier(&ipq_dev_notifier);
610	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
611cleanup_ipqnl: __maybe_unused
612	netlink_kernel_release(ipqnl);
613	mutex_lock(&ipqnl_mutex);
614	mutex_unlock(&ipqnl_mutex);
615
616cleanup_netlink_notifier:
617	netlink_unregister_notifier(&ipq_nl_notifier);
618	return status;
619}
620
621static void __exit ip_queue_fini(void)
622{
623	nf_unregister_queue_handlers(&nfqh);
624
625	ipq_flush(NULL, 0);
626
627#ifdef CONFIG_SYSCTL
628	unregister_sysctl_table(ipq_sysctl_header);
629#endif
630	unregister_netdevice_notifier(&ipq_dev_notifier);
631	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
632
633	netlink_kernel_release(ipqnl);
634	mutex_lock(&ipqnl_mutex);
635	mutex_unlock(&ipqnl_mutex);
636
637	netlink_unregister_notifier(&ipq_nl_notifier);
638}
639
640MODULE_DESCRIPTION("IPv4 packet queue handler");
641MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
642MODULE_LICENSE("GPL");
643MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
644
645module_init(ip_queue_init);
646module_exit(ip_queue_fini);