PageRenderTime 46ms CodeModel.GetById 18ms app.highlight 24ms RepoModel.GetById 0ms app.codeStats 0ms

/net/ipv4/inet_lro.c

https://bitbucket.org/abioy/linux
C | 600 lines | 454 code | 112 blank | 34 comment | 84 complexity | 22d715450fd35d6192ab14813b8636f3 MD5 | raw file
Possible License(s): CC-BY-SA-3.0, GPL-2.0, LGPL-2.0, AGPL-1.0
  1/*
  2 *  linux/net/ipv4/inet_lro.c
  3 *
  4 *  Large Receive Offload (ipv4 / tcp)
  5 *
  6 *  (C) Copyright IBM Corp. 2007
  7 *
  8 *  Authors:
  9 *       Jan-Bernd Themann <themann@de.ibm.com>
 10 *       Christoph Raisch <raisch@de.ibm.com>
 11 *
 12 *
 13 * This program is free software; you can redistribute it and/or modify
 14 * it under the terms of the GNU General Public License as published by
 15 * the Free Software Foundation; either version 2, or (at your option)
 16 * any later version.
 17 *
 18 * This program is distributed in the hope that it will be useful,
 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 21 * GNU General Public License for more details.
 22 *
 23 * You should have received a copy of the GNU General Public License
 24 * along with this program; if not, write to the Free Software
 25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 26 */
 27
 28
 29#include <linux/module.h>
 30#include <linux/if_vlan.h>
 31#include <linux/inet_lro.h>
 32
 33MODULE_LICENSE("GPL");
 34MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
 35MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
 36
 /* Header lengths in bytes; the doff and ihl fields are scaled by four. */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* IP total length minus the IP header length and the TCP header length. */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Values of ihl / doff that the LRO checks in this file compare against. */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Header length assumed for a fragment whose headers were not parsed. */
#define LRO_MAX_PG_HLEN 64

/*
 * Increment one counter of the manager's statistics.  Wrapped in
 * do { } while (0) so that the macro behaves like a single statement,
 * e.g. as the body of an if that is followed by an else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
 49
 50/*
 51 * Basic tcp checks whether packet is suitable for LRO
 52 */
 53
 54static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph,
 55			    int len, struct net_lro_desc *lro_desc)
 56{
 57        /* check ip header: don't aggregate padded frames */
 58	if (ntohs(iph->tot_len) != len)
 59		return -1;
 60
 61	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
 62		return -1;
 63
 64	if (iph->ihl != IPH_LEN_WO_OPTIONS)
 65		return -1;
 66
 67	if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
 68	    tcph->rst || tcph->syn || tcph->fin)
 69		return -1;
 70
 71	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
 72		return -1;
 73
 74	if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
 75	    tcph->doff != TCPH_LEN_W_TIMESTAMP)
 76		return -1;
 77
 78	/* check tcp options (only timestamp allowed) */
 79	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
 80		__be32 *topt = (__be32 *)(tcph + 1);
 81
 82		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 83				   | (TCPOPT_TIMESTAMP << 8)
 84				   | TCPOLEN_TIMESTAMP))
 85			return -1;
 86
 87		/* timestamp should be in right order */
 88		topt++;
 89		if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
 90				      ntohl(*topt)))
 91			return -1;
 92
 93		/* timestamp reply should not be zero */
 94		topt++;
 95		if (*topt == 0)
 96			return -1;
 97	}
 98
 99	return 0;
100}
101
102static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
103{
104	struct iphdr *iph = lro_desc->iph;
105	struct tcphdr *tcph = lro_desc->tcph;
106	__be32 *p;
107	__wsum tcp_hdr_csum;
108
109	tcph->ack_seq = lro_desc->tcp_ack;
110	tcph->window = lro_desc->tcp_window;
111
112	if (lro_desc->tcp_saw_tstamp) {
113		p = (__be32 *)(tcph + 1);
114		*(p+2) = lro_desc->tcp_rcv_tsecr;
115	}
116
117	iph->tot_len = htons(lro_desc->ip_tot_len);
118
119	iph->check = 0;
120	iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121
122	tcph->check = 0;
123	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
124	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
125	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
126					lro_desc->ip_tot_len -
127					IP_HDR_LEN(iph), IPPROTO_TCP,
128					lro_desc->data_csum);
129}
130
131static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
132{
133	__wsum tcp_csum;
134	__wsum tcp_hdr_csum;
135	__wsum tcp_ps_hdr_csum;
136
137	tcp_csum = ~csum_unfold(tcph->check);
138	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
139
140	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
141					     len + TCP_HDR_LEN(tcph),
142					     IPPROTO_TCP, 0);
143
144	return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
145			tcp_ps_hdr_csum);
146}
147
148static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
149			  struct iphdr *iph, struct tcphdr *tcph,
150			  u16 vlan_tag, struct vlan_group *vgrp)
151{
152	int nr_frags;
153	__be32 *ptr;
154	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
155
156	nr_frags = skb_shinfo(skb)->nr_frags;
157	lro_desc->parent = skb;
158	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
159	lro_desc->iph = iph;
160	lro_desc->tcph = tcph;
161	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
162	lro_desc->tcp_ack = tcph->ack_seq;
163	lro_desc->tcp_window = tcph->window;
164
165	lro_desc->pkt_aggr_cnt = 1;
166	lro_desc->ip_tot_len = ntohs(iph->tot_len);
167
168	if (tcph->doff == 8) {
169		ptr = (__be32 *)(tcph+1);
170		lro_desc->tcp_saw_tstamp = 1;
171		lro_desc->tcp_rcv_tsval = *(ptr+1);
172		lro_desc->tcp_rcv_tsecr = *(ptr+2);
173	}
174
175	lro_desc->mss = tcp_data_len;
176	lro_desc->vgrp = vgrp;
177	lro_desc->vlan_tag = vlan_tag;
178	lro_desc->active = 1;
179
180	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
181						tcp_data_len);
182}
183
184static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
185{
186	memset(lro_desc, 0, sizeof(struct net_lro_desc));
187}
188
189static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
190			   struct tcphdr *tcph, int tcp_data_len)
191{
192	struct sk_buff *parent = lro_desc->parent;
193	__be32 *topt;
194
195	lro_desc->pkt_aggr_cnt++;
196	lro_desc->ip_tot_len += tcp_data_len;
197	lro_desc->tcp_next_seq += tcp_data_len;
198	lro_desc->tcp_window = tcph->window;
199	lro_desc->tcp_ack = tcph->ack_seq;
200
201	/* don't update tcp_rcv_tsval, would not work with PAWS */
202	if (lro_desc->tcp_saw_tstamp) {
203		topt = (__be32 *) (tcph + 1);
204		lro_desc->tcp_rcv_tsecr = *(topt + 2);
205	}
206
207	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
208					     lro_tcp_data_csum(iph, tcph,
209							       tcp_data_len),
210					     parent->len);
211
212	parent->len += tcp_data_len;
213	parent->data_len += tcp_data_len;
214	if (tcp_data_len > lro_desc->mss)
215		lro_desc->mss = tcp_data_len;
216}
217
218static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
219			   struct iphdr *iph, struct tcphdr *tcph)
220{
221	struct sk_buff *parent = lro_desc->parent;
222	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
223
224	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
225
226	skb_pull(skb, (skb->len - tcp_data_len));
227	parent->truesize += skb->truesize;
228
229	if (lro_desc->last_skb)
230		lro_desc->last_skb->next = skb;
231	else
232		skb_shinfo(parent)->frag_list = skb;
233
234	lro_desc->last_skb = skb;
235}
236
237static void lro_add_frags(struct net_lro_desc *lro_desc,
238			  int len, int hlen, int truesize,
239			  struct skb_frag_struct *skb_frags,
240			  struct iphdr *iph, struct tcphdr *tcph)
241{
242	struct sk_buff *skb = lro_desc->parent;
243	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
244
245	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
246
247	skb->truesize += truesize;
248
249	skb_frags[0].page_offset += hlen;
250	skb_frags[0].size -= hlen;
251
252	while (tcp_data_len > 0) {
253		*(lro_desc->next_frag) = *skb_frags;
254		tcp_data_len -= skb_frags->size;
255		lro_desc->next_frag++;
256		skb_frags++;
257		skb_shinfo(skb)->nr_frags++;
258	}
259}
260
261static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
262			      struct iphdr *iph,
263			      struct tcphdr *tcph)
264{
265	if ((lro_desc->iph->saddr != iph->saddr) ||
266	    (lro_desc->iph->daddr != iph->daddr) ||
267	    (lro_desc->tcph->source != tcph->source) ||
268	    (lro_desc->tcph->dest != tcph->dest))
269		return -1;
270	return 0;
271}
272
273static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
274					 struct net_lro_desc *lro_arr,
275					 struct iphdr *iph,
276					 struct tcphdr *tcph)
277{
278	struct net_lro_desc *lro_desc = NULL;
279	struct net_lro_desc *tmp;
280	int max_desc = lro_mgr->max_desc;
281	int i;
282
283	for (i = 0; i < max_desc; i++) {
284		tmp = &lro_arr[i];
285		if (tmp->active)
286			if (!lro_check_tcp_conn(tmp, iph, tcph)) {
287				lro_desc = tmp;
288				goto out;
289			}
290	}
291
292	for (i = 0; i < max_desc; i++) {
293		if (!lro_arr[i].active) {
294			lro_desc = &lro_arr[i];
295			goto out;
296		}
297	}
298
299	LRO_INC_STATS(lro_mgr, no_desc);
300out:
301	return lro_desc;
302}
303
304static void lro_flush(struct net_lro_mgr *lro_mgr,
305		      struct net_lro_desc *lro_desc)
306{
307	if (lro_desc->pkt_aggr_cnt > 1)
308		lro_update_tcp_ip_header(lro_desc);
309
310	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
311
312	if (lro_desc->vgrp) {
313		if (lro_mgr->features & LRO_F_NAPI)
314			vlan_hwaccel_receive_skb(lro_desc->parent,
315						 lro_desc->vgrp,
316						 lro_desc->vlan_tag);
317		else
318			vlan_hwaccel_rx(lro_desc->parent,
319					lro_desc->vgrp,
320					lro_desc->vlan_tag);
321
322	} else {
323		if (lro_mgr->features & LRO_F_NAPI)
324			netif_receive_skb(lro_desc->parent);
325		else
326			netif_rx(lro_desc->parent);
327	}
328
329	LRO_INC_STATS(lro_mgr, flushed);
330	lro_clear_desc(lro_desc);
331}
332
333static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
334			  struct vlan_group *vgrp, u16 vlan_tag, void *priv)
335{
336	struct net_lro_desc *lro_desc;
337	struct iphdr *iph;
338	struct tcphdr *tcph;
339	u64 flags;
340	int vlan_hdr_len = 0;
341
342	if (!lro_mgr->get_skb_header ||
343	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
344				    &flags, priv))
345		goto out;
346
347	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
348		goto out;
349
350	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
351	if (!lro_desc)
352		goto out;
353
354	if ((skb->protocol == htons(ETH_P_8021Q)) &&
355	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
356		vlan_hdr_len = VLAN_HLEN;
357
358	if (!lro_desc->active) { /* start new lro session */
359		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
360			goto out;
361
362		skb->ip_summed = lro_mgr->ip_summed_aggr;
363		lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
364		LRO_INC_STATS(lro_mgr, aggregated);
365		return 0;
366	}
367
368	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
369		goto out2;
370
371	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
372		goto out2;
373
374	lro_add_packet(lro_desc, skb, iph, tcph);
375	LRO_INC_STATS(lro_mgr, aggregated);
376
377	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
378	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
379		lro_flush(lro_mgr, lro_desc);
380
381	return 0;
382
383out2: /* send aggregated SKBs to stack */
384	lro_flush(lro_mgr, lro_desc);
385
386out:
387	return 1;
388}
389
390
391static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
392				   struct skb_frag_struct *frags,
393				   int len, int true_size,
394				   void *mac_hdr,
395				   int hlen, __wsum sum,
396				   u32 ip_summed)
397{
398	struct sk_buff *skb;
399	struct skb_frag_struct *skb_frags;
400	int data_len = len;
401	int hdr_len = min(len, hlen);
402
403	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
404	if (!skb)
405		return NULL;
406
407	skb_reserve(skb, lro_mgr->frag_align_pad);
408	skb->len = len;
409	skb->data_len = len - hdr_len;
410	skb->truesize += true_size;
411	skb->tail += hdr_len;
412
413	memcpy(skb->data, mac_hdr, hdr_len);
414
415	skb_frags = skb_shinfo(skb)->frags;
416	while (data_len > 0) {
417		*skb_frags = *frags;
418		data_len -= frags->size;
419		skb_frags++;
420		frags++;
421		skb_shinfo(skb)->nr_frags++;
422	}
423
424	skb_shinfo(skb)->frags[0].page_offset += hdr_len;
425	skb_shinfo(skb)->frags[0].size -= hdr_len;
426
427	skb->ip_summed = ip_summed;
428	skb->csum = sum;
429	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
430	return skb;
431}
432
433static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
434					  struct skb_frag_struct *frags,
435					  int len, int true_size,
436					  struct vlan_group *vgrp,
437					  u16 vlan_tag, void *priv, __wsum sum)
438{
439	struct net_lro_desc *lro_desc;
440	struct iphdr *iph;
441	struct tcphdr *tcph;
442	struct sk_buff *skb;
443	u64 flags;
444	void *mac_hdr;
445	int mac_hdr_len;
446	int hdr_len = LRO_MAX_PG_HLEN;
447	int vlan_hdr_len = 0;
448
449	if (!lro_mgr->get_frag_header ||
450	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
451				     (void *)&tcph, &flags, priv)) {
452		mac_hdr = page_address(frags->page) + frags->page_offset;
453		goto out1;
454	}
455
456	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
457		goto out1;
458
459	hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
460	mac_hdr_len = (int)((void *)(iph) - mac_hdr);
461
462	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
463	if (!lro_desc)
464		goto out1;
465
466	if (!lro_desc->active) { /* start new lro session */
467		if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
468			goto out1;
469
470		skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
471				  hdr_len, 0, lro_mgr->ip_summed_aggr);
472		if (!skb)
473			goto out;
474
475		if ((skb->protocol == htons(ETH_P_8021Q)) &&
476		    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
477			vlan_hdr_len = VLAN_HLEN;
478
479		iph = (void *)(skb->data + vlan_hdr_len);
480		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
481				+ IP_HDR_LEN(iph));
482
483		lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
484		LRO_INC_STATS(lro_mgr, aggregated);
485		return NULL;
486	}
487
488	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
489		goto out2;
490
491	if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
492		goto out2;
493
494	lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
495	LRO_INC_STATS(lro_mgr, aggregated);
496
497	if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
498	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
499		lro_flush(lro_mgr, lro_desc);
500
501	return NULL;
502
503out2: /* send aggregated packets to the stack */
504	lro_flush(lro_mgr, lro_desc);
505
506out1:  /* Original packet has to be posted to the stack */
507	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
508			  hdr_len, sum, lro_mgr->ip_summed);
509out:
510	return skb;
511}
512
513void lro_receive_skb(struct net_lro_mgr *lro_mgr,
514		     struct sk_buff *skb,
515		     void *priv)
516{
517	if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
518		if (lro_mgr->features & LRO_F_NAPI)
519			netif_receive_skb(skb);
520		else
521			netif_rx(skb);
522	}
523}
524EXPORT_SYMBOL(lro_receive_skb);
525
526void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
527				  struct sk_buff *skb,
528				  struct vlan_group *vgrp,
529				  u16 vlan_tag,
530				  void *priv)
531{
532	if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
533		if (lro_mgr->features & LRO_F_NAPI)
534			vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
535		else
536			vlan_hwaccel_rx(skb, vgrp, vlan_tag);
537	}
538}
539EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
540
541void lro_receive_frags(struct net_lro_mgr *lro_mgr,
542		       struct skb_frag_struct *frags,
543		       int len, int true_size, void *priv, __wsum sum)
544{
545	struct sk_buff *skb;
546
547	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
548				 priv, sum);
549	if (!skb)
550		return;
551
552	if (lro_mgr->features & LRO_F_NAPI)
553		netif_receive_skb(skb);
554	else
555		netif_rx(skb);
556}
557EXPORT_SYMBOL(lro_receive_frags);
558
559void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
560				    struct skb_frag_struct *frags,
561				    int len, int true_size,
562				    struct vlan_group *vgrp,
563				    u16 vlan_tag, void *priv, __wsum sum)
564{
565	struct sk_buff *skb;
566
567	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
568				 vlan_tag, priv, sum);
569	if (!skb)
570		return;
571
572	if (lro_mgr->features & LRO_F_NAPI)
573		vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
574	else
575		vlan_hwaccel_rx(skb, vgrp, vlan_tag);
576}
577EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
578
579void lro_flush_all(struct net_lro_mgr *lro_mgr)
580{
581	int i;
582	struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
583
584	for (i = 0; i < lro_mgr->max_desc; i++) {
585		if (lro_desc[i].active)
586			lro_flush(lro_mgr, &lro_desc[i]);
587	}
588}
589EXPORT_SYMBOL(lro_flush_all);
590
591void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
592		  struct iphdr *iph, struct tcphdr *tcph)
593{
594	struct net_lro_desc *lro_desc;
595
596	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
597	if (lro_desc->active)
598		lro_flush(lro_mgr, lro_desc);
599}
600EXPORT_SYMBOL(lro_flush_pkt);