PageRenderTime 104ms CodeModel.GetById 15ms app.highlight 80ms RepoModel.GetById 2ms app.codeStats 0ms

/drivers/infiniband/ulp/ipoib/ipoib_multicast.c

https://bitbucket.org/evzijst/gittest
C | 991 lines | 726 code | 195 blank | 70 comment | 99 complexity | 112c47de3ed3d3668b9be27279386f59 MD5 | raw file
  1/*
  2 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  3 *
  4 * This software is available to you under a choice of one of two
  5 * licenses.  You may choose to be licensed under the terms of the GNU
  6 * General Public License (GPL) Version 2, available from the file
  7 * COPYING in the main directory of this source tree, or the
  8 * OpenIB.org BSD license below:
  9 *
 10 *     Redistribution and use in source and binary forms, with or
 11 *     without modification, are permitted provided that the following
 12 *     conditions are met:
 13 *
 14 *      - Redistributions of source code must retain the above
 15 *        copyright notice, this list of conditions and the following
 16 *        disclaimer.
 17 *
 18 *      - Redistributions in binary form must reproduce the above
 19 *        copyright notice, this list of conditions and the following
 20 *        disclaimer in the documentation and/or other materials
 21 *        provided with the distribution.
 22 *
 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 30 * SOFTWARE.
 31 *
 32 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
 33 */
 34
 35#include <linux/skbuff.h>
 36#include <linux/rtnetlink.h>
 37#include <linux/ip.h>
 38#include <linux/in.h>
 39#include <linux/igmp.h>
 40#include <linux/inetdevice.h>
 41#include <linux/delay.h>
 42#include <linux/completion.h>
 43
 44#include "ipoib.h"
 45
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Multicast debug tracing level; runtime-writable via module param (0644). */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

/* Serializes start/stop of the join task and (re)queueing of mcast work. */
static DECLARE_MUTEX(mcast_mutex);
 55
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
	struct ib_sa_mcmember_rec mcmember;	/* SA member record from join reply */
	struct ipoib_ah          *ah;		/* address handle; NULL until join finishes */

	struct rb_node    rb_node;	/* linkage in priv->multicast_tree, keyed by MGID */
	struct list_head  list;		/* linkage in priv->multicast_list */
	struct completion done;		/* signalled when an outstanding SA query ends */

	int                 query_id;	/* id for cancelling the pending SA query */
	struct ib_sa_query *query;	/* pending SA query, or NULL */

	unsigned long created;		/* jiffies when this entry was allocated */
	unsigned long backoff;		/* current join retry delay */

	unsigned long flags;		/* IPOIB_MCAST_FLAG_* bits */
	unsigned char logcount;		/* rate limiter for join-failure messages */

	struct list_head  neigh_list;	/* neighbours caching this group's AH */

	struct sk_buff_head pkt_queue;	/* packets queued while join is pending */

	struct net_device *dev;
};
 80
/* Snapshot cursor for walking the multicast group tree. */
struct ipoib_mcast_iter {
	struct net_device *dev;
	union ib_gid       mgid;	/* MGID of the group last returned */
	unsigned long      created;
	unsigned int       queuelen;	/* packets still awaiting join completion */
	unsigned int       complete;	/* non-zero once the group has an AH */
	unsigned int       send_only;
};
 89
/*
 * Tear down a multicast group entry: unlink and free all neighbour
 * structures pointing at it, drop address-handle references, free any
 * queued skbs and finally free the entry itself.  The caller must have
 * already made the entry unreachable (removed from tree/list).
 */
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_neigh *neigh, *tmp;
	unsigned long flags;
	LIST_HEAD(ah_list);
	struct ipoib_ah *ah, *tah;

	ipoib_dbg_mcast(netdev_priv(dev),
			"deleting multicast group " IPOIB_GID_FMT "\n",
			IPOIB_GID_ARG(mcast->mcmember.mgid));

	spin_lock_irqsave(&priv->lock, flags);

	/* Detach each neighbour from this group; collect its AH so the
	 * reference can be dropped after releasing the lock. */
	list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
		if (neigh->ah)
			list_add_tail(&neigh->ah->list, &ah_list);
		*to_ipoib_neigh(neigh->neighbour) = NULL;
		neigh->neighbour->ops->destructor = NULL;
		kfree(neigh);
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	/* Drop AH references outside priv->lock -- NOTE(review): assumes
	 * ipoib_put_ah() must not be called under the lock; confirm. */
	list_for_each_entry_safe(ah, tah, &ah_list, list)
		ipoib_put_ah(ah);

	if (mcast->ah)
		ipoib_put_ah(mcast->ah);

	/* Free packets that were queued waiting for the join to complete */
	while (!skb_queue_empty(&mcast->pkt_queue)) {
		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);

		skb->dev = dev;
		dev_kfree_skb_any(skb);
	}

	kfree(mcast);
}
130
131static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
132					     int can_sleep)
133{
134	struct ipoib_mcast *mcast;
135
136	mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
137	if (!mcast)
138		return NULL;
139
140	memset(mcast, 0, sizeof (*mcast));
141
142	init_completion(&mcast->done);
143
144	mcast->dev = dev;
145	mcast->created = jiffies;
146	mcast->backoff = HZ;
147	mcast->logcount = 0;
148
149	INIT_LIST_HEAD(&mcast->list);
150	INIT_LIST_HEAD(&mcast->neigh_list);
151	skb_queue_head_init(&mcast->pkt_queue);
152
153	mcast->ah    = NULL;
154	mcast->query = NULL;
155
156	return mcast;
157}
158
159static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid)
160{
161	struct ipoib_dev_priv *priv = netdev_priv(dev);
162	struct rb_node *n = priv->multicast_tree.rb_node;
163
164	while (n) {
165		struct ipoib_mcast *mcast;
166		int ret;
167
168		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
169
170		ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw,
171			     sizeof (union ib_gid));
172		if (ret < 0)
173			n = n->rb_left;
174		else if (ret > 0)
175			n = n->rb_right;
176		else
177			return mcast;
178	}
179
180	return NULL;
181}
182
183static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
184{
185	struct ipoib_dev_priv *priv = netdev_priv(dev);
186	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
187
188	while (*n) {
189		struct ipoib_mcast *tmcast;
190		int ret;
191
192		pn = *n;
193		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
194
195		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
196			     sizeof (union ib_gid));
197		if (ret < 0)
198			n = &pn->rb_left;
199		else if (ret > 0)
200			n = &pn->rb_right;
201		else
202			return -EEXIST;
203	}
204
205	rb_link_node(&mcast->rb_node, pn, n);
206	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
207
208	return 0;
209}
210
/*
 * Complete a successful join: record the SA member record, attach the QP
 * (unless the group is send-only), build the address handle, and push out
 * any packets that were queued while the join was in flight.  Returns 0
 * on success or a negative errno if the QP attach fails.
 */
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int ret;

	mcast->mcmember = *mcmember;

	/* Set the cached Q_Key before we attach if it's the broadcast group */
	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		    sizeof (union ib_gid))) {
		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
	}

	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		/* Guard against double attach of the same group */
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			ipoib_warn(priv, "multicast group " IPOIB_GID_FMT
				   " already attached\n",
				   IPOIB_GID_ARG(mcast->mcmember.mgid));

			return 0;
		}

		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
					 &mcast->mcmember.mgid);
		if (ret < 0) {
			ipoib_warn(priv, "couldn't attach QP to multicast group "
				   IPOIB_GID_FMT "\n",
				   IPOIB_GID_ARG(mcast->mcmember.mgid));

			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}
	}

	{
		struct ib_ah_attr av = {
			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
			.port_num      = priv->port,
			.sl	       = mcast->mcmember.sl,
			.ah_flags      = IB_AH_GRH,
			.grh	       = {
				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
				.hop_limit     = mcast->mcmember.hop_limit,
				.sgid_index    = 0,
				.traffic_class = mcast->mcmember.traffic_class
			}
		};

		av.grh.dgid = mcast->mcmember.mgid;

		/* Derive a static rate from the local-rate / group-rate
		 * ratio -- NOTE(review): rounding scheme presumed to match
		 * the IB static-rate encoding; confirm against ib_verbs. */
		if (ib_sa_rate_enum_to_int(mcast->mcmember.rate) > 0)
			av.static_rate = (2 * priv->local_rate -
					  ib_sa_rate_enum_to_int(mcast->mcmember.rate) - 1) /
				(priv->local_rate ? priv->local_rate : 1);

		ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
				av.static_rate, priv->local_rate,
				ib_sa_rate_enum_to_int(mcast->mcmember.rate));

		mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
		if (!mcast->ah) {
			ipoib_warn(priv, "ib_address_create failed\n");
		} else {
			ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
					" AV %p, LID 0x%04x, SL %d\n",
					IPOIB_GID_ARG(mcast->mcmember.mgid),
					mcast->ah->ah,
					be16_to_cpu(mcast->mcmember.mlid),
					mcast->mcmember.sl);
		}
	}

	/* actually send any queued packets */
	while (!skb_queue_empty(&mcast->pkt_queue)) {
		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);

		skb->dev = dev;

		if (!skb->dst || !skb->dst->neighbour) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof (struct ipoib_pseudoheader));
		}

		if (dev_queue_xmit(skb))
			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
	}

	return 0;
}
303
304static void
305ipoib_mcast_sendonly_join_complete(int status,
306				   struct ib_sa_mcmember_rec *mcmember,
307				   void *mcast_ptr)
308{
309	struct ipoib_mcast *mcast = mcast_ptr;
310	struct net_device *dev = mcast->dev;
311
312	if (!status)
313		ipoib_mcast_join_finish(mcast, mcmember);
314	else {
315		if (mcast->logcount++ < 20)
316			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
317					IPOIB_GID_FMT ", status %d\n",
318					IPOIB_GID_ARG(mcast->mcmember.mgid), status);
319
320		/* Flush out any queued packets */
321		while (!skb_queue_empty(&mcast->pkt_queue)) {
322			struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
323
324			skb->dev = dev;
325
326			dev_kfree_skb_any(skb);
327		}
328
329		/* Clear the busy flag so we try again */
330		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
331	}
332
333	complete(&mcast->done);
334}
335
/*
 * Start an asynchronous send-only join for @mcast.  Returns the SA query
 * id (>= 0) on success, -ENODEV if the device is going down, -EBUSY if a
 * join is already outstanding, or a negative errno from the SA layer.
 */
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_sa_mcmember_rec rec = {
#if 0				/* Some SMs don't support send-only yet */
		.join_state = 4
#else
		.join_state = 1
#endif
	};
	int ret = 0;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
		return -ENODEV;
	}

	/* Only one outstanding join per group */
	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
		return -EBUSY;
	}

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = be16_to_cpu(priv->pkey);

	/* Completion is delivered to ipoib_mcast_sendonly_join_complete();
	 * 1000 is the SA query timeout (presumably ms -- confirm vs. API). */
	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
				     IB_SA_MCMEMBER_REC_MGID		|
				     IB_SA_MCMEMBER_REC_PORT_GID	|
				     IB_SA_MCMEMBER_REC_PKEY		|
				     IB_SA_MCMEMBER_REC_JOIN_STATE,
				     1000, GFP_ATOMIC,
				     ipoib_mcast_sendonly_join_complete,
				     mcast, &mcast->query);
	if (ret < 0) {
		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
			   ret);
	} else {
		ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
				", starting join\n",
				IPOIB_GID_ARG(mcast->mcmember.mgid));

		mcast->query_id = ret;
	}

	return ret;
}
384
/*
 * SA completion callback for a normal (non-send-only) join.  On success,
 * finish the group setup and requeue the join task for the next group;
 * on failure, back off exponentially and retry while IPOIB_MCAST_RUN is
 * set.  mcast->done is completed on every path that ends this query so
 * ipoib_mcast_stop_thread() can wait on it.
 */
static void ipoib_mcast_join_complete(int status,
				      struct ib_sa_mcmember_rec *mcmember,
				      void *mcast_ptr)
{
	struct ipoib_mcast *mcast = mcast_ptr;
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT
			" (status %d)\n",
			IPOIB_GID_ARG(mcast->mcmember.mgid), status);

	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
		/* Success: reset backoff, let the task join the next group */
		mcast->backoff = HZ;
		down(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_work(ipoib_workqueue, &priv->mcast_task);
		up(&mcast_mutex);
		complete(&mcast->done);
		return;
	}

	/* Query was cancelled (e.g. by ipoib_mcast_stop_thread) -- no retry */
	if (status == -EINTR) {
		complete(&mcast->done);
		return;
	}

	if (status && mcast->logcount++ < 20) {
		if (status == -ETIMEDOUT || status == -EINTR) {
			ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
					", status %d\n",
					IPOIB_GID_ARG(mcast->mcmember.mgid),
					status);
		} else {
			ipoib_warn(priv, "multicast join failed for "
				   IPOIB_GID_FMT ", status %d\n",
				   IPOIB_GID_ARG(mcast->mcmember.mgid),
				   status);
		}
	}

	/* Exponential backoff, capped.  NOTE(review): backoff starts at HZ
	 * (jiffies for 1s) but the cap is in seconds and users multiply by
	 * HZ again -- units look inconsistent; confirm intended scale. */
	mcast->backoff *= 2;
	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

	mcast->query = NULL;

	down(&mcast_mutex);
	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
		/* Retry immediately on SA timeout, after backoff otherwise */
		if (status == -ETIMEDOUT)
			queue_work(ipoib_workqueue, &priv->mcast_task);
		else
			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
					   mcast->backoff * HZ);
	} else
		complete(&mcast->done);
	up(&mcast_mutex);

	return;
}
445
446static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
447			     int create)
448{
449	struct ipoib_dev_priv *priv = netdev_priv(dev);
450	struct ib_sa_mcmember_rec rec = {
451		.join_state = 1
452	};
453	ib_sa_comp_mask comp_mask;
454	int ret = 0;
455
456	ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n",
457			IPOIB_GID_ARG(mcast->mcmember.mgid));
458
459	rec.mgid     = mcast->mcmember.mgid;
460	rec.port_gid = priv->local_gid;
461	rec.pkey     = be16_to_cpu(priv->pkey);
462
463	comp_mask =
464		IB_SA_MCMEMBER_REC_MGID		|
465		IB_SA_MCMEMBER_REC_PORT_GID	|
466		IB_SA_MCMEMBER_REC_PKEY		|
467		IB_SA_MCMEMBER_REC_JOIN_STATE;
468
469	if (create) {
470		comp_mask |=
471			IB_SA_MCMEMBER_REC_QKEY		|
472			IB_SA_MCMEMBER_REC_SL		|
473			IB_SA_MCMEMBER_REC_FLOW_LABEL	|
474			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
475
476		rec.qkey	  = priv->broadcast->mcmember.qkey;
477		rec.sl		  = priv->broadcast->mcmember.sl;
478		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
479		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
480	}
481
482	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
483				     mcast->backoff * 1000, GFP_ATOMIC,
484				     ipoib_mcast_join_complete,
485				     mcast, &mcast->query);
486
487	if (ret < 0) {
488		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
489
490		mcast->backoff *= 2;
491		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
492			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
493
494		down(&mcast_mutex);
495		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
496			queue_delayed_work(ipoib_workqueue,
497					   &priv->mcast_task,
498					   mcast->backoff);
499		up(&mcast_mutex);
500	} else
501		mcast->query_id = ret;
502}
503
/*
 * Workqueue handler that (re)joins all multicast groups: refresh the
 * local GID/LID/rate, create and join the broadcast group first, then
 * join the remaining groups one at a time (each join completion requeues
 * this task).  Once every group is attached, set the MTU from the
 * broadcast group and turn the carrier on.
 */
void ipoib_mcast_join_task(void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
		return;

	/* Refresh our port GID and mirror it into the hardware address
	 * (dev_addr holds a 4-byte IPoIB header followed by the GID) */
	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
		ipoib_warn(priv, "ib_gid_entry_get() failed\n");
	else
		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));

	{
		struct ib_port_attr attr;

		if (!ib_query_port(priv->ca, priv->port, &attr)) {
			priv->local_lid  = attr.lid;
			priv->local_rate = attr.active_speed *
				ib_width_enum_to_int(attr.active_width);
		} else
			ipoib_warn(priv, "ib_query_port failed\n");
	}

	if (!priv->broadcast) {
		priv->broadcast = ipoib_mcast_alloc(dev, 1);
		if (!priv->broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			/* Retry in one second if we're still supposed to run */
			down(&mcast_mutex);
			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
				queue_delayed_work(ipoib_workqueue,
						   &priv->mcast_task, HZ);
			up(&mcast_mutex);
			return;
		}

		memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		       sizeof (union ib_gid));

		spin_lock_irq(&priv->lock);
		__ipoib_mcast_add(dev, priv->broadcast);
		spin_unlock_irq(&priv->lock);
	}

	/* The broadcast group must be joined before anything else; its
	 * completion callback will requeue this task */
	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		ipoib_mcast_join(dev, priv->broadcast, 0);
		return;
	}

	while (1) {
		struct ipoib_mcast *mcast = NULL;

		spin_lock_irq(&priv->lock);
		list_for_each_entry(mcast, &priv->multicast_list, list) {
			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
				/* Found the next unjoined group */
				break;
			}
		}
		spin_unlock_irq(&priv->lock);

		if (&mcast->list == &priv->multicast_list) {
			/* All done */
			break;
		}

		ipoib_mcast_join(dev, mcast, 1);
		return;
	}

	/* All groups joined: derive the multicast MTU from the broadcast
	 * group and bring the link up */
	priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
		IPOIB_ENCAP_LEN;
	dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
	netif_carrier_on(dev);
}
585
586int ipoib_mcast_start_thread(struct net_device *dev)
587{
588	struct ipoib_dev_priv *priv = netdev_priv(dev);
589
590	ipoib_dbg_mcast(priv, "starting multicast thread\n");
591
592	down(&mcast_mutex);
593	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
594		queue_work(ipoib_workqueue, &priv->mcast_task);
595	up(&mcast_mutex);
596
597	return 0;
598}
599
/*
 * Stop the join task and cancel every outstanding SA query, waiting on
 * each cancelled query's completion so no callback can run afterwards.
 * Always returns 0.
 */
int ipoib_mcast_stop_thread(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_mcast *mcast;

	ipoib_dbg_mcast(priv, "stopping multicast thread\n");

	down(&mcast_mutex);
	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
	cancel_delayed_work(&priv->mcast_task);
	up(&mcast_mutex);

	/* Wait for an already-running join task to finish */
	flush_workqueue(ipoib_workqueue);

	if (priv->broadcast && priv->broadcast->query) {
		ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
		priv->broadcast->query = NULL;
		ipoib_dbg_mcast(priv, "waiting for bcast\n");
		wait_for_completion(&priv->broadcast->done);
	}

	list_for_each_entry(mcast, &priv->multicast_list, list) {
		if (mcast->query) {
			ib_sa_cancel_query(mcast->query_id, mcast->query);
			mcast->query = NULL;
			ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
					IPOIB_GID_ARG(mcast->mcmember.mgid));
			wait_for_completion(&mcast->done);
		}
	}

	return 0;
}
633
/*
 * Detach the QP from @mcast and send a best-effort leave request to the
 * SA.  Does nothing if the group was never attached.  Always returns 0.
 */
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	int ret = 0;

	/* Atomically claim the detach; skip if never attached */
	if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
		return 0;

	ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
			IPOIB_GID_ARG(mcast->mcmember.mgid));

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = be16_to_cpu(priv->pkey);

	/* Remove ourselves from the multicast group */
	ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
				 &mcast->mcmember.mgid);
	if (ret)
		ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);

	/*
	 * Just make one shot at leaving and don't wait for a reply;
	 * if we fail, too bad.
	 */
	ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
					IB_SA_MCMEMBER_REC_MGID		|
					IB_SA_MCMEMBER_REC_PORT_GID	|
					IB_SA_MCMEMBER_REC_PKEY		|
					IB_SA_MCMEMBER_REC_JOIN_STATE,
					0, GFP_ATOMIC, NULL,
					mcast, &mcast->query);
	if (ret < 0)
		ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
			   "for leave (result = %d)\n", ret);

	return 0;
}
675
/*
 * Transmit @skb to the multicast group @mgid, creating a send-only group
 * (and kicking off its join) if none exists yet.  Packets arriving before
 * the join completes are queued on the group, bounded by
 * IPOIB_MAX_MCAST_QUEUE.
 */
void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
		      struct sk_buff *skb)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_mcast *mcast;

	/*
	 * We can only be called from ipoib_start_xmit, so we're
	 * inside tx_lock -- no need to save/restore flags.
	 */
	spin_lock(&priv->lock);

	mcast = __ipoib_mcast_find(dev, mgid);
	if (!mcast) {
		/* Let's create a new send only group now */
		ipoib_dbg_mcast(priv, "setting up send only multicast group for "
				IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid));

		mcast = ipoib_mcast_alloc(dev, 0);
		if (!mcast) {
			ipoib_warn(priv, "unable to allocate memory for "
				   "multicast structure\n");
			dev_kfree_skb_any(skb);
			goto out;
		}

		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
		mcast->mcmember.mgid = *mgid;
		__ipoib_mcast_add(dev, mcast);
		list_add_tail(&mcast->list, &priv->multicast_list);
	}

	if (!mcast->ah) {
		/* Join not complete: queue the packet (drop when full) and
		 * make sure a join is actually in progress */
		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
			skb_queue_tail(&mcast->pkt_queue, skb);
		else
			dev_kfree_skb_any(skb);

		if (mcast->query)
			ipoib_dbg_mcast(priv, "no address vector, "
					"but multicast join already started\n");
		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			ipoib_mcast_sendonly_join(mcast);

		/*
		 * If lookup completes between here and out:, don't
		 * want to send packet twice.
		 */
		mcast = NULL;
	}

out:
	if (mcast && mcast->ah) {
		/* Cache the group's AH on the neighbour so later sends can
		 * use the fast path */
		if (skb->dst            &&
		    skb->dst->neighbour &&
		    !*to_ipoib_neigh(skb->dst->neighbour)) {
			struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);

			if (neigh) {
				kref_get(&mcast->ah->ref);
				neigh->ah  	= mcast->ah;
				neigh->neighbour = skb->dst->neighbour;
				*to_ipoib_neigh(skb->dst->neighbour) = neigh;
				list_add_tail(&neigh->list, &mcast->neigh_list);
			}
		}

		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
	}

	spin_unlock(&priv->lock);
}
748
/*
 * Invalidate all joined groups: each entry on the device is replaced in
 * the list and rb-tree by a fresh, unjoined copy (preserving only the
 * send-only flag), and the stale entries are left/freed outside the
 * lock.  The broadcast group is replaced the same way.
 */
void ipoib_mcast_dev_flush(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast, *nmcast;
	unsigned long flags;

	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		/* can_sleep=0: we hold a spinlock here */
		nmcast = ipoib_mcast_alloc(dev, 0);
		if (nmcast) {
			nmcast->flags =
				mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY);

			nmcast->mcmember.mgid = mcast->mcmember.mgid;

			/* Add the new group in before the to-be-destroyed group */
			list_add_tail(&nmcast->list, &mcast->list);
			list_del_init(&mcast->list);

			rb_replace_node(&mcast->rb_node, &nmcast->rb_node,
					&priv->multicast_tree);

			list_add_tail(&mcast->list, &remove_list);
		} else {
			ipoib_warn(priv, "could not reallocate multicast group "
				   IPOIB_GID_FMT "\n",
				   IPOIB_GID_ARG(mcast->mcmember.mgid));
		}
	}

	if (priv->broadcast) {
		nmcast = ipoib_mcast_alloc(dev, 0);
		if (nmcast) {
			nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid;

			rb_replace_node(&priv->broadcast->rb_node,
					&nmcast->rb_node,
					&priv->multicast_tree);

			list_add_tail(&priv->broadcast->list, &remove_list);
		}

		/* NOTE(review): on allocation failure priv->broadcast goes
		 * NULL while the old node stays in the tree -- confirm this
		 * is the intended fallback. */
		priv->broadcast = nmcast;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	/* Leave and free the stale groups outside the lock */
	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(dev, mcast);
		ipoib_mcast_free(mcast);
	}
}
804
805void ipoib_mcast_dev_down(struct net_device *dev)
806{
807	struct ipoib_dev_priv *priv = netdev_priv(dev);
808	unsigned long flags;
809
810	/* Delete broadcast since it will be recreated */
811	if (priv->broadcast) {
812		ipoib_dbg_mcast(priv, "deleting broadcast group\n");
813
814		spin_lock_irqsave(&priv->lock, flags);
815		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
816		spin_unlock_irqrestore(&priv->lock, flags);
817		ipoib_mcast_leave(dev, priv->broadcast);
818		ipoib_mcast_free(priv->broadcast);
819		priv->broadcast = NULL;
820	}
821}
822
/*
 * Resynchronize our group list with the net core's dev->mc_list: add
 * groups that appeared, promote send-only groups to full members, and
 * leave/free groups that disappeared.  Runs from the workqueue and
 * restarts the join task at the end if the interface is admin-up.
 */
void ipoib_mcast_restart_task(void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct dev_mc_list *mclist;
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	unsigned long flags;

	ipoib_dbg_mcast(priv, "restarting multicast task\n");

	ipoib_mcast_stop_thread(dev);

	spin_lock_irqsave(&priv->lock, flags);

	/*
	 * Unfortunately, the networking core only gives us a list of all of
	 * the multicast hardware addresses. We need to figure out which ones
	 * are new and which ones have been removed
	 */

	/* Clear out the found flag */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Mark all of the entries that are found or don't exist */
	for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
		union ib_gid mgid;

		/* MGID is the hardware address minus the 4-byte IPoIB header */
		memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);

		/* Add in the P_Key */
		mgid.raw[4] = (priv->pkey >> 8) & 0xff;
		mgid.raw[5] = priv->pkey & 0xff;

		mcast = __ipoib_mcast_find(dev, &mgid);
		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			struct ipoib_mcast *nmcast;

			/* Not found or send-only group, let's add a new entry */
			ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
					IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));

			nmcast = ipoib_mcast_alloc(dev, 0);
			if (!nmcast) {
				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
				continue;
			}

			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

			nmcast->mcmember.mgid = mgid;

			if (mcast) {
				/* Destroy the send only entry */
				list_del(&mcast->list);
				list_add_tail(&mcast->list, &remove_list);

				rb_replace_node(&mcast->rb_node,
						&nmcast->rb_node,
						&priv->multicast_tree);
			} else
				__ipoib_mcast_add(dev, nmcast);

			list_add_tail(&nmcast->list, &priv->multicast_list);
		}

		if (mcast)
			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
	}

	/* Remove all of the entries don't exist anymore */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n",
					IPOIB_GID_ARG(mcast->mcmember.mgid));

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_del(&mcast->list);
			list_add_tail(&mcast->list, &remove_list);
		}
	}
	spin_unlock_irqrestore(&priv->lock, flags);

	/* We have to cancel outside of the spinlock */
	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(mcast->dev, mcast);
		ipoib_mcast_free(mcast);
	}

	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		ipoib_mcast_start_thread(dev);
}
919
920struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
921{
922	struct ipoib_mcast_iter *iter;
923
924	iter = kmalloc(sizeof *iter, GFP_KERNEL);
925	if (!iter)
926		return NULL;
927
928	iter->dev = dev;
929	memset(iter->mgid.raw, 0, sizeof iter->mgid);
930
931	if (ipoib_mcast_iter_next(iter)) {
932		ipoib_mcast_iter_free(iter);
933		return NULL;
934	}
935
936	return iter;
937}
938
/* Free an iterator allocated by ipoib_mcast_iter_init(). */
void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
{
	kfree(iter);
}
943
944int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
945{
946	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
947	struct rb_node *n;
948	struct ipoib_mcast *mcast;
949	int ret = 1;
950
951	spin_lock_irq(&priv->lock);
952
953	n = rb_first(&priv->multicast_tree);
954
955	while (n) {
956		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
957
958		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
959			   sizeof (union ib_gid)) < 0) {
960			iter->mgid      = mcast->mcmember.mgid;
961			iter->created   = mcast->created;
962			iter->queuelen  = skb_queue_len(&mcast->pkt_queue);
963			iter->complete  = !!mcast->ah;
964			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
965
966			ret = 0;
967
968			break;
969		}
970
971		n = rb_next(n);
972	}
973
974	spin_unlock_irq(&priv->lock);
975
976	return ret;
977}
978
/* Copy the state snapshot out of @iter into the caller's variables. */
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *mgid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only)
{
	*mgid      = iter->mgid;
	*created   = iter->created;
	*queuelen  = iter->queuelen;
	*complete  = iter->complete;
	*send_only = iter->send_only;
}