/net/core/fib_rules.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>

static const struct fib_kuid_range fib_kuid_range_unset = {
	KUIDT_INIT(0),
	KUIDT_INIT(~0),
};

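/*
 * Returns true if the rule carries no match conditions beyond its
 * defaults, i.e. it matches every packet of its family.
 */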
bool fib_rule_matchall(const struct fib_rule *rule)
{
	if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
	    rule->flags)
		return false;
	if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
		return false;
	if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
	    !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
		return false;
	if (fib_rule_port_range_set(&rule->sport_range))
		return false;
	if (fib_rule_port_range_set(&rule->dport_range))
		return false;
	return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);

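/*
 * Allocate and append a kernel-owned FR_ACT_TO_TBL rule pointing at
 * @table. Typically used by address families to install their initial
 * default rules before the ops are visible to anyone else.
 */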
int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	refcount_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;
	r->proto = RTPROT_KERNEL;
	r->fr_net = ops->fro_net;
	r->uid_range = fib_kuid_range_unset;

	r->suppress_prefixlen = -1;
	r->suppress_ifgroup = -1;

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

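/*
 * Pick a preference for a rule added without FRA_PRIORITY: one less
 * than the preference of the second rule in the list (new rules are
 * inserted just after the first, typically pref-0, rule), or 0 if no
 * such slot can be derived.
 */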
static u32 fib_default_rule_pref(struct fib_rules_ops *ops)
{
	struct list_head *pos;
	struct fib_rule *rule;

	if (!list_empty(&ops->rules_list)) {
		pos = ops->rules_list.next;
		if (pos->next != &ops->rules_list) {
			rule = list_entry(pos->next, struct fib_rule, list);
			if (rule->pref)
				return rule->pref - 1;
		}
	}

	return 0;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache(ops);
}

static int __fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
	struct net *net;

	net = ops->fro_net;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	list_add_tail_rcu(&ops->list, &net->rules_ops);
	err = 0;
errout:
	spin_unlock(&net->rules_mod_lock);

	return err;
}

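/*
 * Register a per-family rules backend for @net. The template is copied,
 * so a single fib_rules_ops definition can be shared across network
 * namespaces. Returns the live ops on success or an ERR_PTR().
 */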
struct fib_rules_ops *
fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}
EXPORT_SYMBOL_GPL(fib_rules_register);

static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		if (ops->delete)
			ops->delete(rule);
		fib_rule_put(rule);
	}
}

void fib_rules_unregister(struct fib_rules_ops *ops)
{
	struct net *net = ops->fro_net;

	spin_lock(&net->rules_mod_lock);
	list_del_rcu(&ops->list);
	spin_unlock(&net->rules_mod_lock);

	fib_rules_cleanup_ops(ops);
	kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);

static int uid_range_set(struct fib_kuid_range *range)
{
	return uid_valid(range->start) && uid_valid(range->end);
}

static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
{
	struct fib_rule_uid_range *in;
	struct fib_kuid_range out;

	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);

	out.start = make_kuid(current_user_ns(), in->start);
	out.end = make_kuid(current_user_ns(), in->end);

	return out;
}

static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
{
	struct fib_rule_uid_range out = {
		from_kuid_munged(current_user_ns(), range->start),
		from_kuid_munged(current_user_ns(), range->end)
	};

	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}

static int nla_get_port_range(struct nlattr *pattr,
			      struct fib_rule_port_range *port_range)
{
	const struct fib_rule_port_range *pr = nla_data(pattr);

	if (!fib_rule_port_range_valid(pr))
		return -EINVAL;

	port_range->start = pr->start;
	port_range->end = pr->end;

	return 0;
}

static int nla_put_port_range(struct sk_buff *skb, int attrtype,
			      struct fib_rule_port_range *range)
{
	return nla_put(skb, attrtype, sizeof(*range), range);
}

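/*
 * Evaluate the generic selectors (iif, oif, mark, tunnel id, l3mdev,
 * uid range) and then the family-specific ->match(). The result is
 * negated if the rule carries FIB_RULE_INVERT.
 */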
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags,
			  struct fib_lookup_arg *arg)
{
	int ret = 0;

	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
		goto out;

	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
		goto out;

	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
		goto out;

	if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
		goto out;

	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
		goto out;

	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
	    uid_gt(fl->flowi_uid, rule->uid_range.end))
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

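/*
 * Walk the rule list under RCU in preference order. Goto rules jump to
 * their resolved target; FR_ACT_NOP rules are skipped. The first rule
 * whose ->action() returns something other than -EAGAIN, and that is
 * not vetoed by ->suppress(), terminates the lookup. Unless the caller
 * passed FIB_LOOKUP_NOREF, a reference on the matched rule is taken
 * before it is returned in arg->rule.
 */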
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags, arg))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (!err && ops->suppress && ops->suppress(rule, arg))
			continue;

		if (err != -EAGAIN) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(refcount_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);

static int call_fib_rule_notifier(struct notifier_block *nb,
				  enum fib_event_type event_type,
				  struct fib_rule *rule, int family,
				  struct netlink_ext_ack *extack)
{
	struct fib_rule_notifier_info info = {
		.info.family = family,
		.info.extack = extack,
		.rule = rule,
	};

	return call_fib_notifier(nb, event_type, &info.info);
}

static int call_fib_rule_notifiers(struct net *net,
				   enum fib_event_type event_type,
				   struct fib_rule *rule,
				   struct fib_rules_ops *ops,
				   struct netlink_ext_ack *extack)
{
	struct fib_rule_notifier_info info = {
		.info.family = ops->family,
		.info.extack = extack,
		.rule = rule,
	};

	ops->fib_rules_seq++;
	return call_fib_notifiers(net, event_type, &info.info);
}

/* Called with rcu_read_lock() */
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
		   struct netlink_ext_ack *extack)
{
	struct fib_rules_ops *ops;
	struct fib_rule *rule;
	int err = 0;

	ops = lookup_rules_ops(net, family);
	if (!ops)
		return -EAFNOSUPPORT;
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD,
					     rule, family, extack);
		if (err)
			break;
	}
	rules_ops_put(ops);

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);

unsigned int fib_rules_seq_read(struct net *net, int family)
{
	unsigned int fib_rules_seq;
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	ops = lookup_rules_ops(net, family);
	if (!ops)
		return 0;
	fib_rules_seq = ops->fib_rules_seq;
	rules_ops_put(ops);

	return fib_rules_seq;
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);

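/*
 * Find an installed rule matching a RTM_DELRULE request. Only the
 * attributes the request actually supplied in @rule are compared, so
 * a sparse request deletes the first rule compatible with it. Runs
 * under RTNL.
 */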
static struct fib_rule *rule_find(struct fib_rules_ops *ops,
				  struct fib_rule_hdr *frh,
				  struct nlattr **tb,
				  struct fib_rule *rule,
				  bool user_priority)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (rule->action && r->action != rule->action)
			continue;

		if (rule->table && r->table != rule->table)
			continue;

		if (user_priority && r->pref != rule->pref)
			continue;

		if (rule->iifname[0] &&
		    memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (rule->oifname[0] &&
		    memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (rule->mark && r->mark != rule->mark)
			continue;

		if (rule->suppress_ifgroup != -1 &&
		    r->suppress_ifgroup != rule->suppress_ifgroup)
			continue;

		if (rule->suppress_prefixlen != -1 &&
		    r->suppress_prefixlen != rule->suppress_prefixlen)
			continue;

		if (rule->mark_mask && r->mark_mask != rule->mark_mask)
			continue;

		if (rule->tun_id && r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (rule->l3mdev && r->l3mdev != rule->l3mdev)
			continue;

		if (uid_range_set(&rule->uid_range) &&
		    (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end)))
			continue;

		if (rule->ip_proto && r->ip_proto != rule->ip_proto)
			continue;

		if (rule->proto && r->proto != rule->proto)
			continue;

		if (fib_rule_port_range_set(&rule->sport_range) &&
		    !fib_rule_port_range_compare(&r->sport_range,
						 &rule->sport_range))
			continue;

		if (fib_rule_port_range_set(&rule->dport_range) &&
		    !fib_rule_port_range_compare(&r->dport_range,
						 &rule->dport_range))
			continue;

		if (!ops->compare(r, frh, tb))
			continue;

		return r;
	}

	return NULL;
}

#ifdef CONFIG_NET_L3_MASTER_DEV
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	nlrule->l3mdev = nla_get_u8(nla);
	if (nlrule->l3mdev != 1) {
		NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
		return -1;
	}

	return 0;
}
#else
static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
			      struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
	return -1;
}
#endif

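/*
 * Translate a RTM_NEWRULE/RTM_DELRULE request into a freshly allocated
 * struct fib_rule. Validates address lengths, goto targets, uid and
 * port ranges; on success the caller owns *rule and must install or
 * free it.
 */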
static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
		       struct netlink_ext_ack *extack,
		       struct fib_rules_ops *ops,
		       struct nlattr *tb[],
		       struct fib_rule **rule,
		       bool *user_priority)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rule *nlrule = NULL;
	int err = -EINVAL;

	if (frh->src_len)
		if (!tb[FRA_SRC] ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			goto errout;
		}

	if (frh->dst_len)
		if (!tb[FRA_DST] ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size) {
			NL_SET_ERR_MSG(extack, "Invalid dst address");
			goto errout;
		}

	nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (!nlrule) {
		err = -ENOMEM;
		goto errout;
	}
	refcount_set(&nlrule->refcnt, 1);
	nlrule->fr_net = net;

	if (tb[FRA_PRIORITY]) {
		nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
		*user_priority = true;
	} else {
		nlrule->pref = fib_default_rule_pref(ops);
	}

	nlrule->proto = tb[FRA_PROTOCOL] ?
		nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;

	if (tb[FRA_IIFNAME]) {
		struct net_device *dev;

		nlrule->iifindex = -1;
		nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->iifname);
		if (dev)
			nlrule->iifindex = dev->ifindex;
	}

	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		nlrule->oifindex = -1;
		nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, nlrule->oifname);
		if (dev)
			nlrule->oifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		nlrule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (nlrule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			nlrule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	if (tb[FRA_TUN_ID])
		nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);

	err = -EINVAL;
	if (tb[FRA_L3MDEV] &&
	    fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
		goto errout_free;

	nlrule->action = frh->action;
	nlrule->flags = frh->flags;
	nlrule->table = frh_get_table(frh, tb);
	if (tb[FRA_SUPPRESS_PREFIXLEN])
		nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
	else
		nlrule->suppress_prefixlen = -1;

	if (tb[FRA_SUPPRESS_IFGROUP])
		nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
	else
		nlrule->suppress_ifgroup = -1;

	if (tb[FRA_GOTO]) {
		if (nlrule->action != FR_ACT_GOTO) {
			NL_SET_ERR_MSG(extack, "Unexpected goto");
			goto errout_free;
		}

		nlrule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (nlrule->target <= nlrule->pref) {
			NL_SET_ERR_MSG(extack, "Backward goto not supported");
			goto errout_free;
		}
	} else if (nlrule->action == FR_ACT_GOTO) {
		NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
		goto errout_free;
	}

	if (nlrule->l3mdev && nlrule->table) {
		NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
		goto errout_free;
	}

	if (tb[FRA_UID_RANGE]) {
		if (current_user_ns() != net->user_ns) {
			err = -EPERM;
			NL_SET_ERR_MSG(extack, "No permission to set uid");
			goto errout_free;
		}

		nlrule->uid_range = nla_get_kuid_range(tb);

		if (!uid_range_set(&nlrule->uid_range) ||
		    !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
			NL_SET_ERR_MSG(extack, "Invalid uid range");
			goto errout_free;
		}
	} else {
		nlrule->uid_range = fib_kuid_range_unset;
	}

	if (tb[FRA_IP_PROTO])
		nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);

	if (tb[FRA_SPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
					 &nlrule->sport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid sport range");
			goto errout_free;
		}
	}

	if (tb[FRA_DPORT_RANGE]) {
		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
					 &nlrule->dport_range);
		if (err) {
			NL_SET_ERR_MSG(extack, "Invalid dport range");
			goto errout_free;
		}
	}

	*rule = nlrule;

	return 0;

errout_free:
	kfree(nlrule);
errout:
	return err;
}

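/*
 * Strict duplicate check used for NLM_F_EXCL: unlike rule_find(), every
 * attribute must match exactly. Returns 1 if an identical rule is
 * already installed.
 */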
static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
		       struct nlattr **tb, struct fib_rule *rule)
{
	struct fib_rule *r;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->action != rule->action)
			continue;

		if (r->table != rule->table)
			continue;

		if (r->pref != rule->pref)
			continue;

		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
			continue;

		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (r->mark != rule->mark)
			continue;

		if (r->suppress_ifgroup != rule->suppress_ifgroup)
			continue;

		if (r->suppress_prefixlen != rule->suppress_prefixlen)
			continue;

		if (r->mark_mask != rule->mark_mask)
			continue;

		if (r->tun_id != rule->tun_id)
			continue;

		if (r->fr_net != rule->fr_net)
			continue;

		if (r->l3mdev != rule->l3mdev)
			continue;

		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(r->uid_range.end, rule->uid_range.end))
			continue;

		if (r->ip_proto != rule->ip_proto)
			continue;

		if (r->proto != rule->proto)
			continue;

		if (!fib_rule_port_range_compare(&r->sport_range,
						 &rule->sport_range))
			continue;

		if (!fib_rule_port_range_compare(&r->dport_range,
						 &rule->dport_range))
			continue;

		if (!ops->compare(r, frh, tb))
			continue;
		return 1;
	}
	return 0;
}

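/*
 * RTM_NEWRULE handler, called under RTNL. Parses and validates the
 * request, rejects duplicates when NLM_F_EXCL is set, inserts the rule
 * in preference order, resolves goto targets in both directions and
 * notifies listeners.
 */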
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX + 1];
	int err = -EINVAL, unresolved = 0;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (!ops) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
				     ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
	if (err)
		goto errout;

	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
	    rule_exists(ops, frh, tb, rule)) {
		err = -EEXIST;
		goto errout_free;
	}

	err = ops->configure(rule, skb, frh, tb, extack);
	if (err < 0)
		goto errout_free;

	err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
				      extack);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref == rule->target) {
			RCU_INIT_POINTER(rule->ctarget, r);
			break;
		}
	}

	if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
		unresolved = 1;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref &&
			    rtnl_dereference(r->ctarget) == NULL) {
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (rule->tun_id)
		ip_tunnel_need_metadata();

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_newrule);

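/*
 * RTM_DELRULE handler, called under RTNL. Looks up the first rule
 * matching the request, unlinks it, retargets or unresolves any goto
 * rules pointing at it and notifies listeners.
 */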
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule = NULL, *r, *nlrule = NULL;
	struct nlattr *tb[FRA_MAX + 1];
	int err = -EINVAL;
	bool user_priority = false;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid msg length");
		goto errout;
	}

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		NL_SET_ERR_MSG(extack, "Rule family not supported");
		goto errout;
	}

	err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX,
				     ops->policy, extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Error parsing msg");
		goto errout;
	}

	err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
	if (err)
		goto errout;

	rule = rule_find(ops, frh, tb, nlrule, user_priority);
	if (!rule) {
		err = -ENOENT;
		goto errout;
	}

	if (rule->flags & FIB_RULE_PERMANENT) {
		err = -EPERM;
		goto errout;
	}

	if (ops->delete) {
		err = ops->delete(rule);
		if (err)
			goto errout;
	}

	if (rule->tun_id)
		ip_tunnel_unneed_metadata();

	list_del_rcu(&rule->list);

	if (rule->action == FR_ACT_GOTO) {
		ops->nr_goto_rules--;
		if (rtnl_dereference(rule->ctarget) == NULL)
			ops->unresolved_rules--;
	}

	/*
	 * Check if this rule is the target of any goto rules. If so,
	 * retarget them to the next rule with the same preference, or
	 * mark them unresolved. As this walk is potentially expensive,
	 * it is only performed if goto rules other than the one being
	 * deleted actually exist.
	 */
	if (ops->nr_goto_rules > 0) {
		struct fib_rule *n;

		n = list_next_entry(rule, list);
		if (&n->list == &ops->rules_list || n->pref != rule->pref)
			n = NULL;
		list_for_each_entry(r, &ops->rules_list, list) {
			if (rtnl_dereference(r->ctarget) != rule)
				continue;
			rcu_assign_pointer(r->ctarget, n);
			if (!n)
				ops->unresolved_rules++;
		}
	}

	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
				NULL);
	notify_rule_change(RTM_DELRULE, rule, ops, nlh,
			   NETLINK_CB(skb).portid);
	fib_rule_put(rule);
	flush_route_cache(ops);
	rules_ops_put(ops);
	kfree(nlrule);
	return 0;

errout:
	kfree(nlrule);
	rules_ops_put(ops);
	return err;
}
EXPORT_SYMBOL_GPL(fib_nl_delrule);

static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4) /* FRA_FWMASK */
			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
			 + nla_total_size(sizeof(struct fib_kuid_range))
			 + nla_total_size(1) /* FRA_PROTOCOL */
			 + nla_total_size(1) /* FRA_IP_PROTO */
			 + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
			 + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

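/*
 * Fill one rule into a netlink message. Tables with an id >= 256 are
 * reported as RT_TABLE_COMPAT in the legacy header field, with the
 * real id carried in FRA_TABLE.
 */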
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->family = ops->family;
	frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, FRA_TABLE, rule->table))
		goto nla_put_failure;
	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
		goto nla_put_failure;
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
		goto nla_put_failure;

	if (rule->action == FR_ACT_GOTO &&
	    rcu_access_pointer(rule->ctarget) == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->iifname[0]) {
		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
			goto nla_put_failure;
		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
			goto nla_put_failure;
		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	if ((rule->pref &&
	     nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
	    (rule->mark &&
	     nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
	    ((rule->mark_mask || rule->mark) &&
	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
	    (rule->target &&
	     nla_put_u32(skb, FRA_GOTO, rule->target)) ||
	    (rule->tun_id &&
	     nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
	    (rule->l3mdev &&
	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
	    (uid_range_set(&rule->uid_range) &&
	     nla_put_uid_range(skb, &rule->uid_range)) ||
	    (fib_rule_port_range_set(&rule->sport_range) &&
	     nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
	    (fib_rule_port_range_set(&rule->dport_range) &&
	     nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
	    (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
		goto nla_put_failure;

	if (rule->suppress_ifgroup != -1) {
		if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
			goto nla_put_failure;
	}

	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;
	int err = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWRULE,
				       NLM_F_MULTI, ops);
		if (err)
			break;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[1] = idx;
	rules_ops_put(ops);

	return err;
}

static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
				  struct netlink_ext_ack *extack)
{
	struct fib_rule_hdr *frh;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
		return -EINVAL;
	}

	frh = nlmsg_data(nlh);
	if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
	    frh->res1 || frh->res2 || frh->action || frh->flags) {
		NL_SET_ERR_MSG(extack,
			       "Invalid values in header for fib rule dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*frh))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request");
		return -EINVAL;
	}

	return 0;
}

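/*
 * RTM_GETRULE dump handler. AF_UNSPEC walks every registered family;
 * cb->args[0] tracks the position in the per-net ops list and
 * cb->args[1] the rule index within a family across partial dumps.
 */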
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct fib_rules_ops *ops;
	int idx = 0, family;

	if (cb->strict_check) {
		int err = fib_valid_dumprule_req(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = rtnl_msg_family(nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(net, family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		dump_rules(skb, cb, ops);

		return skb->len;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	net = ops->fro_net;
	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == -1 &&
		    strcmp(dev->name, rule->iifname) == 0)
			rule->iifindex = dev->ifindex;
		if (rule->oifindex == -1 &&
		    strcmp(dev->name, rule->oifname) == 0)
			rule->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
}

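/*
 * Netdevice notifier: keep the cached ifindex in each rule in sync as
 * devices referenced by iif/oif name register, rename or unregister.
 */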
static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_CHANGENAME:
		list_for_each_entry(ops, &net->rules_ops, list) {
			detach_rules(&ops->rules_list, dev);
			attach_rules(&ops->rules_list, dev);
		}
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

static int __net_init fib_rules_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->rules_ops);
	spin_lock_init(&net->rules_mod_lock);
	return 0;
}

static void __net_exit fib_rules_net_exit(struct net *net)
{
	WARN_ON_ONCE(!list_empty(&net->rules_ops));
}

static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
	.exit = fib_rules_net_exit,
};

static int __init fib_rules_init(void)
{
	int err;

	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto fail;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto fail_unregister;

	return 0;

fail_unregister:
	unregister_pernet_subsys(&fib_rules_net_ops);
fail:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);