/net/netfilter/ipvs/ip_vs_nfct.c

http://github.com/mirrors/linux · C · 280 lines · 181 code · 31 blank · 68 comment · 32 complexity · e392e29425ab4ffc3ebedb211e0ca238 MD5 · raw file

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
  4. *
  5. * Portions Copyright (C) 2001-2002
  6. * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
  7. *
  8. * Portions Copyright (C) 2003-2010
  9. * Julian Anastasov
  10. *
  11. * Authors:
  12. * Ben North <ben@redfrontdoor.org>
  13. * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
  14. * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
  15. *
  16. * Current status:
  17. *
  18. * - provide conntrack confirmation for new and related connections, by
  19. * this way we can see their proper conntrack state in all hooks
  20. * - support for all forwarding methods, not only NAT
  21. * - FTP support (NAT), ability to support other NAT apps with expectations
  22. * - to correctly create expectations for related NAT connections the proper
  23. * NF conntrack support must be already installed, eg. ip_vs_ftp requires
  24. * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
  25. * NAT rules are needed)
  26. * - alter reply for NAT when forwarding packet in original direction:
  27. * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
  28. * when RELATED conntrack is created from real server (Active FTP DATA)
  29. * - if iptables_nat is not loaded the Passive FTP will not work (the
  30. * PASV response can not be NAT-ed) but Active FTP should work
  31. */
  32. #define KMSG_COMPONENT "IPVS"
  33. #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  34. #include <linux/module.h>
  35. #include <linux/types.h>
  36. #include <linux/kernel.h>
  37. #include <linux/errno.h>
  38. #include <linux/compiler.h>
  39. #include <linux/vmalloc.h>
  40. #include <linux/skbuff.h>
  41. #include <net/ip.h>
  42. #include <linux/netfilter.h>
  43. #include <linux/netfilter_ipv4.h>
  44. #include <net/ip_vs.h>
  45. #include <net/netfilter/nf_conntrack_core.h>
  46. #include <net/netfilter/nf_conntrack_expect.h>
  47. #include <net/netfilter/nf_conntrack_seqadj.h>
  48. #include <net/netfilter/nf_conntrack_helper.h>
  49. #include <net/netfilter/nf_conntrack_zones.h>
  /*
   * printf-style format / argument pairs used by the debug messages in this
   * file.  Each ARG_* macro expands to a comma-separated argument list whose
   * entries line up, in order, with the conversions of the matching FMT_*
   * string.  The macro argument is expanded several times, so pass an
   * expression without side effects.
   */

  /* Conntrack tuple: src-addr:src-port -> dst-addr:dst-port / protocol */
  #define FMT_TUPLE	"%s:%u->%s:%u/%u"
  #define ARG_TUPLE(tup)						\
  	IP_VS_DBG_ADDR((tup)->src.l3num, &(tup)->src.u3),		\
  	ntohs((tup)->src.u.all),					\
  	IP_VS_DBG_ADDR((tup)->src.l3num, &(tup)->dst.u3),		\
  	ntohs((tup)->dst.u.all),					\
  	(tup)->dst.protonum

  /*
   * IPVS connection:
   * caddr:cport -> vaddr:vport -> daddr:dport / protocol : state
   * (daddr is formatted with the connection's daf, the others with af)
   */
  #define FMT_CONN	"%s:%u->%s:%u->%s:%u/%u:%u"
  #define ARG_CONN(cn)						\
  	IP_VS_DBG_ADDR((cn)->af, &((cn)->caddr)),			\
  	ntohs((cn)->cport),						\
  	IP_VS_DBG_ADDR((cn)->af, &((cn)->vaddr)),			\
  	ntohs((cn)->vport),						\
  	IP_VS_DBG_ADDR((cn)->daf, &((cn)->daddr)),			\
  	ntohs((cn)->dport),						\
  	(cn)->protocol, (cn)->state
  64. void
  65. ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
  66. {
  67. enum ip_conntrack_info ctinfo;
  68. struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
  69. struct nf_conntrack_tuple new_tuple;
  70. if (ct == NULL || nf_ct_is_confirmed(ct) ||
  71. nf_ct_is_dying(ct))
  72. return;
  73. /* Never alter conntrack for non-NAT conns */
  74. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  75. return;
  76. /* Never alter conntrack for OPS conns (no reply is expected) */
  77. if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
  78. return;
  79. /* Alter reply only in original direction */
  80. if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
  81. return;
  82. /* Applications may adjust TCP seqs */
  83. if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP &&
  84. !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct))
  85. return;
  86. /*
  87. * The connection is not yet in the hashtable, so we update it.
  88. * CIP->VIP will remain the same, so leave the tuple in
  89. * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
  90. * real-server we will see RIP->DIP.
  91. */
  92. new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  93. /*
  94. * This will also take care of UDP and other protocols.
  95. */
  96. if (outin) {
  97. new_tuple.src.u3 = cp->daddr;
  98. if (new_tuple.dst.protonum != IPPROTO_ICMP &&
  99. new_tuple.dst.protonum != IPPROTO_ICMPV6)
  100. new_tuple.src.u.tcp.port = cp->dport;
  101. } else {
  102. new_tuple.dst.u3 = cp->vaddr;
  103. if (new_tuple.dst.protonum != IPPROTO_ICMP &&
  104. new_tuple.dst.protonum != IPPROTO_ICMPV6)
  105. new_tuple.dst.u.tcp.port = cp->vport;
  106. }
  107. IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
  108. "ctinfo=%d, old reply=" FMT_TUPLE "\n",
  109. __func__, ct, ct->status, ctinfo,
  110. ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple));
  111. IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
  112. "ctinfo=%d, new reply=" FMT_TUPLE "\n",
  113. __func__, ct, ct->status, ctinfo,
  114. ARG_TUPLE(&new_tuple));
  115. nf_conntrack_alter_reply(ct, &new_tuple);
  116. IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n",
  117. __func__, ct, ARG_CONN(cp));
  118. }
  /*
   * Confirm the conntrack entry associated with @skb.
   *
   * Thin wrapper: the result of nf_conntrack_confirm() is handed back to
   * the caller unchanged.
   */
  int ip_vs_confirm_conntrack(struct sk_buff *skb)
  {
  	int ret = nf_conntrack_confirm(skb);

  	return ret;
  }
  123. /*
  124. * Called from init_conntrack() as expectfn handler.
  125. */
  126. static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
  127. struct nf_conntrack_expect *exp)
  128. {
  129. struct nf_conntrack_tuple *orig, new_reply;
  130. struct ip_vs_conn *cp;
  131. struct ip_vs_conn_param p;
  132. struct net *net = nf_ct_net(ct);
  133. /*
  134. * We assume that no NF locks are held before this callback.
  135. * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
  136. * expectations even if they use wildcard values, now we provide the
  137. * actual values from the newly created original conntrack direction.
  138. * The conntrack is confirmed when packet reaches IPVS hooks.
  139. */
  140. /* RS->CLIENT */
  141. orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
  142. ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
  143. &orig->src.u3, orig->src.u.tcp.port,
  144. &orig->dst.u3, orig->dst.u.tcp.port, &p);
  145. cp = ip_vs_conn_out_get(&p);
  146. if (cp) {
  147. /* Change reply CLIENT->RS to CLIENT->VS */
  148. IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp="
  149. FMT_CONN "\n",
  150. __func__, ct, ct->status, ARG_CONN(cp));
  151. new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  152. IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
  153. FMT_TUPLE "\n",
  154. __func__, ct, ARG_TUPLE(&new_reply));
  155. new_reply.dst.u3 = cp->vaddr;
  156. new_reply.dst.u.tcp.port = cp->vport;
  157. goto alter;
  158. }
  159. /* CLIENT->VS */
  160. cp = ip_vs_conn_in_get(&p);
  161. if (cp) {
  162. /* Change reply VS->CLIENT to RS->CLIENT */
  163. IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp="
  164. FMT_CONN "\n",
  165. __func__, ct, ct->status, ARG_CONN(cp));
  166. new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
  167. IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
  168. FMT_TUPLE "\n",
  169. __func__, ct, ARG_TUPLE(&new_reply));
  170. new_reply.src.u3 = cp->daddr;
  171. new_reply.src.u.tcp.port = cp->dport;
  172. goto alter;
  173. }
  174. IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
  175. " - unknown expect\n",
  176. __func__, ct, ct->status, ARG_TUPLE(orig));
  177. return;
  178. alter:
  179. /* Never alter conntrack for non-NAT conns */
  180. if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
  181. nf_conntrack_alter_reply(ct, &new_reply);
  182. ip_vs_conn_put(cp);
  183. return;
  184. }
  185. /*
  186. * Create NF conntrack expectation with wildcard (optional) source port.
  187. * Then the default callback function will alter the reply and will confirm
  188. * the conntrack entry when the first packet comes.
  189. * Use port 0 to expect connection from any port.
  190. */
  191. void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
  192. struct ip_vs_conn *cp, u_int8_t proto,
  193. const __be16 port, int from_rs)
  194. {
  195. struct nf_conntrack_expect *exp;
  196. if (ct == NULL)
  197. return;
  198. exp = nf_ct_expect_alloc(ct);
  199. if (!exp)
  200. return;
  201. nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
  202. from_rs ? &cp->daddr : &cp->caddr,
  203. from_rs ? &cp->caddr : &cp->vaddr,
  204. proto, port ? &port : NULL,
  205. from_rs ? &cp->cport : &cp->vport);
  206. exp->expectfn = ip_vs_nfct_expect_callback;
  207. IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
  208. __func__, ct, ARG_TUPLE(&exp->tuple));
  209. nf_ct_expect_related(exp, 0);
  210. nf_ct_expect_put(exp);
  211. }
  212. EXPORT_SYMBOL(ip_vs_nfct_expect_related);
  213. /*
  214. * Our connection was terminated, try to drop the conntrack immediately
  215. */
  216. void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
  217. {
  218. struct nf_conntrack_tuple_hash *h;
  219. struct nf_conn *ct;
  220. struct nf_conntrack_tuple tuple;
  221. if (!cp->cport)
  222. return;
  223. tuple = (struct nf_conntrack_tuple) {
  224. .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
  225. tuple.src.u3 = cp->caddr;
  226. tuple.src.u.all = cp->cport;
  227. tuple.src.l3num = cp->af;
  228. tuple.dst.u3 = cp->vaddr;
  229. tuple.dst.u.all = cp->vport;
  230. IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n",
  231. __func__, ARG_CONN(cp));
  232. h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
  233. if (h) {
  234. ct = nf_ct_tuplehash_to_ctrack(h);
  235. if (nf_ct_kill(ct)) {
  236. IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple="
  237. FMT_TUPLE "\n",
  238. __func__, ct, ARG_TUPLE(&tuple));
  239. } else {
  240. IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple="
  241. FMT_TUPLE "\n",
  242. __func__, ct, ARG_TUPLE(&tuple));
  243. }
  244. nf_ct_put(ct);
  245. } else {
  246. IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
  247. __func__, ARG_TUPLE(&tuple));
  248. }
  249. }