
/net/ipv4/ipvs/ip_vs_proto_udp.c

https://bitbucket.org/abioy/linux
C | 421 lines | 302 code | 67 blank | 52 comment | 53 complexity | 5c53a5cce7ea4ebf5f41fa01e667a88a MD5
Possible License(s): CC-BY-SA-3.0, GPL-2.0, LGPL-2.0, AGPL-1.0
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>

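/*
 * Look up the IPVS connection entry for a packet in the client-to-server
 * direction.  The UDP ports are read with skb_copy_bits() so that
 * non-linear skbs are handled correctly.  With "inverse" set the
 * source/destination tuple is swapped, as when matching the header
 * embedded in a related ICMP error.
 */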
static struct ip_vs_conn *
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	struct ip_vs_conn *cp;
	__u16 ports[2];

	if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
		return NULL;

	if (likely(!inverse)) {
		cp = ip_vs_conn_in_get(iph->protocol,
				       iph->saddr, ports[0],
				       iph->daddr, ports[1]);
	} else {
		cp = ip_vs_conn_in_get(iph->protocol,
				       iph->daddr, ports[1],
				       iph->saddr, ports[0]);
	}

	return cp;
}

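/*
 * Same lookup for the server-to-client direction, against the table of
 * outgoing connections.
 */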
static struct ip_vs_conn *
udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	struct ip_vs_conn *cp;
	__u16 ports[2];

	/* Use the caller-supplied transport header offset, as
	   udp_conn_in_get() does; for IPv4 it equals skb->nh.iph->ihl*4. */
	if (skb_copy_bits(skb, proto_off, ports, sizeof(ports)) < 0)
		return NULL;

	if (likely(!inverse)) {
		cp = ip_vs_conn_out_get(iph->protocol,
					iph->saddr, ports[0],
					iph->daddr, ports[1]);
	} else {
		cp = ip_vs_conn_out_get(iph->protocol,
					iph->daddr, ports[1],
					iph->saddr, ports[0]);
	}

	return cp;
}

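/*
 * Schedule a new connection: find the virtual service for the packet's
 * destination, let it pick a real server and create the connection
 * entry.  Returns 0 with *verdict set when the caller should not
 * continue processing, 1 otherwise.  Note that ip_vs_leave() consumes
 * the service reference on the no-destination path, so no extra put is
 * needed there.
 */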
static int
udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct udphdr udph;

	if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
		*verdict = NF_DROP;
		return 0;
	}

	if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
				     skb->nh.iph->daddr, udph.dest))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}

	return 1;
}

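/*
 * Incrementally update the UDP checksum when only an address and a
 * port change (RFC 1624 style), instead of recomputing it over the
 * whole packet.  A result of zero is folded to 0xFFFF, because in UDP
 * a transmitted checksum field of zero means "no checksum" (RFC 768).
 */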
static inline void
udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
		     u16 oldport, u16 newport)
{
	uhdr->check =
		ip_vs_check_diff(~oldip, newip,
				 ip_vs_check_diff(oldport ^ 0xFFFF,
						  newport, uhdr->check));
	if (!uhdr->check)
		uhdr->check = 0xFFFF;
}

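/*
 * Source NAT for replies leaving the director: rewrite the source port
 * to the virtual service port and fix the checksum, either with the
 * fast incremental update or, when an application helper may have
 * changed the payload, with a full recalculation.
 */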
static int
udp_snat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 * Call application helper if needed
		 */
		if (!ip_vs_app_pkt_out(cp, pskb))
			return 0;
	}

	udph = (void *)(*pskb)->nh.iph + udphoff;
	udph->source = cp->vport;

	/*
	 * Adjust UDP checksums
	 */
	if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
				     cp->dport, cp->vport);
		if ((*pskb)->ip_summed == CHECKSUM_HW)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, udphoff,
					     (*pskb)->len - udphoff, 0);
		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
						(*pskb)->len - udphoff,
						cp->protocol,
						(*pskb)->csum);
		if (udph->check == 0)
			udph->check = 0xFFFF;
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
			  pp->name, udph->check,
			  (char*)&(udph->check) - (char*)udph);
	}
	return 1;
}

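/*
 * Destination NAT for requests entering the cluster: rewrite the
 * destination port to the chosen real server's port and fix the
 * checksum the same way as in udp_snat_handler() above.
 */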
static int
udp_dnat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 * Attempt ip_vs_app call.
		 * It will fix ip_vs_conn
		 */
		if (!ip_vs_app_pkt_in(cp, pskb))
			return 0;
	}

	udph = (void *)(*pskb)->nh.iph + udphoff;
	udph->dest = cp->dport;

	/*
	 * Adjust UDP checksums
	 */
	if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
				     cp->vport, cp->dport);
		if ((*pskb)->ip_summed == CHECKSUM_HW)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, udphoff,
					     (*pskb)->len - udphoff, 0);
		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
						(*pskb)->len - udphoff,
						cp->protocol,
						(*pskb)->csum);
		if (udph->check == 0)
			udph->check = 0xFFFF;
		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}

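/*
 * Verify the UDP checksum of a packet before it is mangled.  A zero
 * checksum field means the sender did not compute one, so nothing is
 * checked in that case.
 */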
static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	struct udphdr udph;
	unsigned int udphoff = skb->nh.iph->ihl*4;

	if (skb_copy_bits(skb, udphoff, &udph, sizeof(udph)) < 0)
		return 0;

	if (udph.check != 0) {
		switch (skb->ip_summed) {
		case CHECKSUM_NONE:
			skb->csum = skb_checksum(skb, udphoff,
						 skb->len - udphoff, 0);
			/* fall through: verify the checksum just computed */
		case CHECKSUM_HW:
			if (csum_tcpudp_magic(skb->nh.iph->saddr,
					      skb->nh.iph->daddr,
					      skb->len - udphoff,
					      skb->nh.iph->protocol,
					      skb->csum)) {
				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
						 "Failed checksum for");
				return 0;
			}
			break;
		default:
			/* CHECKSUM_UNNECESSARY */
			break;
		}
	}
	return 1;
}


/*
 * Note: the caller guarantees that only one of register_app,
 * unregister_app or app_conn_bind is called each time.
 */

#define	UDP_APP_TAB_BITS	4
#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)

static struct list_head udp_apps[UDP_APP_TAB_SIZE];
static spinlock_t udp_app_lock = SPIN_LOCK_UNLOCKED;

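/* Fold a 16-bit port number into an index for the small hash table above. */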
static inline __u16 udp_app_hashkey(__u16 port)
{
	return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
}

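/*
 * Register an application helper incarnation for its port; fails with
 * -EEXIST if a helper is already bound to that port.
 */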
static int udp_register_app(struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash, port = inc->port;
	int ret = 0;

	hash = udp_app_hashkey(port);

	spin_lock_bh(&udp_app_lock);
	list_for_each_entry(i, &udp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &udp_apps[hash]);
	atomic_inc(&ip_vs_protocol_udp.appcnt);

  out:
	spin_unlock_bh(&udp_app_lock);
	return ret;
}


static void
udp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&udp_app_lock);
	atomic_dec(&ip_vs_protocol_udp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&udp_app_lock);
}

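/*
 * Bind a new connection to the application helper registered on its
 * virtual port, if any.  Only NAT (masquerading) connections are
 * bound, since the other forwarding methods do not let the director
 * mangle packet payloads.
 */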
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = udp_app_hashkey(cp->vport);

	spin_lock(&udp_app_lock);
	list_for_each_entry(inc, &udp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			spin_unlock(&udp_app_lock);

			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
				  "%u.%u.%u.%u:%u to app %s on port %u\n",
				  __FUNCTION__,
				  NIPQUAD(cp->caddr), ntohs(cp->cport),
				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
				  inc->name, ntohs(inc->port));
			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&udp_app_lock);

  out:
	return result;
}

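/*
 * UDP has no handshake to track, so a single NORMAL state is used.
 * Entries expire five minutes after the last packet; IP_VS_UDP_S_LAST
 * is only a table sentinel.
 */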
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]	=	5*60*HZ,
	[IP_VS_UDP_S_LAST]	=	2*HZ,
};

static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]	=	"UDP",
	[IP_VS_UDP_S_LAST]	=	"BUG!",
};


static int
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
				       udp_state_name_table, sname, to);
}

static const char * udp_state_name(int state)
{
	if (state >= IP_VS_UDP_S_LAST)
		return "ERR!";
	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
}

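/*
 * Every packet in either direction simply refreshes the connection
 * timeout; there are no real state transitions for UDP.
 */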
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
	return 1;
}

static void udp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(udp_apps);
	pp->timeout_table = udp_timeouts;
}

static void udp_exit(struct ip_vs_protocol *pp)
{
	/* nothing to release for UDP */
}

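/*
 * The protocol descriptor that hooks all of the handlers above into
 * the generic IPVS protocol framework.
 */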
struct ip_vs_protocol ip_vs_protocol_udp = {
	.name =			"UDP",
	.protocol =		IPPROTO_UDP,
	.dont_defrag =		0,
	.init =			udp_init,
	.exit =			udp_exit,
	.conn_schedule =	udp_conn_schedule,
	.conn_in_get =		udp_conn_in_get,
	.conn_out_get =		udp_conn_out_get,
	.snat_handler =		udp_snat_handler,
	.dnat_handler =		udp_dnat_handler,
	.csum_check =		udp_csum_check,
	.state_transition =	udp_state_transition,
	.state_name =		udp_state_name,
	.register_app =		udp_register_app,
	.unregister_app =	udp_unregister_app,
	.app_conn_bind =	udp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	NULL,
	.set_state_timeout =	udp_set_state_timeout,
};
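/*
 * This descriptor is registered with the protocol framework at IPVS
 * init time (via ip_vs_protocol_register() in ip_vs_proto.c, assuming
 * the layout of kernels of this vintage).
 */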