
/bsd/sys/netinet/tcp_syncache.cc

https://gitlab.com/jforge/osv
Possible License(s): BSD-3-Clause, 0BSD, MPL-2.0-no-copyleft-exception

/*-
 * Copyright (c) 2001 McAfee, Inc.
 * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jonathan Lemon
 * and McAfee Research, the Security Research Division of McAfee, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>

#include <bsd/porting/netport.h>
#include <bsd/porting/uma_stub.h>
#include <bsd/porting/sync_stub.h>

#include <bsd/sys/sys/libkern.h>
#include <bsd/sys/sys/param.h>
#include <bsd/sys/sys/limits.h>
#include <bsd/sys/sys/mbuf.h>
#include <bsd/sys/sys/md5.h>
#include <bsd/sys/sys/socket.h>
#include <bsd/sys/sys/socketvar.h>

#include <bsd/sys/net/if.h>
#include <bsd/sys/net/route.h>
#include <bsd/sys/net/vnet.h>

#include <bsd/sys/netinet/in.h>
#include <bsd/sys/netinet/in_systm.h>
#include <bsd/sys/netinet/ip.h>
#include <bsd/sys/netinet/in_var.h>
#include <bsd/sys/netinet/in_pcb.h>
#include <bsd/sys/netinet/ip_var.h>
#include <bsd/sys/netinet/ip_options.h>
#ifdef INET6
#include <bsd/sys/netinet/ip6.h>
#include <bsd/sys/netinet/icmp6.h>
#include <bsd/sys/netinet6/nd6.h>
#include <bsd/sys/netinet6/ip6_var.h>
#include <bsd/sys/netinet6/in6_pcb.h>
#endif
#include <bsd/sys/netinet/tcp.h>
#include <bsd/sys/netinet/tcp_fsm.h>
#include <bsd/sys/netinet/tcp_seq.h>
#include <bsd/sys/netinet/tcp_timer.h>
#include <bsd/sys/netinet/tcp_var.h>
#include <bsd/sys/netinet/tcp_syncache.h>
#ifdef INET6
#include <bsd/sys/netinet6/tcp6_var.h>
#endif
#ifdef IPSEC
#include <bsd/sys/netipsec/ipsec.h>
#ifdef INET6
#include <bsd/sys/netipsec/ipsec6.h>
#endif
#include <bsd/sys/netipsec/key.h>
#endif /* IPSEC */

#include <machine/in_cksum.h>

#include <functional>

static VNET_DEFINE(int, tcp_syncookies) = 1;
#define V_tcp_syncookies    VNET(tcp_syncookies)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
    &VNET_NAME(tcp_syncookies), 0,
    "Use TCP SYN cookies if the syncache overflows");

static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
#define V_tcp_syncookiesonly    VNET(tcp_syncookiesonly)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
    &VNET_NAME(tcp_syncookiesonly), 0,
    "Use only TCP SYN cookies");

#ifdef TCP_OFFLOAD_DISABLE
#define TOEPCB_ISSET(sc)    (0)
#else
#define TOEPCB_ISSET(sc)    ((sc)->sc_toepcb != NULL)
#endif

static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
static int syncache_respond(struct syncache *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
    struct mbuf *m);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
    int docallout);
static void syncache_timer(struct syncache_head *sch, serial_timer_task& timer);
static void syncookie_generate(struct syncache_head *, struct syncache *,
    u_int32_t *);
static struct syncache *syncookie_lookup(struct in_conninfo *,
    struct syncache_head *, struct syncache *, struct tcpopt *,
    struct tcphdr *, struct socket *);
/*
 * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
 * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds;
 * by then the odds are that the user has given up attempting to connect.
 */
#define SYNCACHE_MAXREXMTS    3
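
/*
 * Worked arithmetic for the comment above (illustrative only, not part of
 * the build): with tcp_backoff[] starting {1, 2, 4, 8, ...} and a base RTO
 * of TCPTV_RTOBASE (traditionally 3 seconds), the initial transmit plus
 * SYNCACHE_MAXREXMTS retransmits wait a total of
 *
 *     3s * (1 + 2 + 4 + 8) == 45 seconds
 *
 * before syncache_timer() declares the entry stale and drops it.
 */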
/* Arbitrary values */
#define TCP_SYNCACHE_HASHSIZE       512
#define TCP_SYNCACHE_BUCKETLIMIT    30

static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
#define V_tcp_syncache    VNET(tcp_syncache)

SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");

SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
    &VNET_NAME(tcp_syncache.bucket_limit), 0,
    "Per-bucket hash limit for syncache");

SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
    &VNET_NAME(tcp_syncache.cache_limit), 0,
    "Overall entry limit for syncache");

SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
    &VNET_NAME(tcp_syncache.cache_count), 0,
    "Current number of entries in syncache");

SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
    &VNET_NAME(tcp_syncache.hashsize), 0,
    "Size of TCP syncache hashtable");

SYSCTL_VNET_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
    &VNET_NAME(tcp_syncache.rexmt_limit), 0,
    "Limit on SYN/ACK retransmissions");

VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 0;    /* match Linux behavior */
SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
    CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
    "Send reset on socket allocation failure");

MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");

#define SYNCACHE_HASH(inc, mask) \
    ((V_tcp_syncache.hash_secret ^ \
      (inc)->inc_faddr.s_addr ^ \
      ((inc)->inc_faddr.s_addr >> 16) ^ \
      (inc)->inc_fport ^ (inc)->inc_lport) & mask)

#define SYNCACHE_HASH6(inc, mask) \
    ((V_tcp_syncache.hash_secret ^ \
      (inc)->inc6_faddr.s6_addr32[0] ^ \
      (inc)->inc6_faddr.s6_addr32[3] ^ \
      (inc)->inc_fport ^ (inc)->inc_lport) & mask)
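
/*
 * Sketch of how these macros are used (mirrors syncache_lookup() below).
 * hashsize is forced to a power of two in syncache_init(), so hashmask ==
 * hashsize - 1 and the '& mask' above is a cheap modulo:
 *
 *     struct syncache_head *sch = &V_tcp_syncache.hashbase[
 *         SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
 */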
#define ENDPTS_EQ(a, b) ( \
    (a)->ie_fport == (b)->ie_fport && \
    (a)->ie_lport == (b)->ie_lport && \
    (a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr && \
    (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \
)

#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)

#define SCH_LOCK(sch)           mutex_lock(&(sch)->sch_mtx)
#define SCH_UNLOCK(sch)         mutex_unlock(&(sch)->sch_mtx)
#define SCH_LOCK_ASSERT(sch)    assert(mutex_owned(&(sch)->sch_mtx))

/*
 * Requires the syncache entry to be already removed from the bucket list.
 */
static void syncache_free(struct syncache *sc)
{
    if (sc->sc_ipopts)
        (void)m_free(sc->sc_ipopts);
#ifdef MAC
    mac_syncache_destroy(&sc->sc_label);
#endif
    uma_zfree(V_tcp_syncache.zone, sc);
}
syncache_head::syncache_head()
    : sch_timer(sch_mtx, std::bind(syncache_timer, this, std::placeholders::_1))
{
#ifdef VIMAGE
    sch_vnet = curvnet;
#endif
    TAILQ_INIT(&sch_bucket);
}

void syncache_init(void)
{
    V_tcp_syncache.cache_count = 0;
    V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
    V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
    V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
    V_tcp_syncache.hash_secret = arc4random();

    TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
        &V_tcp_syncache.hashsize);
    TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
        &V_tcp_syncache.bucket_limit);
    if (!powerof2(V_tcp_syncache.hashsize) || V_tcp_syncache.hashsize == 0) {
        printf("WARNING: syncache hash size is not a power of 2.\n");
        V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
    }
    V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;

    /* Set limits. */
    V_tcp_syncache.cache_limit = V_tcp_syncache.hashsize
        * V_tcp_syncache.bucket_limit;
    TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
        &V_tcp_syncache.cache_limit);

    /* Allocate the hash table. */
    V_tcp_syncache.hashbase = new syncache_head[V_tcp_syncache.hashsize];

    /* Create the syncache entry zone. */
    V_tcp_syncache.zone = uma_zcreate(
        "syncache", sizeof(struct syncache), NULL, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);
    uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit);
}
#ifdef VIMAGE
void
syncache_destroy(void)
{
    struct syncache_head *sch;
    struct syncache *sc, *nsc;
    int i;

    /* Cleanup hash buckets: stop timers, free entries, destroy locks. */
    for (i = 0; i < V_tcp_syncache.hashsize; i++) {
        sch = &V_tcp_syncache.hashbase[i];

        sch->sch_timer.cancel_sync();

        SCH_LOCK(sch);
        TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc)
            syncache_drop(sc, sch);
        SCH_UNLOCK(sch);
        KASSERT(TAILQ_EMPTY(&sch->sch_bucket),
            ("%s: sch->sch_bucket not empty", __func__));
        KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0",
            __func__, sch->sch_length));
    }

    KASSERT(V_tcp_syncache.cache_count == 0, ("%s: cache_count %d not 0",
        __func__, V_tcp_syncache.cache_count));

    /* Free the allocated global resources. */
    uma_zdestroy(V_tcp_syncache.zone);
    delete[] V_tcp_syncache.hashbase;
}
#endif
/*
 * Inserts a syncache entry into the specified bucket row.
 * Locks and unlocks the syncache_head autonomously.
 */
static void syncache_insert(struct syncache *sc, struct syncache_head *sch)
{
    struct syncache *sc2;

    SCH_LOCK(sch);

    /*
     * Make sure that we don't overflow the per-bucket limit.
     * If the bucket is full, toss the oldest element.
     */
    if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
        KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
            ("sch->sch_length incorrect"));
        sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
        syncache_drop(sc2, sch);
        TCPSTAT_INC(tcps_sc_bucketoverflow);
    }

    /* Put it into the bucket. */
    TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
    sch->sch_length++;

    /* Reinitialize the bucket row's timer. */
    if (sch->sch_length == 1)
        sch->sch_nextc = bsd_ticks + INT_MAX;
    syncache_timeout(sc, sch, 1);

    SCH_UNLOCK(sch);

    V_tcp_syncache.cache_count++;
    TCPSTAT_INC(tcps_sc_added);
}

/*
 * Remove and free entry from syncache bucket row.
 * Expects locked syncache head.
 */
static void syncache_drop(struct syncache *sc, struct syncache_head *sch)
{
    SCH_LOCK_ASSERT(sch);

    TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
    sch->sch_length--;

    syncache_free(sc);
    V_tcp_syncache.cache_count--;
}
/*
 * Engage/reengage time on bucket row.
 */
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
    int docallout)
{
    sc->sc_rxttime = bsd_ticks + TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
    sc->sc_rxmits++;
    if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
        sch->sch_nextc = sc->sc_rxttime;
        if (docallout) {
            reschedule(sch->sch_timer, sch->sch_nextc - bsd_ticks);
        }
    }
}
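
/*
 * Resulting schedule (a sketch, assuming the stock tcp_backoff[] table
 * {1, 2, 4, 8, ...}): the first arm fires TCPTV_RTOBASE * 1 ticks out,
 * and each rearm after a retransmit uses tcp_backoff[sc_rxmits], so the
 * successive waits are 1, 2, 4 and 8 times TCPTV_RTOBASE until the
 * rexmt_limit check in syncache_timer() gives up on the entry.
 */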
/*
 * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
 * If we have retransmitted an entry the maximum number of times, expire it.
 * One separate timer for each bucket row.
 */
static void syncache_timer(struct syncache_head *sch, serial_timer_task& timer)
{
    struct syncache *sc, *nsc;
    int tick = bsd_ticks;
    char *s;

    CURVNET_SET(sch->sch_vnet);

    SCOPE_LOCK(sch->sch_mtx);
    if (!timer.try_fire()) {
        return;
    }

    /*
     * In the following cycle we may remove some entries and/or
     * advance some timeouts, so re-initialize the bucket timer.
     */
    sch->sch_nextc = tick + INT_MAX;

    TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
        /*
         * We do not check if the listen socket still exists
         * and accept the case where the listen socket may be
         * gone by the time we resend the SYN/ACK. We do
         * not expect this to happen often. If it does,
         * then the RST will be sent by the time the remote
         * host does the SYN/ACK->ACK.
         */
        if (TSTMP_GT(sc->sc_rxttime, tick)) {
            if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
                sch->sch_nextc = sc->sc_rxttime;
            continue;
        }
        if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
            if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
                bsd_log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
                    "giving up and removing syncache entry\n",
                    s, __func__);
                free(s);
            }
            syncache_drop(sc, sch);
            TCPSTAT_INC(tcps_sc_stale);
            continue;
        }
        if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
            bsd_log(LOG_DEBUG, "%s; %s: Response timeout, "
                "retransmitting (%u) SYN|ACK\n",
                s, __func__, sc->sc_rxmits);
            free(s);
        }

        (void)syncache_respond(sc);
        TCPSTAT_INC(tcps_sc_retransmitted);
        syncache_timeout(sc, sch, 0);
    }
    if (!TAILQ_EMPTY(&(sch)->sch_bucket)) {
        reschedule(timer, (sch)->sch_nextc - tick);
    }

    CURVNET_RESTORE();
}
/*
 * Find an entry in the syncache.
 * Always returns with a locked syncache_head, plus a matching entry or NULL.
 */
struct syncache *
syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
    struct syncache *sc;
    struct syncache_head *sch;

#ifdef INET6
    if (inc->inc_flags & INC_ISIPV6) {
        sch = &V_tcp_syncache.hashbase[
            SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)];
        *schp = sch;

        SCH_LOCK(sch);

        /* Circle through bucket row to find matching entry. */
        TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
            if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
                return (sc);
        }
    } else
#endif
    {
        sch = &V_tcp_syncache.hashbase[
            SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)];
        *schp = sch;

        SCH_LOCK(sch);

        /* Circle through bucket row to find matching entry. */
        TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
#ifdef INET6
            if (sc->sc_inc.inc_flags & INC_ISIPV6)
                continue;
#endif
            if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
                return (sc);
        }
    }
    SCH_LOCK_ASSERT(*schp);
    return (NULL);    /* always returns with locked sch */
}
/*
 * This function is called when we get a RST for a
 * non-existent connection, so that we can see if the
 * connection is in the syn cache. If it is, zap it.
 */
void syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
{
    struct syncache *sc;
    struct syncache_head *sch;
    char *s = NULL;

    sc = syncache_lookup(inc, &sch);    /* returns locked sch */
    SCH_LOCK_ASSERT(sch);

    /*
     * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
     * See RFC 793 page 65, section SEGMENT ARRIVES.
     */
    if (th->th_flags & (TH_ACK | TH_SYN | TH_FIN)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
                "FIN flag set, segment ignored\n", s, __func__);
        TCPSTAT_INC(tcps_badrst);
        goto done;
    }

    /*
     * No corresponding connection was found in syncache.
     * If syncookies are enabled and possibly exclusively
     * used, or we are under memory pressure, a valid RST
     * may not find a syncache entry. In that case we're
     * done and no SYN|ACK retransmissions will happen.
     * Otherwise the RST was misdirected or spoofed.
     */
    if (sc == NULL) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
                "syncache entry (possibly syncookie only), "
                "segment ignored\n", s, __func__);
        TCPSTAT_INC(tcps_badrst);
        goto done;
    }

    /*
     * If the RST bit is set, check the sequence number to see
     * if this is a valid reset segment.
     * RFC 793 page 37:
     *   In all states except SYN-SENT, all reset (RST) segments
     *   are validated by checking their SEQ-fields. A reset is
     *   valid if its sequence number is in the window.
     *
     * The sequence number in the reset segment is normally an
     * echo of our outgoing acknowledgement numbers, but some hosts
     * send a reset with the sequence number at the rightmost edge
     * of our receive window, and we have to handle this case.
     */
    if (th->th_seq >= sc->sc_irs
        && th->th_seq <= sc->sc_irs + sc->sc_wnd) {
        syncache_drop(sc, sch);
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
                "connection attempt aborted by remote endpoint\n",
                s, __func__);
        TCPSTAT_INC(tcps_sc_reset);
    } else {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
                "IRS %u (+WND %u), segment ignored\n",
                s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
        TCPSTAT_INC(tcps_badrst);
    }

done:
    if (s != NULL)
        free(s);
    SCH_UNLOCK(sch);
}
void syncache_badack(struct in_conninfo *inc)
{
    struct syncache *sc;
    struct syncache_head *sch;

    sc = syncache_lookup(inc, &sch);    /* returns locked sch */
    SCH_LOCK_ASSERT(sch);
    if (sc != NULL) {
        syncache_drop(sc, sch);
        TCPSTAT_INC(tcps_sc_badack);
    }
    SCH_UNLOCK(sch);
}

void syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
{
    struct syncache *sc;
    struct syncache_head *sch;

    sc = syncache_lookup(inc, &sch);    /* returns locked sch */
    SCH_LOCK_ASSERT(sch);
    if (sc == NULL)
        goto done;

    /* If the sequence number != sc_iss, then it's a bogus ICMP msg */
    if (ntohl(th->th_seq) != sc->sc_iss)
        goto done;

    /*
     * If we've retransmitted 3 times and this is our second error,
     * we remove the entry. Otherwise, we allow it to continue on.
     * This prevents us from incorrectly nuking an entry during a
     * spurious network outage.
     *
     * See tcp_notify().
     */
    if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) {
        sc->sc_flags |= SCF_UNREACH;
        goto done;
    }
    syncache_drop(sc, sch);
    TCPSTAT_INC(tcps_sc_unreach);
done:
    SCH_UNLOCK(sch);
}
/*
 * Build a new TCP socket structure from a syncache entry.
 */
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
    struct inpcb *inp = NULL;
    struct socket *so;
    struct tcpcb *tp;
    int error;
    char *s;

    INP_INFO_WLOCK_ASSERT(&V_tcbinfo);

    /*
     * Ok, create the full blown connection, and set things up
     * as they would have been set up if we had created the
     * connection when the SYN arrived. If we can't create
     * the connection, abort it.
     */
    so = sonewconn(lso, 0);
    if (so == NULL) {
        /*
         * Drop the connection; we will either send a RST or
         * have the peer retransmit its SYN again after its
         * RTO and try again.
         */
        TCPSTAT_INC(tcps_listendrop);
        if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
            bsd_log(LOG_DEBUG, "%s; %s: Socket create failed "
                "due to limits or memory shortage\n", s, __func__);
            free(s);
        }
        goto abort2;
    }
#ifdef MAC
    mac_socketpeer_set_from_mbuf(m, so);
#endif

    inp = sotoinpcb(so);
    inp->inp_inc.inc_fibnum = so->so_fibnum;
    INP_LOCK(inp);
    INP_HASH_WLOCK(&V_tcbinfo);

    /* Insert new socket into PCB hash list. */
    inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
#ifdef INET6
    if (sc->sc_inc.inc_flags & INC_ISIPV6) {
        inp->in6p_laddr = sc->sc_inc.inc6_laddr;
    } else {
        inp->inp_vflag &= ~INP_IPV6;
        inp->inp_vflag |= INP_IPV4;
#endif
        inp->inp_laddr = sc->sc_inc.inc_laddr;
#ifdef INET6
    }
#endif

    /*
     * Install in the reservation hash table for now, but don't yet
     * install a connection group since the full 4-tuple isn't yet
     * configured.
     */
    inp->inp_lport = sc->sc_inc.inc_lport;
    if ((error = in_pcbinshash(inp)) != 0) {
        /*
         * Undo the assignments above if we failed to
         * put the PCB on the hash lists.
         */
#ifdef INET6
        if (sc->sc_inc.inc_flags & INC_ISIPV6)
            inp->in6p_laddr = in6addr_any;
        else
#endif
            inp->inp_laddr.s_addr = INADDR_ANY;
        inp->inp_lport = 0;
        if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
            bsd_log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
                "with error %i\n", s, __func__, error);
            free(s);
        }
        INP_HASH_WUNLOCK(&V_tcbinfo);
        goto abort;
    }
#ifdef IPSEC
    /* Copy old policy into new socket's. */
    if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
        printf("syncache_socket: could not copy policy\n");
#endif
#ifdef INET6
    if (sc->sc_inc.inc_flags & INC_ISIPV6) {
        struct inpcb *oinp = sotoinpcb(lso);
        struct in6_addr laddr6;
        struct bsd_sockaddr_in6 sin6;
        /*
         * Inherit socket options from the listening socket.
         * Note that in6p_inputopts are not (and should not be)
         * copied, since it stores previously received options and is
         * used to detect if each new option is different than the
         * previous one and hence should be passed to a user.
         * If we copied in6p_inputopts, a user would not be able to
         * receive options just after calling the accept system call.
         */
        inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
        if (oinp->in6p_outputopts)
            inp->in6p_outputopts =
                ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);

        sin6.sin6_family = AF_INET6;
        sin6.sin6_len = sizeof(sin6);
        sin6.sin6_addr = sc->sc_inc.inc6_faddr;
        sin6.sin6_port = sc->sc_inc.inc_fport;
        sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
        laddr6 = inp->in6p_laddr;
        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
            inp->in6p_laddr = sc->sc_inc.inc6_laddr;
        if ((error = in6_pcbconnect_mbuf(inp, (struct bsd_sockaddr *)&sin6,
            thread0.td_ucred, m)) != 0) {
            inp->in6p_laddr = laddr6;
            if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
                bsd_log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
                    "with error %i\n", s, __func__, error);
                free(s);
            }
            INP_HASH_WUNLOCK(&V_tcbinfo);
            goto abort;
        }
        /* Override flowlabel from in6_pcbconnect. */
        inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
        inp->inp_flow |= sc->sc_flowlabel;
    }
#endif /* INET6 */
#if defined(INET) && defined(INET6)
    else
#endif
#ifdef INET
    {
        struct in_addr laddr;
        struct bsd_sockaddr_in sin;

        inp->inp_options = (m) ? ip_srcroute(m) : NULL;
        if (inp->inp_options == NULL) {
            inp->inp_options = sc->sc_ipopts;
            sc->sc_ipopts = NULL;
        }

        sin.sin_family = AF_INET;
        sin.sin_len = sizeof(sin);
        sin.sin_addr = sc->sc_inc.inc_faddr;
        sin.sin_port = sc->sc_inc.inc_fport;
        bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
        laddr = inp->inp_laddr;
        if (inp->inp_laddr.s_addr == INADDR_ANY)
            inp->inp_laddr = sc->sc_inc.inc_laddr;
        if ((error = in_pcbconnect_mbuf(inp, (struct bsd_sockaddr *)&sin,
            0, m)) != 0) {
            inp->inp_laddr = laddr;
            if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
                bsd_log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
                    "with error %i\n", s, __func__, error);
                free(s);
            }
            INP_HASH_WUNLOCK(&V_tcbinfo);
            goto abort;
        }
    }
#endif /* INET */
    INP_HASH_WUNLOCK(&V_tcbinfo);
    tp = intotcpcb(inp);
    tp->set_state(TCPS_SYN_RECEIVED);
    tp->iss = sc->sc_iss;
    tp->irs = sc->sc_irs;
    tcp_rcvseqinit(tp);
    tcp_sendseqinit(tp);
    tp->snd_wl1 = sc->sc_irs;
    tp->snd_max = tp->iss + 1;
    tp->snd_nxt = tp->iss + 1;
    tp->rcv_up = sc->sc_irs + 1;
    tp->rcv_wnd = sc->sc_wnd;
    tp->rcv_adv += tp->rcv_wnd;
    tp->last_ack_sent = tp->rcv_nxt;

    tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH | TF_NODELAY);
    if (sc->sc_flags & SCF_NOOPT)
        tp->t_flags |= TF_NOOPT;
    else {
        if (sc->sc_flags & SCF_WINSCALE) {
            tp->t_flags |= TF_REQ_SCALE | TF_RCVD_SCALE;
            tp->snd_scale = sc->sc_requested_s_scale;
            tp->request_r_scale = sc->sc_requested_r_scale;
        }
        if (sc->sc_flags & SCF_TIMESTAMP) {
            tp->t_flags |= TF_REQ_TSTMP | TF_RCVD_TSTMP;
            tp->ts_recent = sc->sc_tsreflect;
            tp->ts_recent_age = tcp_ts_getticks();
            tp->ts_offset = sc->sc_tsoff;
        }
#ifdef TCP_SIGNATURE
        if (sc->sc_flags & SCF_SIGNATURE)
            tp->t_flags |= TF_SIGNATURE;
#endif
        if (sc->sc_flags & SCF_SACK)
            tp->t_flags |= TF_SACK_PERMIT;
    }

    if (sc->sc_flags & SCF_ECN)
        tp->t_flags |= TF_ECN_PERMIT;

    /*
     * Set up MSS and get cached values from tcp_hostcache.
     * This might overwrite some of the defaults we just set.
     */
    tcp_mss(tp, sc->sc_peer_mss);

    /*
     * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
     * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
     */
    if (sc->sc_rxmits > 1)
        tp->snd_cwnd = tp->t_maxseg;

    /*
     * Copy and activate timers.
     */
    tp->t_keepinit = sototcpcb(lso)->t_keepinit;
    tp->t_keepidle = sototcpcb(lso)->t_keepidle;
    tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
    tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
    tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));

    INP_UNLOCK(inp);

    SOCK_LOCK(so);
    soisconnected(so);

    TCPSTAT_INC(tcps_accepts);
    return (so);

abort:
    INP_UNLOCK(inp);
abort2:
    if (so != NULL)
        soabort(so);
    return (NULL);
}
static void
syncache_remove_and_free(struct syncache_head *sch, struct syncache *sc)
{
    SCH_LOCK_ASSERT(sch);

    TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
    sch->sch_length--;
    V_tcp_syncache.cache_count--;
    SCH_UNLOCK(sch);
    syncache_free(sc);
}
/*
 * This function gets called when we receive an ACK for a
 * socket in the LISTEN state. We look up the connection
 * in the syncache, and if it's there, we pull it out of
 * the cache and turn it into a full-blown connection in
 * the SYN-RECEIVED state.
 */
int syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
    struct tcphdr *th, struct socket **lsop, struct mbuf *m)
{
    struct syncache *sc;
    struct syncache_head *sch;
    struct syncache scs;
    char *s;

    /*
     * Global TCP locks are held because we manipulate the PCB lists
     * and create a new socket.
     */
    INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
    KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
        ("%s: can handle only ACK", __func__));

    sc = syncache_lookup(inc, &sch);    /* returns locked sch */
    SCH_LOCK_ASSERT(sch);
    if (sc == NULL) {
        /*
         * There is no syncache entry, so see if this ACK is
         * a returning syncookie. To do this, first:
         *  A. See if this socket has had a syncache entry dropped in
         *     the past. We don't want to accept a bogus syncookie
         *     if we've never received a SYN.
         *  B. check that the syncookie is valid. If it is, then
         *     cobble up a fake syncache entry, and return.
         */
        if (!V_tcp_syncookies) {
            SCH_UNLOCK(sch);
            if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
                bsd_log(LOG_DEBUG, "%s; %s: Spurious ACK, "
                    "segment rejected (syncookies disabled)\n",
                    s, __func__);
            goto failed;
        }
        bzero(&scs, sizeof(scs));
        sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
        SCH_UNLOCK(sch);
        if (sc == NULL) {
            if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
                bsd_log(LOG_DEBUG, "%s; %s: Segment failed "
                    "SYNCOOKIE authentication, segment rejected "
                    "(probably spoofed)\n", s, __func__);
            goto failed;
        }
    }

    /*
     * Segment validation:
     * ACK must match our initial sequence number + 1 (the SYN|ACK).
     */
    if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
                "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
        goto failed;
    }

    /*
     * The SEQ must fall in the window starting at the received
     * initial receive sequence number + 1 (the SYN).
     */
    if ((th->th_seq <= sc->sc_irs
        || th->th_seq > sc->sc_irs + sc->sc_wnd) &&
        !TOEPCB_ISSET(sc)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
                "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
        goto failed;
    }

    if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
                "segment rejected\n", s, __func__);
        goto failed;
    }

    /*
     * If timestamps were negotiated the reflected timestamp
     * must be equal to what we actually sent in the SYN|ACK.
     */
    if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
        !TOEPCB_ISSET(sc)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            bsd_log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
                "segment rejected\n",
                s, __func__, to->to_tsecr, sc->sc_ts);
        goto failed;
    }

    *lsop = syncache_socket(sc, *lsop, m);

    if (*lsop == NULL) {
        TCPSTAT_INC(tcps_sc_aborted);
    } else {
        TCPSTAT_INC(tcps_sc_completed);
        if (sc != &scs) {
            syncache_remove_and_free(sch, sc);
        }
    }
    return (1);

failed:
    if (sc != NULL && sc != &scs) {
        syncache_remove_and_free(sch, sc);
    }
    if (s != NULL)
        free(s);
    *lsop = NULL;
    return (0);
}
/*
 * Given a LISTEN socket and an inbound SYN request, add
 * this to the syn cache, and send back a segment:
 *  <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
 * to the source.
 *
 * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
 * Doing so would require that we hold onto the data and deliver it
 * to the application. However, if we are the target of a SYN-flood
 * DoS attack, an attacker could send data which would eventually
 * consume all available buffer space if it were ACKed. By not ACKing
 * the data, we avoid this DoS scenario.
 */
static void _syncache_add(struct in_conninfo *inc, struct tcpopt *to,
    struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
    struct mbuf *m, struct toe_usrreqs *tu, void *toepcb)
{
    struct tcpcb *tp;
    struct socket *so;
    struct syncache *sc = NULL;
    struct syncache_head *sch;
    struct mbuf *ipopts = NULL;
    u_int32_t flowtmp;
    u_int ltflags;
    int win, sb_hiwat, ip_ttl, ip_tos;
    char *s;
#ifdef INET6
    int autoflowlabel = 0;
#endif
#ifdef MAC
    struct label *maclabel;
#endif
    struct syncache scs;

    INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
    INP_LOCK_ASSERT(inp);    /* listen socket */
    KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
        ("%s: unexpected tcp flags", __func__));

    /*
     * Combine all so/tp operations very early to drop the INP lock as
     * soon as possible.
     */
    so = *lsop;
    tp = sototcpcb(so);

#ifdef INET6
    if ((inc->inc_flags & INC_ISIPV6) &&
        (inp->inp_flags & IN6P_AUTOFLOWLABEL))
        autoflowlabel = 1;
#endif
    ip_ttl = inp->inp_ip_ttl;
    ip_tos = inp->inp_ip_tos;
    win = sbspace(&so->so_rcv);
    sb_hiwat = so->so_rcv.sb_hiwat;
    ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));

    /* By the time we drop the lock these should no longer be used. */
    so = NULL;
    tp = NULL;

#ifdef MAC
    if (mac_syncache_init(&maclabel) != 0) {
        INP_UNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        goto done;
    } else
        mac_syncache_create(maclabel, inp);
#endif
    INP_UNLOCK(inp);
    INP_INFO_WUNLOCK(&V_tcbinfo);

    /*
     * Remember the IP options, if any.
     */
#ifdef INET6
    if (!(inc->inc_flags & INC_ISIPV6))
#endif
#ifdef INET
        ipopts = (m) ? ip_srcroute(m) : NULL;
#else
        ipopts = NULL;
#endif

    /*
     * See if we already have an entry for this connection.
     * If we do, resend the SYN,ACK, and reset the retransmit timer.
     *
     * XXX: should the syncache be re-initialized with the contents
     * of the new SYN here (which may have different options?)
     *
     * XXX: We do not check the sequence number to see if this is a
     * real retransmit or a new connection attempt. The question is
     * how to handle such a case; either ignore it as spoofed, or
     * drop the current entry and create a new one?
     */
    sc = syncache_lookup(inc, &sch);    /* returns locked entry */
    SCH_LOCK_ASSERT(sch);
    if (sc != NULL) {
        TCPSTAT_INC(tcps_sc_dupsyn);
        if (ipopts) {
            /*
             * If we were remembering a previous source route,
             * forget it and use the new one we've been given.
             */
            if (sc->sc_ipopts)
                (void)m_free(sc->sc_ipopts);
            sc->sc_ipopts = ipopts;
        }
        /*
         * Update timestamp if present.
         */
        if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
            sc->sc_tsreflect = to->to_tsval;
        else
            sc->sc_flags &= ~SCF_TIMESTAMP;
#ifdef MAC
        /*
         * Since we have already unconditionally allocated label
         * storage, free it up. The syncache entry will already
         * have an initialized label we can use.
         */
        mac_syncache_destroy(&maclabel);
#endif
        /* Retransmit SYN|ACK and reset retransmit count. */
        if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
            bsd_log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
                "resetting timer and retransmitting SYN|ACK\n",
                s, __func__);
            free(s);
        }
        if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
            sc->sc_rxmits = 0;
            syncache_timeout(sc, sch, 1);
            TCPSTAT_INC(tcps_sndacks);
            TCPSTAT_INC(tcps_sndtotal);
        }
        SCH_UNLOCK(sch);
        goto done;
    }

    sc = (syncache *)uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
    if (sc == NULL) {
        /*
         * The zone allocator couldn't provide more entries.
         * Treat this as if the cache was full; drop the oldest
         * entry and insert the new one.
         */
        TCPSTAT_INC(tcps_sc_zonefail);
        if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
            syncache_drop(sc, sch);
        sc = (syncache *)uma_zalloc(V_tcp_syncache.zone,
            M_NOWAIT | M_ZERO);
        if (sc == NULL) {
            if (V_tcp_syncookies) {
                bzero(&scs, sizeof(scs));
                sc = &scs;
            } else {
                SCH_UNLOCK(sch);
                if (ipopts)
                    (void)m_free(ipopts);
                goto done;
            }
        }
    }

    /*
     * Fill in the syncache values.
     */
#ifdef MAC
    sc->sc_label = maclabel;
#endif
    sc->sc_ipopts = ipopts;
    bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
#ifdef INET6
    if (!(inc->inc_flags & INC_ISIPV6))
#endif
    {
        sc->sc_ip_tos = ip_tos;
        sc->sc_ip_ttl = ip_ttl;
    }
#ifndef TCP_OFFLOAD_DISABLE
    sc->sc_tu = tu;
    sc->sc_toepcb = toepcb;
#endif
    sc->sc_irs = th->th_seq;
    sc->sc_iss = tcp_seq(arc4random());
    sc->sc_flags = 0;
    sc->sc_flowlabel = 0;

    /*
     * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
     * win was derived from socket earlier in the function.
     */
    win = imax(win, 0);
    win = imin(win, TCP_MAXWIN);
    sc->sc_wnd = win;

    if (V_tcp_do_rfc1323) {
        /*
         * A timestamp received in a SYN makes
         * it ok to send timestamp requests and replies.
         */
        if (to->to_flags & TOF_TS) {
            sc->sc_tsreflect = to->to_tsval;
            sc->sc_ts = tcp_ts_getticks();
            sc->sc_flags |= SCF_TIMESTAMP;
        }
        if (to->to_flags & TOF_SCALE) {
            int wscale = 0;

            /*
             * Pick the smallest possible scaling factor that
             * will still allow us to scale up to sb_max, aka
             * kern.ipc.maxsockbuf.
             *
             * We do this because there are broken firewalls that
             * will corrupt the window scale option, leading to
             * the other endpoint believing that our advertised
             * window is unscaled. At scale factors larger than
             * 5 the unscaled window will drop below 1500 bytes,
             * leading to serious problems when traversing these
             * broken firewalls.
             *
             * With the default maxsockbuf of 256K, a scale factor
             * of 3 will be chosen by this algorithm. Those who
             * choose a larger maxsockbuf should watch out
             * for the compatibility problems mentioned above.
             *
             * RFC1323: The Window field in a SYN (i.e., a <SYN>
             * or <SYN,ACK>) segment itself is never scaled.
             */
            while (wscale < TCP_MAX_WINSHIFT &&
                (TCP_MAXWIN << wscale) < sb_max)
                wscale++;
            sc->sc_requested_r_scale = wscale;
            sc->sc_requested_s_scale = to->to_wscale;
            sc->sc_flags |= SCF_WINSCALE;
        }
    }
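
    /*
     * Worked example for the window-scale loop above (illustrative only):
     * with the default sb_max of 256 kB, 65535 << 2 == 262140 is still
     * below 262144, but 65535 << 3 covers it, so the loop settles on
     * wscale == 3, matching the comment.
     */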
#ifdef TCP_SIGNATURE
    /*
     * If listening socket requested TCP digests, and received SYN
     * contains the option, flag this in the syncache so that
     * syncache_respond() will do the right thing with the SYN+ACK.
     * XXX: Currently we always record the option by default and will
     * attempt to use it in syncache_respond().
     */
    if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE)
        sc->sc_flags |= SCF_SIGNATURE;
#endif
    if (to->to_flags & TOF_SACKPERM)
        sc->sc_flags |= SCF_SACK;
    if (to->to_flags & TOF_MSS)
        sc->sc_peer_mss = to->to_mss;    /* peer mss may be zero */
    if (ltflags & TF_NOOPT)
        sc->sc_flags |= SCF_NOOPT;
    if ((th->th_flags & (TH_ECE | TH_CWR)) && V_tcp_do_ecn)
        sc->sc_flags |= SCF_ECN;

    if (V_tcp_syncookies) {
        syncookie_generate(sch, sc, &flowtmp);
#ifdef INET6
        if (autoflowlabel)
            sc->sc_flowlabel = flowtmp;
#endif
    } else {
#ifdef INET6
        if (autoflowlabel)
            sc->sc_flowlabel =
                (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
#endif
    }
    SCH_UNLOCK(sch);

    /*
     * Do a standard 3-way handshake.
     */
    if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
        if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
            syncache_free(sc);
        else if (sc != &scs)
            syncache_insert(sc, sch);    /* locks and unlocks sch */
        TCPSTAT_INC(tcps_sndacks);
        TCPSTAT_INC(tcps_sndtotal);
    } else {
        if (sc != &scs)
            syncache_free(sc);
        TCPSTAT_INC(tcps_sc_dropped);
    }

done:
#ifdef MAC
    if (sc == &scs)
        mac_syncache_destroy(&maclabel);
#endif
    if (m) {
        *lsop = NULL;
        m_freem(m);
    }
}
static int syncache_respond(struct syncache *sc)
{
    struct ip *ip = NULL;
    struct mbuf *m;
    struct tcphdr *th = NULL;
    int optlen, error = 0;    /* Make compiler happy */
    u_int16_t hlen, tlen, mssopt;
    struct tcpopt to;
#ifdef INET6
    struct ip6_hdr *ip6 = NULL;
#endif

    hlen =
#ifdef INET6
        (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) :
#endif
        sizeof(struct ip);
    tlen = hlen + sizeof(struct tcphdr);

    /* Determine MSS we advertise to other end of connection. */
    mssopt = tcp_mssopt(&sc->sc_inc);
    if (sc->sc_peer_mss)
        mssopt = bsd_max(bsd_min(sc->sc_peer_mss, mssopt), V_tcp_minmss);

    /* XXX: Assume that the entire packet will fit in a header mbuf. */
    KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
        ("syncache: mbuf too small"));

    /* Create the IP+TCP header from scratch. */
    m = m_gethdr(M_DONTWAIT, MT_DATA);
    if (m == NULL)
        return (ENOBUFS);
#ifdef MAC
    mac_syncache_create_mbuf(sc->sc_label, m);
#endif
    m->m_hdr.mh_data += max_linkhdr;
    m->m_hdr.mh_len = tlen;
    m->M_dat.MH.MH_pkthdr.len = tlen;
    m->M_dat.MH.MH_pkthdr.rcvif = NULL;

#ifdef INET6
    if (sc->sc_inc.inc_flags & INC_ISIPV6) {
        ip6 = mtod(m, struct ip6_hdr *);
        ip6->ip6_vfc = IPV6_VERSION;
        ip6->ip6_nxt = IPPROTO_TCP;
        ip6->ip6_src = sc->sc_inc.inc6_laddr;
        ip6->ip6_dst = sc->sc_inc.inc6_faddr;
        ip6->ip6_plen = htons(tlen - hlen);
        /* ip6_hlim is set after checksum */
        ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
        ip6->ip6_flow |= sc->sc_flowlabel;

        th = (struct tcphdr *)(ip6 + 1);
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET
    {
        ip = mtod(m, struct ip *);
        ip->ip_v = IPVERSION;
        ip->ip_hl = sizeof(struct ip) >> 2;
        ip->ip_len = tlen;
        ip->ip_id = 0;
        ip->ip_off = 0;
        ip->ip_sum = 0;
        ip->ip_p = IPPROTO_TCP;
        ip->ip_src = sc->sc_inc.inc_laddr;
        ip->ip_dst = sc->sc_inc.inc_faddr;
        ip->ip_ttl = sc->sc_ip_ttl;
        ip->ip_tos = sc->sc_ip_tos;

        /*
         * See if we should do MTU discovery. Route lookups are
         * expensive, so we will only unset the DF bit if:
         *
         *  1) path_mtu_discovery is disabled
         *  2) the SCF_UNREACH flag has been set
         */
        if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
            ip->ip_off |= IP_DF;

        th = (struct tcphdr *)(ip + 1);
    }
#endif /* INET */
    th->th_sport = sc->sc_inc.inc_lport;
    th->th_dport = sc->sc_inc.inc_fport;

    th->th_seq = htonl(sc->sc_iss);
    th->th_ack = htonl(sc->sc_irs + 1);
    th->th_off = sizeof(struct tcphdr) >> 2;
    th->th_x2 = 0;
    th->th_flags = TH_SYN | TH_ACK;
    th->th_win = htons(sc->sc_wnd);
    th->th_urp = 0;

    if (sc->sc_flags & SCF_ECN) {
        th->th_flags |= TH_ECE;
        TCPSTAT_INC(tcps_ecn_shs);
    }

    /* Tack on the TCP options. */
    if ((sc->sc_flags & SCF_NOOPT) == 0) {
        to.to_flags = 0;

        to.to_mss = mssopt;
        to.to_flags = TOF_MSS;
        if (sc->sc_flags & SCF_WINSCALE) {
            to.to_wscale = sc->sc_requested_r_scale;
            to.to_flags |= TOF_SCALE;
        }
        if (sc->sc_flags & SCF_TIMESTAMP) {
            /* Virgin timestamp or TCP cookie enhanced one. */
            to.to_tsval = sc->sc_ts;
            to.to_tsecr = sc->sc_tsreflect;
            to.to_flags |= TOF_TS;
        }
        if (sc->sc_flags & SCF_SACK)
            to.to_flags |= TOF_SACKPERM;
#ifdef TCP_SIGNATURE
        if (sc->sc_flags & SCF_SIGNATURE)
            to.to_flags |= TOF_SIGNATURE;
#endif
        optlen = tcp_addoptions(&to, (u_char *)(th + 1));

        /* Adjust headers by option size. */
        th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
        m->m_hdr.mh_len += optlen;
        m->M_dat.MH.MH_pkthdr.len += optlen;

#ifdef TCP_SIGNATURE
        if (sc->sc_flags & SCF_SIGNATURE)
            tcp_signature_compute(m, 0, 0, optlen,
                to.to_signature, IPSEC_DIR_OUTBOUND);
#endif
#ifdef INET6
        if (sc->sc_inc.inc_flags & INC_ISIPV6)
            ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
        else
#endif
            ip->ip_len += optlen;
    } else
        optlen = 0;

    M_SETFIB(m, sc->sc_inc.inc_fibnum);
    m->M_dat.MH.MH_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
    if (sc->sc_inc.inc_flags & INC_ISIPV6) {
        m->M_dat.MH.MH_pkthdr.csum_flags = CSUM_TCP_IPV6;
        th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
            IPPROTO_TCP, 0);
        ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
        error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET
    {
        m->M_dat.MH.MH_pkthdr.csum_flags = CSUM_TCP;
        th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
            htons(tlen + optlen - hlen + IPPROTO_TCP));
        error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
    }
#endif
    return (error);
}
void syncache_add(struct in_conninfo *inc, struct tcpopt *to,
    struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
    struct mbuf *m)
{
    _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
}
/*
 * The purpose of SYN cookies is to avoid keeping track of all SYN's we
 * receive and to be able to handle SYN floods from bogus source addresses
 * (where we will never receive any reply). SYN floods try to exhaust all
 * our memory and available slots in the SYN cache table to cause a denial
 * of service to legitimate users of the local host.
 *
 * The idea of SYN cookies is to encode and include all necessary information
 * about the connection setup state within the SYN-ACK we send back and thus
 * to get along without keeping any local state until the ACK to the SYN-ACK
 * arrives (if ever). Everything we need to know should be available from
 * the information we encoded in the SYN-ACK.
 *
 * More information about the theory behind SYN cookies and its first
 * discussion and specification can be found at:
 *  http://cr.yp.to/syncookies.html    (overview)
 *  http://cr.yp.to/syncookies/archive (gory details)
 *
 * This implementation extends the original idea and first implementation
 * of FreeBSD by using not only the initial sequence number field to store
 * information but also the timestamp field if present. This way we can
 * keep track of the entire state we need to know to recreate the session in
 * its original form. Almost all TCP speakers implement RFC1323 timestamps
 * these days. For those that do not we still have to live with the known
 * shortcomings of the ISN only SYN cookies.
 *
 * Cookie layers:
 *
 * Initial sequence number we send:
 * 31|................................|0
 *    DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
 *    D = MD5 Digest (first dword)
 *    M = MSS index
 *    R = Rotation of secret
 *    P = Odd or Even secret
 *
 * The MD5 Digest is computed over the following parameters:
 *  a) randomly rotated secret
 *  b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
 *  c) the received initial sequence number from remote host
 *  d) the rotation offset and odd/even bit
 *
 * Timestamp we send:
 * 31|................................|0
 *    DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
 *    D = MD5 Digest (third dword) (only as filler)
 *    S = Requested send window scale
 *    R = Requested receive window scale
 *    A = SACK allowed
 *    5 = TCP-MD5 enabled (not implemented yet)
 *    XORed with MD5 Digest (fourth dword)
 *
 * The timestamp isn't cryptographically secure and doesn't need to be.
 * The double use of the MD5 digest dwords ties it to a specific remote/
 * local host/port, remote initial sequence number and our local time
 * limited secret. A received timestamp is reverted (XORed) and then
 * the contained MD5 dword is compared to the computed one to ensure the
 * timestamp belongs to the SYN-ACK we sent. The other parameters may
 * have been tampered with but this isn't different from supplying bogus
 * values in the SYN in the first place.
 *
 * Some problems with SYN cookies remain however:
 * Consider the problem of a recreated (and retransmitted) cookie. If the
 * original SYN was accepted, the connection is established. The second
 * SYN is inflight, and if it arrives with an ISN that falls within the
 * receive window, the connection is killed.
 *
 * Notes:
 * A heuristic to determine when to accept syn cookies is not necessary.
 * An ACK flood would cause the syncookie verification to be attempted,
 * but a SYN flood causes syncookies to be generated. Both are of equal
 * cost, so there's no point in trying to optimize the ACK flood case.
 * Also, if you don't process certain ACKs for some reason, then all someone
 * would have to do is launch a SYN and ACK flood at the same time, which
 * would stop cookie verification and defeat the entire purpose of syncookies.
 */
static int tcp_sc_msstab[] =
    { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
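
/*
 * Example of the 3-bit MSS encoding (illustrative only): a peer MSS of
 * 1460 selects index 6 above (the largest table value <= 1460), while a
 * peer MSS of 1400 falls back to index 4 (996). Only the index travels
 * in the cookie; syncookie_lookup() maps it back via tcp_sc_msstab[mss].
 */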
static void syncookie_generate(struct syncache_head *sch, struct syncache *sc,
    u_int32_t *flowlabel)
{
    MD5_CTX ctx;
    u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
    u_int32_t data;
    u_int32_t *secbits;
    u_int off, pmss, mss;
    int i;

    SCH_LOCK_ASSERT(sch);

    /* Which of the two secrets to use. */
    secbits = sch->sch_oddeven ? sch->sch_secbits_odd :
        sch->sch_secbits_even;

    /* Reseed secret if too old. */
    if (sch->sch_reseed < time_uptime) {
        sch->sch_oddeven = sch->sch_oddeven ? 0 : 1;    /* toggle */
        secbits = sch->sch_oddeven ? sch->sch_secbits_odd :
            sch->sch_secbits_even;
        for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
            secbits[i] = arc4random();
        sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
    }

    /* Secret rotation offset. */
    off = sc->sc_iss.raw() & 0x7;    /* iss was randomized before */

    /* Maximum segment size calculation. */
    pmss = bsd_max(bsd_min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)),
        V_tcp_minmss);
    for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
        if (tcp_sc_msstab[mss] <= pmss)
            break;

    /* Fold parameters and MD5 digest into the ISN we will send. */
    data = sch->sch_oddeven;    /* odd or even secret, 1 bit */
    data |= off << 1;           /* secret offset, derived from iss, 3 bits */
    data |= mss << 4;           /* mss, 3 bits */

    MD5Init(&ctx);
    MD5Update(&ctx, ((u_int8_t *)secbits) + off,
        SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
    MD5Update(&ctx, secbits, off);
    MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
    MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
    MD5Update(&ctx, &data, sizeof(data));
    MD5Final((u_int8_t *)&md5_buffer, &ctx);

    data |= (md5_buffer[0] << 7);
    sc->sc_iss = tcp_seq(data);

#ifdef INET6
    *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
#endif

    /* Additional parameters are stored in the timestamp if present. */
    if (sc->sc_flags & SCF_TIMESTAMP) {
        data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
        data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
        data |= sc->sc_requested_s_scale << 2;   /* SWIN scale, 4 bits */
        data |= sc->sc_requested_r_scale << 6;   /* RWIN scale, 4 bits */
        data |= md5_buffer[2] << 10;             /* more digest bits */
        data ^= md5_buffer[3];
        sc->sc_ts = data;
        sc->sc_tsoff = data - tcp_ts_getticks();    /* after XOR */
    }

    TCPSTAT_INC(tcps_sc_sendcookie);
}
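
/*
 * Decoding sketch (mirrors syncookie_lookup() below, illustrative only):
 * for a returning ACK, th_ack - 1 recovers the ISS built above; bit 0
 * selects the odd/even secret, bits 1..3 the secret rotation offset,
 * bits 4..6 the MSS table index, and bits 7..31 must match the
 * recomputed MD5 digest:
 *
 *     ack   = th->th_ack.raw() - 1;
 *     off   = (ack >> 1) & 0x7;
 *     mss   = (ack >> 4) & 0x7;
 *     flags = ack & 0x7f;
 */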
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
    struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
    struct socket *so)
{
    MD5_CTX ctx;
    u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
    u_int32_t data = 0;
    u_int32_t *secbits;
    u_int32_t ack, seq;
    int off, mss, wnd, flags;

    SCH_LOCK_ASSERT(sch);

    /*
     * Pull information out of SYN-ACK/ACK and
     * revert sequence number advances.
     */
    ack = th->th_ack.raw() - 1;
    seq = th->th_seq.raw() - 1;
    off = (ack >> 1) & 0x7;
    mss = (ack >> 4) & 0x7;
    flags = ack & 0x7f;

    /* Which of the two secrets to use. */
    secbits = (flags & 0x1) ? sch->sch_secbits_odd :
        sch->sch_secbits_even;

    /*
     * The secret wasn't updated for the lifetime of a syncookie,
     * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
     */
    if (sch->sch_reseed + SYNCOOKIE_LIFETIME < time_uptime) {
        return (NULL);
    }

    /* Recompute the digest so we can compare it. */
    MD5Init(&ctx);
    MD5Update(&ctx, ((u_int8_t *)secbits) + off,
        SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
    MD5Update(&ctx, secbits, off);
    MD5Update(&ctx, inc, sizeof(*inc));
    MD5Update(&ctx, &seq, sizeof(seq));
    MD5Update(&ctx, &flags, sizeof(flags));
    MD5Final((u_int8_t *)&md5_buffer, &ctx);

    /* Does the digest part of our ACK'ed ISS match? */
    if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
        return (NULL);

    /* Does the digest part of our reflected timestamp match? */
    if (to->to_flags & TOF_TS) {
        data = md5_buffer[3] ^ to->to_tsecr;
        if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
            return (NULL);
    }

    /* Fill in the syncache values. */
    bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
    sc->sc_ipopts = NULL;

    sc->sc_irs = tcp_seq(seq);
    sc->sc_iss = tcp_seq(ack);

#ifdef INET6
    if (inc->inc_flags & INC_ISIPV6) {
        if (sotoinpcb(so)->inp_flags & IN6P_AUTOFLOWLABEL)
            sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
    } else
#endif
    {
        sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
        sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
    }

    /* Additional parameters that were encoded in the timestamp. */
    if (data) {
        sc->sc_flags |= SCF_TIMESTAMP;
        sc->sc_tsreflect = to->to_tsval;
        sc->sc_ts = to->to_tsecr;
        sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
        sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
        sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
        sc->sc_requested_s_scale = bsd_min((data >> 2) & 0xf,
            TCP_MAX_WINSHIFT);
        sc->sc_requested_r_scale = bsd_min((data >> 6) & 0xf,
            TCP_MAX_WINSHIFT);
        if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
            sc->sc_flags |= SCF_WINSCALE;
    } else
        sc->sc_flags |= SCF_NOOPT;

    wnd = sbspace(&so->so_rcv);
    wnd = imax(wnd, 0);
    wnd = imin(wnd, TCP_MAXWIN);
    sc->sc_wnd = wnd;

    sc->sc_rxmits = 0;
    sc->sc_peer_mss = tcp_sc_msstab[mss];

    TCPSTAT_INC(tcps_

(Large file truncated here; see the repository for the full source.)