PageRenderTime 65ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/src/plugins/ctp/sock/ctp_sock_api.c

https://github.com/storage-zuiwanyuan/cci
C | 5640 lines | 4325 code | 714 blank | 601 comment | 803 complexity | 192e2833f64ce1309bf7d36f5865392c MD5 | raw file
Possible License(s): BSD-3-Clause

Large files are truncated; view the full file at the repository.

  1. /* vim: set tabstop=8:softtabstop=8:shiftwidth=8:noexpandtab */
  2. /*
  3. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
  4. * Copyright © 2010-2013 UT-Battelle, LLC. All rights reserved.
  5. * Copyright © 2010-2013 Oak Ridge National Labs. All rights reserved.
  6. * Copyright © 2012 inria. All rights reserved.
  7. *
  8. * See COPYING in top-level directory
  9. *
  10. * $COPYRIGHT$
  11. *
  12. */
  13. #if defined(__INTEL_COMPILER)
  14. #pragma warning(disable:593)
  15. #pragma warning(disable:869)
  16. #pragma warning(disable:981)
  17. #pragma warning(disable:1338)
  18. #pragma warning(disable:2259)
  19. #endif /* __INTEL_COMPILER */
  20. #include "cci/private_config.h"
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <unistd.h>
  25. #include <netinet/in.h>
  26. #include <arpa/inet.h>
  27. #include <sys/socket.h>
  28. #include <sys/types.h>
  29. #include <netdb.h>
  30. #include <fcntl.h>
  31. #include <inttypes.h>
  32. #ifdef HAVE_IFADDRS_H
  33. #include <ifaddrs.h>
  34. #include <net/if.h>
  35. #endif
  36. #ifdef HAVE_SYS_EPOLL_H
  37. #include <sys/epoll.h>
  38. #else
  39. #include <poll.h>
  40. #endif /* HAVE_SYS_EPOLL_H */
  41. #include "cci.h"
  42. #include "cci_lib_types.h"
  43. #include "cci-api.h"
  44. #include "plugins/ctp/ctp.h"
  45. #include "ctp_sock_internals.h"
  46. #define DEBUG_RNR 0
  47. #if DEBUG_RNR
  48. #include <stdbool.h>
  49. bool conn_established = false;
  50. #endif
  51. sock_globals_t *sglobals = NULL;
  52. static int threads_running = 0;
  53. /*
  54. * Local functions
  55. */
  56. static int ctp_sock_init(cci_plugin_ctp_t *plugin,
  57. uint32_t abi_ver,
  58. uint32_t flags,
  59. uint32_t * caps);
  60. static int ctp_sock_finalize(cci_plugin_ctp_t * plugin);
  61. static const char *ctp_sock_strerror(cci_endpoint_t * endpoint,
  62. enum cci_status status);
  63. static int ctp_sock_create_endpoint(cci_device_t * device,
  64. int flags,
  65. cci_endpoint_t ** endpoint,
  66. cci_os_handle_t * fd);
  67. static int ctp_sock_destroy_endpoint(cci_endpoint_t * endpoint);
  68. static int ctp_sock_accept(cci_event_t *event, const void *context);
  69. static int ctp_sock_reject(cci_event_t *conn_req);
  70. static int ctp_sock_connect(cci_endpoint_t * endpoint,
  71. const char *server_uri,
  72. const void *data_ptr,
  73. uint32_t data_len,
  74. cci_conn_attribute_t attribute,
  75. const void *context,
  76. int flags,
  77. const struct timeval *timeout);
  78. static int ctp_sock_disconnect(cci_connection_t * connection);
  79. static int ctp_sock_set_opt(cci_opt_handle_t * handle,
  80. cci_opt_name_t name,
  81. const void *val);
  82. static int ctp_sock_get_opt(cci_opt_handle_t * handle,
  83. cci_opt_name_t name,
  84. void *val);
  85. static int ctp_sock_arm_os_handle(cci_endpoint_t * endpoint, int flags);
  86. static int ctp_sock_get_event(cci_endpoint_t * endpoint,
  87. cci_event_t ** const event);
  88. static int ctp_sock_return_event(cci_event_t * event);
  89. static int ctp_sock_send(cci_connection_t * connection,
  90. const void *msg_ptr,
  91. uint32_t msg_len,
  92. const void *context,
  93. int flags);
  94. static int ctp_sock_sendv(cci_connection_t * connection,
  95. const struct iovec *data,
  96. uint32_t iovcnt,
  97. const void *context,
  98. int flags);
  99. static int ctp_sock_rma_register(cci_endpoint_t * endpoint,
  100. void *start,
  101. uint64_t length,
  102. int flags,
  103. cci_rma_handle_t ** rma_handle);
  104. static int ctp_sock_rma_deregister(cci_endpoint_t * endpoint,
  105. cci_rma_handle_t * rma_handle);
  106. static int ctp_sock_rma(cci_connection_t * connection,
  107. const void *header_ptr,
  108. uint32_t header_len,
  109. cci_rma_handle_t * local_handle,
  110. uint64_t local_offset,
  111. cci_rma_handle_t * remote_handle,
  112. uint64_t remote_offset,
  113. uint64_t data_len,
  114. const void *context,
  115. int flags);
  116. static uint8_t sock_ip_hash(in_addr_t ip, uint16_t port);
  117. static void sock_progress_sends(cci__ep_t * ep);
  118. static void *sock_progress_thread(void *arg);
  119. static void *sock_recv_thread(void *arg);
  120. static void sock_ack_conns(cci__ep_t * ep);
  121. static inline int pack_piggyback_ack(cci__ep_t *ep,
  122. sock_conn_t *sconn, sock_tx_t *tx);
  123. static inline int sock_ack_sconn(sock_ep_t *sep, sock_conn_t *sconn);
  124. static int sock_recvfrom_ep(cci__ep_t * ep);
  125. int progress_recv (cci__ep_t *ep);
  126. /*
  127. * Public plugin structure.
  128. *
  129. * The name of this structure must be of the following form:
  130. *
  131. * cci_ctp_<your_plugin_name>_plugin
  132. *
  133. * This allows the symbol to be found after the plugin is dynamically
  134. * opened.
  135. *
  136. * Note that your_plugin_name should match the direct name where the
  137. * plugin resides.
  138. */
/* Public plugin descriptor; the symbol name cci_ctp_sock_plugin is looked
 * up by the CCI core after dlopen()ing this transport. */
cci_plugin_ctp_t cci_ctp_sock_plugin = {
	{
	/* Logistics */
	CCI_ABI_VERSION,
	CCI_CTP_API_VERSION,
	"sock",
	CCI_MAJOR_VERSION, CCI_MINOR_VERSION, CCI_RELEASE_VERSION,
	30,			/* default plugin priority */
	/* Bootstrap function pointers */
	cci_ctp_sock_post_load,
	cci_ctp_sock_pre_unload,
	},
	/* API function pointers */
	ctp_sock_init,
	ctp_sock_finalize,
	ctp_sock_strerror,
	ctp_sock_create_endpoint,
	ctp_sock_destroy_endpoint,
	ctp_sock_accept,
	ctp_sock_reject,
	ctp_sock_connect,
	ctp_sock_disconnect,
	ctp_sock_set_opt,
	ctp_sock_get_opt,
	ctp_sock_arm_os_handle,
	ctp_sock_get_event,
	ctp_sock_return_event,
	ctp_sock_send,
	ctp_sock_sendv,
	ctp_sock_rma_register,
	ctp_sock_rma_deregister,
	ctp_sock_rma
};
  172. static inline int
  173. sock_recv_msg (int fd,
  174. void *ptr,
  175. uint32_t len,
  176. int flags,
  177. struct sockaddr_in *sin_out)
  178. {
  179. int ret = 0;
  180. uint32_t recv_len = 0;
  181. static int count = 0;
  182. uint32_t offset = 0;
  183. struct sockaddr_in sin;
  184. socklen_t sin_len = sizeof(sin);
  185. if (len == 0)
  186. return ret;
  187. again:
  188. do {
  189. ret = recvfrom (fd, (void*) ((uintptr_t)ptr + offset), len - recv_len, flags, (struct sockaddr *)&sin, &sin_len);
  190. if (ret < 0) {
  191. if ((count++ & 0xFFFF) == 0xFFFF)
  192. debug (CCI_DB_EP, "%s: recvfrom() failed with %s (%u of %u bytes)", __func__, strerror(ret), recv_len, len);
  193. if (ret == EAGAIN)
  194. goto again;
  195. goto out;
  196. } else if (ret == 0) {
  197. debug (CCI_DB_MSG, "%s: recvfrom() failed - socket closed", __func__);
  198. ret = -1;
  199. goto out;
  200. }
  201. recv_len += ret;
  202. offset += recv_len;
  203. } while (recv_len < len);
  204. ret = recv_len;
  205. if (sin_out != NULL)
  206. *sin_out = sin;
  207. out:
  208. return ret;
  209. }
  210. static inline void
  211. sock_sin_to_name(struct sockaddr_in sin, char *buffer, int len)
  212. {
  213. snprintf(buffer, len, "%s:%d", inet_ntoa(sin.sin_addr),
  214. ntohs(sin.sin_port));
  215. return;
  216. }
  217. static inline const char *sock_msg_type(sock_msg_type_t type)
  218. {
  219. switch (type) {
  220. case SOCK_MSG_CONN_REQUEST:
  221. return "conn_request";
  222. case SOCK_MSG_CONN_REPLY:
  223. return "conn_reply";
  224. case SOCK_MSG_CONN_ACK:
  225. return "conn_ack";
  226. case SOCK_MSG_DISCONNECT:
  227. return "disconnect";
  228. case SOCK_MSG_SEND:
  229. return "send";
  230. case SOCK_MSG_RNR:
  231. return "receiver not ready";
  232. case SOCK_MSG_KEEPALIVE:
  233. return "keepalive";
  234. case SOCK_MSG_PING:
  235. return "ping for RTTM";
  236. case SOCK_MSG_ACK_ONLY:
  237. return "ack_only";
  238. case SOCK_MSG_ACK_UP_TO:
  239. return "ack_up_to";
  240. case SOCK_MSG_SACK:
  241. return "selective ack";
  242. case SOCK_MSG_NACK:
  243. return "negative ack";
  244. case SOCK_MSG_RMA_WRITE:
  245. return "RMA write";
  246. case SOCK_MSG_RMA_WRITE_DONE:
  247. return "RMA write done";
  248. case SOCK_MSG_RMA_READ_REQUEST:
  249. return "RMA read request";
  250. case SOCK_MSG_RMA_READ_REPLY:
  251. return "RMA read reply";
  252. case SOCK_MSG_RMA_INVALID:
  253. return "invalid RMA handle";
  254. case SOCK_MSG_INVALID:
  255. assert(0);
  256. return "invalid";
  257. case SOCK_MSG_TYPE_MAX:
  258. assert(0);
  259. return "type_max";
  260. }
  261. return NULL;
  262. }
  263. static inline void sock_drop_msg(cci_os_handle_t sock)
  264. {
  265. char buf[4];
  266. struct sockaddr sa;
  267. socklen_t slen = sizeof(sa);
  268. recvfrom(sock, buf, 4, 0, &sa, &slen);
  269. return;
  270. }
/* Spawn the per-endpoint receive and progress threads.
 * Returns 0 on success or the pthread_create() error code on failure.
 * NOTE(review): if the second pthread_create() fails, the receive thread
 * started by the first call is left running and is not joined here —
 * confirm that callers tear it down (e.g. via sock_terminate_threads). */
static inline int sock_create_threads (cci__ep_t *ep)
{
	int ret;
	sock_ep_t *sep;

	assert (ep);
	sep = ep->priv;

	/* drains the socket into rx buffers */
	ret = pthread_create(&sep->recv_tid, NULL, sock_recv_thread, (void*)ep);
	if (ret)
		goto out;

	/* drives timeouts, acks and queued sends */
	ret = pthread_create(&sep->progress_tid, NULL, sock_progress_thread, (void*)ep);
	if (ret)
		goto out;
out:
	return ret;
}
/* Stop the endpoint's progress and receive threads and wait for them.
 * Caller is expected to have set sep->closing beforehand (see
 * ctp_sock_destroy_endpoint); the condition signal wakes the progress
 * thread so it can observe that flag and exit. Always CCI_SUCCESS. */
static inline int sock_terminate_threads (sock_ep_t *sep)
{
	CCI_ENTER;

	assert (sep);

	/* signal under the mutex so the wakeup cannot be lost */
	pthread_mutex_lock(&sep->progress_mutex);
	pthread_cond_signal(&sep->wait_condition);
	pthread_mutex_unlock(&sep->progress_mutex);

	pthread_join(sep->progress_tid, NULL);
	pthread_join(sep->recv_tid, NULL);

	CCI_EXIT;
	return CCI_SUCCESS;
}
  298. static int ctp_sock_init(cci_plugin_ctp_t *plugin,
  299. uint32_t abi_ver, uint32_t flags, uint32_t * caps)
  300. {
  301. int ret;
  302. cci__dev_t *dev, *ndev;
  303. cci_device_t **devices;
  304. #ifdef HAVE_GETIFADDRS
  305. struct ifaddrs *addrs = NULL, *addr;
  306. #endif
  307. CCI_ENTER;
  308. /* Some unused parameters, the following avoids warnings from
  309. compilers */
  310. UNUSED_PARAM (abi_ver);
  311. UNUSED_PARAM (flags);
  312. UNUSED_PARAM (caps);
  313. #if DEBUG_RNR
  314. fprintf(stderr, "Warning, debug mode (RNR testing)!\n");
  315. #endif
  316. /* init sock globals */
  317. sglobals = calloc(1, sizeof(*sglobals));
  318. if (!sglobals) {
  319. CCI_EXIT;
  320. return CCI_ENOMEM;
  321. }
  322. srandom((unsigned int)sock_get_usecs());
  323. #ifdef HAVE_GETIFADDRS
  324. getifaddrs(&addrs);
  325. /* ignore errors, we've use defaults */
  326. #endif
  327. devices = calloc(CCI_MAX_DEVICES, sizeof(*sglobals->devices));
  328. if (!devices) {
  329. ret = CCI_ENOMEM;
  330. goto out;
  331. }
  332. if (!globals->configfile) {
  333. #ifdef HAVE_GETIFADDRS
  334. if (addrs) {
  335. for (addr = addrs; addr != NULL; addr = addr->ifa_next) {
  336. struct cci_device *device;
  337. sock_dev_t *sdev;
  338. uint32_t mtu = (uint32_t) -1;
  339. struct sockaddr_in *sai;
  340. if (!addr->ifa_addr)
  341. continue;
  342. if (addr->ifa_addr->sa_family != AF_INET)
  343. continue;
  344. if (addr->ifa_flags & IFF_LOOPBACK)
  345. continue;
  346. dev = calloc(1, sizeof(*dev));
  347. if (!dev) {
  348. ret = CCI_ENOMEM;
  349. goto out;
  350. }
  351. dev->priv = calloc(1, sizeof(*sdev));
  352. if (!dev->priv) {
  353. free(dev);
  354. ret = CCI_ENOMEM;
  355. goto out;
  356. }
  357. cci__init_dev(dev);
  358. dev->plugin = plugin;
  359. dev->priority = plugin->base.priority;
  360. /* FIXME GV: could use macro here */
  361. device = &dev->device;
  362. device->transport = strdup("sock");
  363. device->name = strdup(addr->ifa_name);
  364. sdev = dev->priv;
  365. sai = (struct sockaddr_in *) addr->ifa_addr;
  366. memcpy(&sdev->ip, &sai->sin_addr, sizeof(sai->sin_addr));
  367. /* default values */
  368. device->up = 1;
  369. device->rate = 0;
  370. device->pci.domain = -1; /* per CCI spec */
  371. device->pci.bus = -1; /* per CCI spec */
  372. device->pci.dev = -1; /* per CCI spec */
  373. device->pci.func = -1; /* per CCI spec */
  374. /* try to get the actual values */
  375. cci__get_dev_ifaddrs_info(dev, addr);
  376. mtu = device->max_send_size;
  377. if (mtu == (uint32_t) -1) {
  378. /* if no mtu, use default */
  379. device->max_send_size = SOCK_DEFAULT_MSS;
  380. } else {
  381. /* compute mss from mtu */
  382. if (mtu > SOCK_UDP_MAX)
  383. mtu = SOCK_UDP_MAX;
  384. mtu -= SOCK_MAX_HDR_SIZE;
  385. assert(mtu >= SOCK_MIN_MSS); /* FIXME rather ignore the device? */
  386. device->max_send_size = mtu;
  387. }
  388. cci__add_dev(dev);
  389. devices[sglobals->count] = device;
  390. sglobals->count++;
  391. threads_running = 1;
  392. }
  393. }
  394. #endif
  395. } else
  396. /* find devices that we own */
  397. TAILQ_FOREACH_SAFE(dev, &globals->configfile_devs, entry, ndev) {
  398. if (0 == strcmp("sock", dev->device.transport)) {
  399. const char * const *arg;
  400. const char *interface = NULL;
  401. struct cci_device *device;
  402. sock_dev_t *sdev;
  403. uint32_t mtu = (uint32_t) -1;
  404. dev->plugin = plugin;
  405. if (dev->priority == -1)
  406. dev->priority = plugin->base.priority;
  407. device = &dev->device;
  408. /* TODO determine link rate
  409. *
  410. * linux->driver->get ethtool settings->speed
  411. * bsd/darwin->ioctl(SIOCGIFMEDIA)->ifm_active
  412. * windows ?
  413. */
  414. dev->priv = calloc(1, sizeof(*sdev));
  415. if (!dev->priv) {
  416. ret = CCI_ENOMEM;
  417. goto out;
  418. }
  419. sdev = dev->priv;
  420. sdev->port = 0;
  421. sdev->bufsize = 0;
  422. /* default values */
  423. device->up = 1;
  424. device->rate = 0;
  425. device->pci.domain = -1; /* per CCI spec */
  426. device->pci.bus = -1; /* per CCI spec */
  427. device->pci.dev = -1; /* per CCI spec */
  428. device->pci.func = -1; /* per CCI spec */
  429. /* parse conf_argv */
  430. for (arg = device->conf_argv; *arg != NULL; arg++) {
  431. if (0 == strncmp("ip=", *arg, 3)) {
  432. const char *ip = *arg + 3;
  433. /* network order */
  434. sdev->ip = inet_addr(ip);
  435. } else if (0 == strncmp("mtu=", *arg, 4)) {
  436. const char *mtu_str = *arg + 4;
  437. mtu = strtol(mtu_str, NULL, 0);
  438. } else if (0 == strncmp("port=", *arg, 5)) {
  439. const char *s_port = *arg + 5;
  440. uint16_t port;
  441. port = atoi (s_port);
  442. sdev->port = htons(port);
  443. } else if (0 == strncmp("bufsize=", *arg, 8)) {
  444. const char *size_str = *arg + 8;
  445. sdev->bufsize = strtol(size_str,
  446. NULL, 0);
  447. } else if (0 == strncmp("interface=",
  448. *arg, 10))
  449. {
  450. interface = *arg + 10;
  451. }
  452. }
  453. if (sdev->ip != 0 || interface) {
  454. /* try to get the actual values now */
  455. #ifdef HAVE_GETIFADDRS
  456. if (addrs) {
  457. for (addr = addrs;
  458. addr != NULL;
  459. addr = addr->ifa_next)
  460. {
  461. struct sockaddr_in *sai;
  462. if (!addr->ifa_addr)
  463. continue;
  464. if (addr->ifa_addr->sa_family != AF_INET)
  465. continue;
  466. sai = (struct sockaddr_in *) addr->ifa_addr;
  467. if (!memcmp(&sdev->ip, &sai->sin_addr, sizeof(sdev->ip)))
  468. break;
  469. if (interface &&
  470. !strcmp(interface, addr->ifa_name)) {
  471. memcpy(&sdev->ip, &sai->sin_addr, sizeof(sdev->ip));
  472. break;
  473. }
  474. }
  475. if (!addr)
  476. /* no such device, don't initialize it */
  477. continue;
  478. cci__get_dev_ifaddrs_info(dev, addr);
  479. }
  480. #endif
  481. if (mtu == (uint32_t) -1) {
  482. /* if mtu not specified, use the ifaddr one */
  483. mtu = device->max_send_size;
  484. }
  485. if (mtu == (uint32_t) -1) {
  486. /* if still no mtu, use default */
  487. device->max_send_size = SOCK_DEFAULT_MSS;
  488. } else {
  489. /* compute mss from mtu */
  490. if (mtu > SOCK_UDP_MAX)
  491. mtu = SOCK_UDP_MAX;
  492. mtu -= SOCK_MAX_HDR_SIZE;
  493. assert(mtu >= SOCK_MIN_MSS); /* FIXME rather ignore the device? */
  494. device->max_send_size = mtu;
  495. }
  496. /* queue to the main device list now */
  497. TAILQ_REMOVE(&globals->configfile_devs, dev, entry);
  498. cci__add_dev(dev);
  499. devices[sglobals->count] = device;
  500. sglobals->count++;
  501. threads_running = 1;
  502. }
  503. }
  504. }
  505. devices =
  506. realloc(devices, (sglobals->count + 1) * sizeof(cci_device_t *));
  507. devices[sglobals->count] = NULL;
  508. *((cci_device_t ***) & sglobals->devices) = devices;
  509. #ifdef HAVE_GETIFADDRS
  510. freeifaddrs(addrs);
  511. #endif
  512. CCI_EXIT;
  513. return CCI_SUCCESS;
  514. out:
  515. if (devices) {
  516. int i = 0;
  517. cci_device_t *device;
  518. cci__dev_t *my_dev;
  519. while (devices[i] != NULL) {
  520. device = devices[i];
  521. my_dev = container_of(device, cci__dev_t, device);
  522. if (my_dev->priv)
  523. free(my_dev->priv);
  524. }
  525. free(devices);
  526. }
  527. if (sglobals) {
  528. free((void *)sglobals);
  529. sglobals = NULL;
  530. }
  531. #ifdef HAVE_GETIFADDRS
  532. if (addrs) {
  533. freeifaddrs(addrs);
  534. }
  535. #endif
  536. CCI_EXIT;
  537. return ret;
  538. }
/* TODO */
/* Stub: translate a cci_status into a human-readable string for this
 * endpoint. Not yet implemented — always returns NULL; callers must
 * tolerate a NULL result. */
static const char *ctp_sock_strerror(cci_endpoint_t * endpoint,
                                     enum cci_status status)
{
	CCI_ENTER;
	UNUSED_PARAM (endpoint);
	UNUSED_PARAM (status);
	CCI_EXIT;
	return NULL;
}
  549. /* NOTE the CCI layer has already unbound all devices
  550. * and destroyed all endpoints.
  551. * All we need to do if free dev->priv
  552. */
  553. static int ctp_sock_finalize(cci_plugin_ctp_t * plugin)
  554. {
  555. cci__dev_t *dev = NULL;
  556. CCI_ENTER;
  557. UNUSED_PARAM (plugin);
  558. if (!sglobals) {
  559. CCI_EXIT;
  560. return CCI_ENODEV;
  561. }
  562. TAILQ_FOREACH(dev, &globals->devs, entry)
  563. if (!strcmp(dev->device.transport, "sock"))
  564. free(dev->priv);
  565. free(sglobals->devices);
  566. free((void *)sglobals);
  567. sglobals = NULL;
  568. CCI_EXIT;
  569. return CCI_SUCCESS;
  570. }
  571. static inline int
  572. sock_set_nonblocking(cci_os_handle_t sock, sock_fd_type_t type, void *p)
  573. {
  574. int ret, flags;
  575. UNUSED_PARAM (type);
  576. UNUSED_PARAM (p);
  577. flags = fcntl(sock, F_GETFL, 0);
  578. if (-1 == flags)
  579. flags = 0;
  580. ret = fcntl(sock, F_SETFL, flags | O_NONBLOCK);
  581. if (-1 == ret)
  582. return errno;
  583. return 0;
  584. }
  585. static inline void sock_close_socket(cci_os_handle_t sock)
  586. {
  587. close(sock);
  588. return;
  589. }
  590. static int ctp_sock_create_endpoint(cci_device_t * device,
  591. int flags,
  592. cci_endpoint_t ** endpointp,
  593. cci_os_handle_t * fd)
  594. {
  595. int ret;
  596. uint32_t i;
  597. sock_dev_t *sdev;
  598. struct sockaddr_in sin;
  599. socklen_t slen;
  600. char name[40];
  601. unsigned int sndbuf_size = SOCK_SNDBUF_SIZE;
  602. unsigned int rcvbuf_size = SOCK_RCVBUF_SIZE;
  603. cci__dev_t *dev = NULL;
  604. cci__ep_t *ep = NULL;
  605. sock_ep_t *sep = NULL;
  606. struct cci_endpoint *endpoint = (struct cci_endpoint *) *endpointp;
  607. CCI_ENTER;
  608. UNUSED_PARAM (flags);
  609. if (!sglobals) {
  610. CCI_EXIT;
  611. return CCI_ENODEV;
  612. }
  613. dev = container_of(device, cci__dev_t, device);
  614. if (0 != strcmp("sock", device->transport)) {
  615. ret = CCI_EINVAL;
  616. goto out;
  617. }
  618. ep = container_of(endpoint, cci__ep_t, endpoint);
  619. ep->priv = calloc(1, sizeof(*sep));
  620. if (!ep->priv) {
  621. ret = CCI_ENOMEM;
  622. goto out;
  623. }
  624. ep->rx_buf_cnt = SOCK_EP_RX_CNT;
  625. ep->tx_buf_cnt = SOCK_EP_TX_CNT;
  626. ep->buffer_len = dev->device.max_send_size + SOCK_MAX_HDRS;
  627. ep->tx_timeout = SOCK_EP_TX_TIMEOUT_SEC * 1000000;
  628. sep = ep->priv;
  629. sep->ids = calloc(SOCK_NUM_BLOCKS, sizeof(*sep->ids));
  630. if (!sep->ids) {
  631. ret = CCI_ENOMEM;
  632. goto out;
  633. }
  634. sep->closing = 0;
  635. pthread_mutex_init (&sep->progress_mutex, NULL);
  636. pthread_cond_init (&sep->wait_condition, NULL);
  637. sep->sock = socket(PF_INET, SOCK_DGRAM, 0);
  638. if (sep->sock == -1) {
  639. ret = errno;
  640. goto out;
  641. }
  642. sdev = dev->priv;
  643. if (sndbuf_size < sdev->bufsize)
  644. sndbuf_size = sdev->bufsize;
  645. if (rcvbuf_size < sdev->bufsize)
  646. rcvbuf_size = sdev->bufsize;
  647. if (sndbuf_size > 0) {
  648. ret = setsockopt (sep->sock, SOL_SOCKET, SO_SNDBUF,
  649. &sndbuf_size, sizeof (sndbuf_size));
  650. if (ret == -1)
  651. debug (CCI_DB_WARN,
  652. "%s: Cannot set send buffer size", __func__);
  653. }
  654. if (rcvbuf_size > 0) {
  655. ret = setsockopt (sep->sock, SOL_SOCKET, SO_RCVBUF,
  656. &rcvbuf_size, sizeof (rcvbuf_size));
  657. if (ret == -1)
  658. debug (CCI_DB_WARN, "%s: Cannot set recv buffer size",
  659. __func__);
  660. }
  661. #if CCI_DEBUG
  662. {
  663. socklen_t optlen;
  664. optlen = sizeof (sndbuf_size);
  665. ret = getsockopt (sep->sock, SOL_SOCKET, SO_SNDBUF,
  666. &sndbuf_size, &optlen);
  667. if (ret == -1)
  668. debug (CCI_DB_WARN, "%s: Cannot get send buffer size",
  669. __func__);
  670. debug (CCI_DB_CTP, "Send buffer size: %d bytes (you may also "
  671. "want to check the value of net.core.wmem_max using "
  672. "sysctl)", sndbuf_size);
  673. optlen = sizeof (rcvbuf_size);
  674. ret = getsockopt (sep->sock, SOL_SOCKET, SO_RCVBUF,
  675. &rcvbuf_size, &optlen);
  676. if (ret == -1)
  677. debug (CCI_DB_WARN, "%s: Cannot get recv buffer size",
  678. __func__);
  679. debug (CCI_DB_CTP, "Receive buffer size: %d bytes (you may also "
  680. "want to check the value of net.core.rmem_max using "
  681. "sysctl)", rcvbuf_size);
  682. }
  683. #endif
  684. /* bind socket to device */
  685. memset(&sin, 0, sizeof(sin));
  686. sin.sin_family = AF_INET;
  687. sin.sin_addr.s_addr = sdev->ip;
  688. if (sdev->port != 0)
  689. sin.sin_port = sdev->port;
  690. ret = bind(sep->sock, (const struct sockaddr *)&sin, sizeof(sin));
  691. if (ret) {
  692. ret = errno;
  693. goto out;
  694. }
  695. slen = sizeof(sep->sin);
  696. ret = getsockname(sep->sock, (struct sockaddr *)&sep->sin, &slen);
  697. if (ret) {
  698. ret = errno;
  699. goto out;
  700. }
  701. memset(name, 0, sizeof(name));
  702. sprintf(name, "sock://");
  703. sock_sin_to_name(sep->sin, name + (uintptr_t) 7, sizeof(name) - 7);
  704. ep->uri = strdup(name);
  705. for (i = 0; i < SOCK_EP_HASH_SIZE; i++) {
  706. TAILQ_INIT(&sep->conn_hash[i]);
  707. TAILQ_INIT(&sep->active_hash[i]);
  708. }
  709. TAILQ_INIT(&sep->idle_txs);
  710. TAILQ_INIT(&sep->idle_rxs);
  711. TAILQ_INIT(&sep->handles);
  712. TAILQ_INIT(&sep->rma_ops);
  713. TAILQ_INIT(&sep->queued);
  714. TAILQ_INIT(&sep->pending);
  715. sep->tx_buf = calloc (1, ep->tx_buf_cnt * ep->buffer_len);
  716. if (!sep->tx_buf) {
  717. ret = CCI_ENOMEM;
  718. goto out;
  719. }
  720. sep->txs = calloc (1, ep->tx_buf_cnt * sizeof (sock_tx_t));
  721. if (!sep->txs) {
  722. ret = CCI_ENOMEM;
  723. goto out;
  724. }
  725. /* alloc txs */
  726. for (i = 0; i < ep->tx_buf_cnt; i++) {
  727. sock_tx_t *tx = &sep->txs[i];
  728. tx->ctx = SOCK_CTX_TX;
  729. tx->evt.event.type = CCI_EVENT_SEND;
  730. tx->evt.ep = ep;
  731. tx->buffer = (void*)((uintptr_t)sep->tx_buf
  732. + (i * ep->buffer_len));
  733. tx->len = 0;
  734. TAILQ_INSERT_TAIL(&sep->idle_txs, tx, dentry);
  735. }
  736. sep->rx_buf = calloc (1, ep->rx_buf_cnt * ep->buffer_len);
  737. if (!sep->rx_buf) {
  738. ret = CCI_ENOMEM;
  739. goto out;
  740. }
  741. sep->rxs = calloc (1, ep->rx_buf_cnt * sizeof (sock_rx_t));
  742. if (!sep->rx_buf) {
  743. ret = CCI_ENOMEM;
  744. goto out;
  745. }
  746. /* alloc rxs */
  747. for (i = 0; i < ep->rx_buf_cnt; i++) {
  748. sock_rx_t *rx = &sep->rxs[i];
  749. rx->ctx = SOCK_CTX_RX;
  750. rx->evt.event.type = CCI_EVENT_RECV;
  751. rx->evt.ep = ep;
  752. rx->buffer = (void*)((uintptr_t)sep->rx_buf
  753. + (i * ep->buffer_len));
  754. rx->len = 0;
  755. TAILQ_INSERT_TAIL(&sep->idle_rxs, rx, entry);
  756. }
  757. ret = sock_set_nonblocking(sep->sock, SOCK_FD_EP, ep);
  758. if (ret)
  759. goto out;
  760. sep->event_fd = 0;
  761. #ifdef HAVE_SYS_EPOLL_H
  762. if (fd) {
  763. int fflags = 0;
  764. int rc;
  765. struct epoll_event ev;
  766. ret = epoll_create (2);
  767. if (ret == -1) {
  768. ret = errno;
  769. goto out;
  770. }
  771. sep->event_fd = ret;
  772. fflags = fcntl(sep->event_fd, F_GETFL, 0);
  773. if (fflags == -1) {
  774. ret = errno;
  775. goto out;
  776. }
  777. ret = fcntl(sep->event_fd, F_SETFL, fflags | O_NONBLOCK);
  778. if (ret == -1) {
  779. ret = errno;
  780. goto out;
  781. }
  782. ev.data.ptr = (void*)(uintptr_t)sock_recvfrom_ep;
  783. ev.events = EPOLLIN;
  784. ret = epoll_ctl (sep->event_fd, EPOLL_CTL_ADD, sep->sock, &ev);
  785. if (ret == -1) {
  786. ret = errno;
  787. goto out;
  788. }
  789. rc = pipe (sep->fd);
  790. if (rc == -1) {
  791. debug (CCI_DB_WARN, "%s: %s", __func__, strerror (errno));
  792. return CCI_ERROR;
  793. }
  794. *fd = sep->fd[0];
  795. }
  796. #else
  797. if (fd) {
  798. /* We will have poll on the receive thread so we just need to create a
  799. pipe so the receive and send thread can wake up the application
  800. thread */
  801. pipe (sep->fd);
  802. *fd = sep->fd[0];
  803. /* We set event_fd to value different than zero to know that we are
  804. in blocking mode at the application level */
  805. sep->event_fd = 1;
  806. }
  807. #endif /* HAVE_SYS_EPOLL_H */
  808. ret = sock_create_threads (ep);
  809. if (ret)
  810. goto out;
  811. CCI_EXIT;
  812. return CCI_SUCCESS;
  813. out:
  814. /* Note that there is no need to remove the ep even in the context of
  815. a failure because the ep is added to the list of active endpoints
  816. by cci_create_endpoint(), AFTER the call to this function. */
  817. if (sep) {
  818. if (sep->txs)
  819. free (sep->txs);
  820. if (sep->tx_buf)
  821. free (sep->tx_buf);
  822. if (sep->rxs)
  823. free (sep->rxs);
  824. if (sep->rx_buf)
  825. free (sep->rx_buf);
  826. if (sep->ids)
  827. free(sep->ids);
  828. if (sep->sock)
  829. sock_close_socket(sep->sock);
  830. free(sep);
  831. ep->priv = NULL;
  832. }
  833. if (ep) {
  834. free (ep->uri);
  835. }
  836. *endpointp = NULL;
  837. CCI_EXIT;
  838. return ret;
  839. }
/* Tear down an endpoint: stop its threads, close its fds, and free all
 * connections, rings and RMA state. Lock order is dev->lock then
 * ep->lock; both are dropped around sock_terminate_threads() so the
 * worker threads can make progress toward exit, then retaken. */
static int ctp_sock_destroy_endpoint(cci_endpoint_t * endpoint)
{
	cci__ep_t *ep = NULL;
	cci__dev_t *dev = NULL;
	sock_ep_t *sep = NULL;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	ep = container_of(endpoint, cci__ep_t, endpoint);
	dev = ep->dev;
	sep = ep->priv;

	pthread_mutex_lock(&dev->lock);
	pthread_mutex_lock(&ep->lock);

	if (sep) {
		int i;
		cci__conn_t *conn;
		sock_conn_t *sconn;

		/* tell the worker threads to stop before joining them;
		 * locks must be released or the threads could deadlock */
		sep->closing = 1;

		pthread_mutex_unlock(&dev->lock);
		pthread_mutex_unlock(&ep->lock);
		sock_terminate_threads (sep);
		pthread_mutex_lock(&dev->lock);
		pthread_mutex_lock(&ep->lock);

		/* NOTE(review): fd 0 would not be closed by these > 0 tests —
		 * presumably the pipe fds are never 0 here; confirm */
		if (sep->fd[0] > 0)
			close (sep->fd[0]);
		if (sep->fd[1] > 0)
			close (sep->fd[1]);

		if (sep->sock)
			sock_close_socket(sep->sock);

		/* free every established and in-progress connection */
		for (i = 0; i < SOCK_EP_HASH_SIZE; i++) {
			while (!TAILQ_EMPTY(&sep->conn_hash[i])) {
				sconn = TAILQ_FIRST(&sep->conn_hash[i]);
				TAILQ_REMOVE(&sep->conn_hash[i], sconn, entry);
				conn = sconn->conn;
				free(conn);
				free(sconn);
			}
			while (!TAILQ_EMPTY(&sep->active_hash[i])) {
				sconn = TAILQ_FIRST(&sep->active_hash[i]);
				TAILQ_REMOVE(&sep->active_hash[i], sconn, entry);
				conn = sconn->conn;
				free(conn);
				free(sconn);
			}
		}
		free (sep->txs);
		free (sep->tx_buf);
		free (sep->rxs);
		free (sep->rx_buf);
		while (!TAILQ_EMPTY(&sep->rma_ops)) {
			sock_rma_op_t *rma_op = TAILQ_FIRST(&sep->rma_ops);
			TAILQ_REMOVE(&sep->rma_ops, rma_op, entry);
			free(rma_op);
		}
		while (!TAILQ_EMPTY(&sep->handles)) {
			sock_rma_handle_t *handle = TAILQ_FIRST(&sep->handles);
			TAILQ_REMOVE(&sep->handles, handle, entry);
			free(handle);
		}
		if (sep->ids)
			free(sep->ids);
		free(sep);
		ep->priv = NULL;
	}
	ep->priv = NULL;	/* redundant when sep was set, harmless */
	if (ep->uri)
		free((char *)ep->uri);
	pthread_mutex_unlock(&ep->lock);
	pthread_mutex_unlock(&dev->lock);
	CCI_EXIT;
	return CCI_SUCCESS;
}
/* Allocate a free endpoint-local connection id by probing the id
 * bitfield (ep->ids, SOCK_NUM_BLOCKS uint64_t words) at random until an
 * unset bit is found; the bit is set and its index returned via *id.
 * Loops forever if every probed id is taken.
 * NOTE(review): n is drawn modulo SOCK_NUM_BLOCKS, not modulo
 * SOCK_NUM_BLOCKS * SOCK_BLOCK_SIZE, so only ids in
 * [0, SOCK_NUM_BLOCKS) can ever be returned and most of the bitfield
 * is unused — confirm whether the full range was intended. */
static void sock_get_id(sock_ep_t * ep, uint32_t * id)
{
	uint32_t n, block, offset;
	uint64_t *b;

	while (1) {
		n = random() % SOCK_NUM_BLOCKS;
		block = n / SOCK_BLOCK_SIZE;
		offset = n % SOCK_BLOCK_SIZE;
		b = &ep->ids[block];

		if ((*b & (1ULL << offset)) == 0) {
			*b |= (1ULL << offset);
			*id = (block * SOCK_BLOCK_SIZE) + offset;
			break;
		}
	}
	return;
}
  931. #if 0
  932. static void sock_put_id(sock_ep_t * ep, uint32_t id)
  933. {
  934. uint32_t block, offset;
  935. uint64_t *b;
  936. block = id / SOCK_BLOCK_SIZE;
  937. offset = id % SOCK_BLOCK_SIZE;
  938. b = &ep->ids[block];
  939. assert((*b & (1 << offset)) == 1);
  940. *b &= ~(1 << offset);
  941. return;
  942. }
  943. #endif
  944. static inline uint32_t sock_get_new_seq(void)
  945. {
  946. return ((uint32_t) random() & SOCK_SEQ_MASK);
  947. }
  948. /* The endpoint maintains 256 lists. Hash the ip and port and return the index
  949. * of the list. We use all six bytes and this is endian agnostic. It evenly
  950. * disperses large blocks of addresses as well as large ranges of ports on the
  951. * same address.
  952. */
  953. static uint8_t sock_ip_hash(in_addr_t ip, uint16_t port)
  954. {
  955. port ^= (ip & 0x0000FFFF);
  956. port ^= (ip & 0xFFFF0000) >> 16;
  957. return (port & 0x00FF) ^ ((port & 0xFF00) >> 8);
  958. }
/* Server-side accept of a pending connection request.
 *
 * Builds a new conn/sconn from the conn_request carried by @event, inserts
 * it in the endpoint's connection hash, and queues a reliable conn_reply
 * back to the client. The CCI_EVENT_ACCEPT completion is delivered to the
 * application later, through the tx queued here.
 */
static int ctp_sock_accept(cci_event_t *event, const void *context)
{
	uint8_t a;
	uint16_t b;
	uint32_t unused;
	uint32_t peer_seq;
	uint32_t peer_ts;
	int i;
	cci_endpoint_t *endpoint;
	cci__ep_t *ep = NULL;
	cci__conn_t *conn = NULL;
	cci__evt_t *evt = NULL;
	cci__dev_t *dev = NULL;
	sock_ep_t *sep = NULL;
	sock_conn_t *sconn = NULL;
	sock_header_r_t *hdr_r = NULL;
	sock_msg_type_t type;
	sock_tx_t *tx = NULL;
	sock_rx_t *rx = NULL;
	sock_handshake_t *hs = NULL;
	uint32_t id, ack, max_recv_buffer_count, mss = 0, ka;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	/* The connect-request event rides on an rx buffer; recover it. */
	evt = container_of(event, cci__evt_t, event);
	rx = container_of(evt, sock_rx_t, evt);
	ep = evt->ep;
	endpoint = &ep->endpoint;
	sep = ep->priv;
	dev = ep->dev;

	conn = calloc(1, sizeof(*conn));
	if (!conn) {
		CCI_EXIT;
		return CCI_ENOMEM;
	}

	conn->plugin = ep->plugin;
	conn->tx_timeout = ep->tx_timeout;
	conn->priv = calloc(1, sizeof(*sconn));
	if (!conn->priv) {
		free(conn);
		CCI_EXIT;
		return CCI_ENOMEM;
	}

	/* get a tx for the conn_reply */
	tx = sock_get_tx (ep);
	if (!tx) {
		free(conn->priv);
		free(conn);
		CCI_EXIT;
		return CCI_ENOBUFS;
	}
	tx->rma_ptr = NULL;
	tx->rma_len = 0;

	/* Parse the client's request header: a holds the connection
	 * attribute; b and unused are decoded but not needed here. */
	hdr_r = rx->buffer;
	sock_parse_header(&hdr_r->header, &type, &a, &b, &unused);
	sock_parse_seq_ts(&hdr_r->seq_ts, &peer_seq, &peer_ts);

	conn->connection.attribute = (enum cci_conn_attribute)a;
	conn->connection.endpoint = endpoint;
	conn->connection.context = (void *)context;
	conn->connection.max_send_size = dev->device.max_send_size;

	/* The handshake payload immediately follows the reliable header. */
	hs = (sock_handshake_t *)((uintptr_t)rx->buffer +
		(uintptr_t) sizeof(sock_header_r_t));
	sock_parse_handshake(hs, &id, &ack, &max_recv_buffer_count, &mss, &ka);
	if (ka != 0UL) {
		/* The client requested keepalive; adopt its timeout. */
		debug(CCI_DB_CONN, "%s: keepalive timeout: %d", __func__, ka);
		conn->keepalive_timeout = ka;
	}
	if (mss < SOCK_MIN_MSS) {
		/* FIXME do what? */
	}
	/* Never send more per message than the peer can receive. */
	if (mss < conn->connection.max_send_size)
		conn->connection.max_send_size = mss;

	sconn = conn->priv;
	TAILQ_INIT(&sconn->tx_seqs);
	TAILQ_INIT(&sconn->acks);
	TAILQ_INIT(&sconn->rmas);
	sconn->conn = conn;
	sconn->cwnd = SOCK_INITIAL_CWND;
	sconn->status = SOCK_CONN_READY;	/* set ready since the app thinks it is */
	sconn->last_recvd_seq = 0;
	*((struct sockaddr_in *)&sconn->sin) = rx->sin;
	sconn->peer_id = id;
	sock_get_id(sep, &sconn->id);
	sconn->seq = sock_get_new_seq();	/* even for UU since this reply is reliable */
	sconn->seq_pending = sconn->seq - 1;
	if (cci_conn_is_reliable(conn)) {
		/* Cap in-flight txs by what the peer can buffer. */
		sconn->max_tx_cnt = max_recv_buffer_count < ep->tx_buf_cnt ?
			max_recv_buffer_count : ep->tx_buf_cnt;
		sconn->last_ack_seq = sconn->seq;
		sconn->last_ack_ts = sock_get_usecs();
		sconn->ssthresh = sconn->max_tx_cnt;
		sconn->seq_pending = sconn->seq;
	}

	/* insert in sock ep's list of conns */
	i = sock_ip_hash(sconn->sin.sin_addr.s_addr, sconn->sin.sin_port);
	pthread_mutex_lock(&ep->lock);
	TAILQ_INSERT_TAIL(&sep->conn_hash[i], sconn, entry);
	pthread_mutex_unlock(&ep->lock);

	debug_ep(ep, CCI_DB_CONN, "%s: accepting conn with hash %d",
		__func__, i);

	/* prepare conn_reply */
	tx->msg_type = SOCK_MSG_CONN_REPLY;
	tx->last_attempt_us = 0ULL;
	tx->timeout_us = 0ULL;
	tx->rma_op = NULL;

	/* The ACCEPT completion event is carried by the reply tx. */
	evt = &tx->evt;
	evt->ep = ep;
	evt->conn = conn;
	evt->event.type = CCI_EVENT_ACCEPT;
	evt->event.accept.status = CCI_SUCCESS;	/* for now */
	evt->event.accept.context = (void *)context;
	evt->event.accept.connection = &conn->connection;

	/* pack the msg */
	hdr_r = (sock_header_r_t *) tx->buffer;
	sock_pack_conn_reply(&hdr_r->header, CCI_SUCCESS /* FIXME */ ,
			sconn->peer_id);
	sock_pack_seq_ts(&hdr_r->seq_ts, sconn->seq,
			(uint32_t) sconn->last_ack_ts);
	hs = (sock_handshake_t *) ((uintptr_t)tx->buffer + sizeof(*hdr_r));
	sock_pack_handshake(hs, sconn->id, peer_seq,
			ep->rx_buf_cnt,
			conn->connection.max_send_size, 0);

	tx->len = sizeof(*hdr_r) + sizeof(*hs);
	tx->seq = sconn->seq;

	/* NOTE(review): sconn->ts is never assigned in this function (it is
	   zero from calloc) — confirm whether this debug line was meant to
	   print peer_ts instead. */
	debug_ep(ep, CCI_DB_CONN, "%s: queuing conn_reply with seq %u ts %x",
		__func__, sconn->seq, sconn->ts);

	/* insert at tail of device's queued list */
	tx->state = SOCK_TX_QUEUED;
	pthread_mutex_lock(&ep->lock);
	TAILQ_INSERT_TAIL(&sep->queued, &tx->evt, entry);
	pthread_mutex_unlock(&ep->lock);

	/* try to progress txs */
	pthread_mutex_lock(&sep->progress_mutex);
	pthread_cond_signal(&sep->wait_condition);
	pthread_mutex_unlock(&sep->progress_mutex);

	CCI_EXIT;
	return CCI_SUCCESS;
}
/* Send reject reply to client.
 *
 * We cannot use the event's buffer since the app will most likely return the
 * event before we get an ack from the client. We will get a tx for the reply.
 */
static int ctp_sock_reject(cci_event_t *event)
{
	int ret = CCI_SUCCESS;
	uint8_t a;
	uint16_t b;
	uint32_t peer_id;
	uint32_t peer_seq;
	uint32_t peer_ts;
	cci__evt_t *evt = NULL;
	cci__ep_t *ep = NULL;
	sock_ep_t *sep = NULL;
	sock_header_r_t *hdr_r = NULL;
	sock_msg_type_t type;
	sock_rx_t *rx = NULL;
	sock_tx_t *tx = NULL;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	/* Recover the rx holding the conn_request being rejected; we need
	 * the peer's id and seq to address the reply. */
	evt = container_of(event, cci__evt_t, event);
	ep = evt->ep;
	sep = ep->priv;
	rx = container_of(evt, sock_rx_t, evt);
	hdr_r = rx->buffer;
	/* a, b and peer_ts are decoded but not used for a reject. */
	sock_parse_header(&hdr_r->header, &type, &a, &b, &peer_id);
	sock_parse_seq_ts(&hdr_r->seq_ts, &peer_seq, &peer_ts);

	/* get a tx */
	tx = sock_get_tx (ep);
	if (!tx) {
		ret = CCI_ENOBUFS;
		goto out;
	}

	tx->rma_ptr = NULL;
	tx->rma_len = 0;

	/* prep the tx: no connection exists, so evt.conn stays NULL and the
	 * eventual completion (if any) is a refused-connect event. */
	tx->msg_type = SOCK_MSG_CONN_REPLY;
	tx->evt.ep = ep;
	tx->evt.conn = NULL;
	tx->evt.event.type = CCI_EVENT_CONNECT;
	tx->evt.event.connect.status = CCI_ECONNREFUSED;
	tx->evt.event.connect.connection = NULL;
	tx->last_attempt_us = 0ULL;
	tx->timeout_us = 0ULL;
	tx->rma_op = NULL;
	tx->sin = rx->sin;

	/* prepare conn_reply */
	hdr_r = (sock_header_r_t *) tx->buffer;
	sock_pack_conn_reply(&hdr_r->header, CCI_ECONNREFUSED, peer_id);
	sock_pack_seq_ts(&hdr_r->seq_ts, peer_seq, 0);
	tx->len = sizeof(*hdr_r);
	tx->state = SOCK_TX_QUEUED;
	/* We have no connection and the request is rejected so we generate
	   a new seq since the client may or not ack the conn_reply. In the
	   worst case, the conn_reply associated to the reject is thrown away
	   when it times out */
	tx->seq = sock_get_new_seq ();

	/* insert at tail of endpoint's queued list */
	pthread_mutex_lock(&ep->lock);
	TAILQ_INSERT_TAIL(&sep->queued, &tx->evt, entry);
	pthread_mutex_unlock(&ep->lock);

	/* try to progress txs */
	pthread_mutex_lock(&sep->progress_mutex);
	pthread_cond_signal(&sep->wait_condition);
	pthread_mutex_unlock(&sep->progress_mutex);

#if CCI_DEBUG
	{
		char name[32];

		memset(name, 0, sizeof(name));
		sock_sin_to_name(rx->sin, name, sizeof(name));
		debug_ep(ep, (CCI_DB_MSG | CCI_DB_CONN),
			"%s: queued conn_reply (reject) to %s (seq %u)",
			__func__, name, tx->seq);
	}
#endif

out:
	CCI_EXIT;
	return ret;
}
  1183. static int sock_getaddrinfo(const char *uri, in_addr_t * in, uint16_t * port)
  1184. {
  1185. int ret;
  1186. char *hostname, *svc, *colon;
  1187. struct addrinfo *ai = NULL, hints;
  1188. if (0 == strncmp("sock://", uri, 7))
  1189. hostname = strdup(&uri[7]);
  1190. else {
  1191. CCI_EXIT;
  1192. return CCI_EINVAL;
  1193. }
  1194. colon = strchr(hostname, ':');
  1195. if (colon) {
  1196. *colon = '\0';
  1197. } else {
  1198. free(hostname);
  1199. CCI_EXIT;
  1200. return CCI_EINVAL;
  1201. }
  1202. colon++;
  1203. svc = colon;
  1204. memset(&hints, 0, sizeof(hints));
  1205. hints.ai_family = AF_INET;
  1206. hints.ai_socktype = SOCK_DGRAM;
  1207. hints.ai_protocol = IPPROTO_UDP;
  1208. ret = getaddrinfo(hostname, svc, &hints, &ai);
  1209. free(hostname);
  1210. if (ret) {
  1211. if (ai)
  1212. freeaddrinfo(ai);
  1213. CCI_EXIT;
  1214. return ret;
  1215. }
  1216. *in = ((struct sockaddr_in *)ai->ai_addr)->sin_addr.s_addr;
  1217. *port = ((struct sockaddr_in *)ai->ai_addr)->sin_port;
  1218. freeaddrinfo(ai);
  1219. CCI_EXIT;
  1220. return CCI_SUCCESS;
  1221. }
  1222. static sock_conn_t *sock_find_open_conn(sock_ep_t * sep, in_addr_t ip,
  1223. uint16_t port, uint32_t id)
  1224. {
  1225. uint8_t i;
  1226. struct s_conns *conn_list;
  1227. sock_conn_t *sconn = NULL, *sc;
  1228. CCI_ENTER;
  1229. i = sock_ip_hash(ip, port);
  1230. conn_list = &sep->conn_hash[i];
  1231. TAILQ_FOREACH(sc, conn_list, entry) {
  1232. if (sc->sin.sin_addr.s_addr == ip &&
  1233. sc->sin.sin_port == port && sc->id == id) {
  1234. sconn = sc;
  1235. break;
  1236. }
  1237. }
  1238. CCI_EXIT;
  1239. return sconn;
  1240. }
  1241. static sock_conn_t *sock_find_active_conn(sock_ep_t * sep, in_addr_t ip,
  1242. uint32_t id)
  1243. {
  1244. uint8_t i;
  1245. struct s_active *active_list;
  1246. sock_conn_t *sconn = NULL, *sc;
  1247. CCI_ENTER;
  1248. i = sock_ip_hash(ip, 0);
  1249. active_list = &sep->active_hash[i];
  1250. TAILQ_FOREACH(sc, active_list, entry) {
  1251. if (sc->sin.sin_addr.s_addr == ip && sc->id == id) {
  1252. sconn = sc;
  1253. break;
  1254. }
  1255. }
  1256. CCI_EXIT;
  1257. return sconn;
  1258. }
  1259. static sock_conn_t *sock_find_conn(sock_ep_t * sep, in_addr_t ip, uint16_t port,
  1260. uint32_t id, sock_msg_type_t type)
  1261. {
  1262. switch (type) {
  1263. case SOCK_MSG_CONN_REPLY:
  1264. return sock_find_active_conn(sep, ip, id);
  1265. default:
  1266. return sock_find_open_conn(sep, ip, port, id);
  1267. }
  1268. }
  1269. static int ctp_sock_connect(cci_endpoint_t * endpoint,
  1270. const char *server_uri,
  1271. const void *data_ptr,
  1272. uint32_t data_len,
  1273. cci_conn_attribute_t attribute,
  1274. const void *context,
  1275. int flags,
  1276. const struct timeval *timeout)
  1277. {
  1278. int ret;
  1279. int i;
  1280. cci__ep_t *ep = NULL;
  1281. cci__dev_t *dev = NULL;
  1282. cci__conn_t *conn = NULL;
  1283. sock_ep_t *sep = NULL;
  1284. sock_conn_t *sconn = NULL;
  1285. sock_tx_t *tx = NULL;
  1286. sock_header_r_t *hdr_r = NULL;
  1287. cci__evt_t *evt = NULL;
  1288. struct cci_connection *connection = NULL;
  1289. struct sockaddr_in *sin = NULL;
  1290. void *ptr = NULL;
  1291. in_addr_t ip;
  1292. uint32_t ts = 0;
  1293. struct s_active *active_list;
  1294. sock_handshake_t *hs = NULL;
  1295. uint16_t port;
  1296. uint32_t keepalive = 0ULL;
  1297. CCI_ENTER;
  1298. UNUSED_PARAM (flags);
  1299. UNUSED_PARAM (timeout);
  1300. if (!sglobals) {
  1301. CCI_EXIT;
  1302. return CCI_ENODEV;
  1303. }
  1304. /* allocate a new connection */
  1305. conn = calloc(1, sizeof(*conn));
  1306. if (!conn) {
  1307. CCI_EXIT;
  1308. return CCI_ENOMEM;
  1309. }
  1310. conn->priv = calloc(1, sizeof(*sconn));
  1311. if (!conn->priv) {
  1312. ret = CCI_ENOMEM;
  1313. goto out;
  1314. }
  1315. sconn = conn->priv;
  1316. sconn->conn = conn;
  1317. TAILQ_INIT(&sconn->tx_seqs);
  1318. TAILQ_INIT(&sconn->acks);
  1319. TAILQ_INIT(&sconn->rmas);
  1320. /* conn->tx_timeout = 0 by default */
  1321. connection = &conn->connection;
  1322. connection->attribute = attribute;
  1323. connection->endpoint = endpoint;
  1324. connection->context = (void *)context;
  1325. /* set up sock specific info */
  1326. sconn->status = SOCK_CONN_ACTIVE;
  1327. sconn->cwnd = SOCK_INITIAL_CWND;
  1328. sconn->last_recvd_seq = 0;
  1329. sin = (struct sockaddr_in *)&sconn->sin;
  1330. memset(sin, 0, sizeof(*sin));
  1331. sin->sin_family = AF_INET;
  1332. ret = sock_getaddrinfo(server_uri, &ip, &port);
  1333. if (ret)
  1334. goto out;
  1335. sin->sin_addr.s_addr = ip; /* already in network order */
  1336. sin->sin_port = port; /* already in network order */
  1337. /* peer will assign id */
  1338. /* get our endpoint and device */
  1339. ep = container_of(endpoint, cci__ep_t, endpoint);
  1340. sep = ep->priv;
  1341. dev = ep->dev;
  1342. connection->max_send_size = dev->device.max_send_size;
  1343. conn->plugin = ep->plugin;
  1344. /* Dealing with keepalive, if set, include the keepalive timeout value into
  1345. the connection request */
  1346. if ((((attribute & CCI_CONN_ATTR_RO) == CCI_CONN_ATTR_RO)
  1347. || ((attribute & CCI_CONN_ATTR_RU) == CCI_CONN_ATTR_RU))
  1348. && ep->keepalive_timeout != 0UL) {
  1349. keepalive = ep->keepalive_timeout;
  1350. }
  1351. i = sock_ip_hash(ip, 0);
  1352. active_list = &sep->active_hash[i];
  1353. pthread_mutex_lock(&ep->lock);
  1354. TAILQ_INSERT_TAIL(active_list, sconn, entry);
  1355. pthread_mutex_unlock(&ep->lock);
  1356. /* get a tx */
  1357. tx = sock_get_tx (ep);
  1358. if (!tx) {
  1359. /* FIXME leak */
  1360. CCI_EXIT;
  1361. return CCI_ENOBUFS;
  1362. }
  1363. tx->rma_ptr = NULL;
  1364. tx->rma_len = 0;
  1365. /* prep the tx */
  1366. tx->msg_type = SOCK_MSG_CONN_REQUEST;
  1367. evt = &tx->evt;
  1368. evt->ep = ep;
  1369. evt->conn = conn;
  1370. evt->event.type = CCI_EVENT_CONNECT; /* for now */
  1371. evt->event.connect.status = CCI_SUCCESS;
  1372. evt->event.connect.context = (void *)context;
  1373. evt->event.connect.connection = connection;
  1374. /* pack the msg */
  1375. hdr_r = (sock_header_r_t *) tx->buffer;
  1376. sock_get_id(sep, &sconn->id);
  1377. sock_pack_conn_request(&hdr_r->header, attribute,
  1378. (uint16_t) data_len, sconn->id);
  1379. tx->len = sizeof(*hdr_r);
  1380. /* add seq and ack */
  1381. sconn->seq = sock_get_new_seq();
  1382. sconn->seq_pending = sconn->seq - 1;
  1383. sconn->last_ack_seq = sconn->seq;
  1384. tx->seq = sconn->seq;
  1385. sock_pack_seq_ts(&hdr_r->seq_ts, tx->seq, ts);
  1386. /* add handshake */
  1387. hs = (sock_handshake_t *) & hdr_r->data;
  1388. if (keepalive != 0UL)
  1389. conn->keepalive_timeout = keepalive;
  1390. sock_pack_handshake(hs, sconn->id, 0,
  1391. ep->rx_buf_cnt,
  1392. connection->max_send_size, keepalive);
  1393. tx->len += sizeof(*hs);
  1394. ptr = (void*)((uintptr_t)tx->buffer + tx->len);
  1395. debug_ep(ep,CCI_DB_CONN, "%s: queuing conn_request with seq %u ts %x",
  1396. __func__, tx->seq, ts);
  1397. /* zero even if unreliable */
  1398. tx->last_attempt_us = 0ULL;
  1399. tx->timeout_us = 0ULL;
  1400. tx->rma_op = NULL;
  1401. if (data_len)
  1402. memcpy(ptr, data_ptr, data_len);
  1403. tx->len += data_len;
  1404. assert(tx->len <= ep->buffer_len);
  1405. /* insert at tail of device's queued list */
  1406. tx->state = SOCK_TX_QUEUED;
  1407. pthread_mutex_lock(&ep->lock);
  1408. TAILQ_INSERT_TAIL(&sep->queued, &tx->evt, entry);
  1409. pthread_mutex_unlock(&ep->lock);
  1410. /* try to progress txs */
  1411. pthread_mutex_lock(&sep->progress_mutex);
  1412. pthread_cond_signal(&sep->wait_condition);
  1413. pthread_mutex_unlock(&sep->progress_mutex);
  1414. CCI_EXIT;
  1415. return CCI_SUCCESS;
  1416. out:
  1417. if (conn) {
  1418. if (conn->uri)
  1419. free((char *)conn->uri);
  1420. if (conn->priv)
  1421. free(conn->priv);
  1422. free(conn);
  1423. }
  1424. CCI_EXIT;
  1425. return ret;
  1426. }
/* Tear down a connection: unlink its sconn from the endpoint's connection
 * hash and free the conn, its private data, and its URI string.
 *
 * NOTE(review): sconn is removed from conn_hash only; a still-connecting
 * conn lives in active_hash instead — presumably disconnect is only legal
 * on established connections. Confirm against the CCI API contract.
 */
static int ctp_sock_disconnect(cci_connection_t * connection)
{
	int i = 0;
	cci__conn_t *conn = NULL;
	cci__ep_t *ep = NULL;
	sock_conn_t *sconn = NULL;
	sock_ep_t *sep = NULL;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	/* need to clean up */

	/* remove conn from ep->conn_hash[i] */
	/* if sock conn uri, free it
	 * free sock conn
	 * free conn
	 */

	conn = container_of(connection, cci__conn_t, connection);
	sconn = conn->priv;
	ep = container_of(connection->endpoint, cci__ep_t, endpoint);
	sep = ep->priv;

	if (conn->uri)
		free((char *)conn->uri);

	/* Same hash used at insertion time in ctp_sock_accept(). */
	i = sock_ip_hash(sconn->sin.sin_addr.s_addr, sconn->sin.sin_port);
	pthread_mutex_lock(&ep->lock);
	TAILQ_REMOVE(&sep->conn_hash[i], sconn, entry);
	pthread_mutex_unlock(&ep->lock);

	free(sconn);
	free(conn);

	CCI_EXIT;
	return CCI_SUCCESS;
}
  1460. static int ctp_sock_set_opt(cci_opt_handle_t * handle,
  1461. cci_opt_name_t name, const void *val)
  1462. {
  1463. int ret = CCI_SUCCESS;
  1464. cci__ep_t *ep = NULL;
  1465. cci__conn_t *conn = NULL;
  1466. CCI_ENTER;
  1467. if (!sglobals) {
  1468. CCI_EXIT;
  1469. return CCI_ENODEV;
  1470. }
  1471. switch (name) {
  1472. case CCI_OPT_ENDPT_SEND_TIMEOUT:
  1473. ep = container_of(handle, cci__ep_t, endpoint);
  1474. ep->tx_timeout = *((uint32_t*) val);
  1475. break;
  1476. case CCI_OPT_ENDPT_RECV_BUF_COUNT:
  1477. ret = CCI_ERR_NOT_IMPLEMENTED;
  1478. break;
  1479. case CCI_OPT_ENDPT_SEND_BUF_COUNT:
  1480. ret = CCI_ERR_NOT_IMPLEMENTED;
  1481. break;
  1482. case CCI_OPT_ENDPT_KEEPALIVE_TIMEOUT:
  1483. ep = container_of(handle, cci__ep_t, endpoint);
  1484. ep->keepalive_timeout = *((uint32_t*) val);
  1485. break;
  1486. case CCI_OPT_CONN_SEND_TIMEOUT:
  1487. conn->tx_timeout = *((uint32_t*) val);
  1488. break;
  1489. default:
  1490. debug(CCI_DB_INFO, "%s: unknown option %u", __func__, name);
  1491. ret = CCI_EINVAL;
  1492. }
  1493. CCI_EXIT;
  1494. return ret;
  1495. }
  1496. static int ctp_sock_get_opt(cci_opt_handle_t * handle,
  1497. cci_opt_name_t name, void *val)
  1498. {
  1499. int ret = CCI_SUCCESS;
  1500. cci_endpoint_t *endpoint = NULL;
  1501. cci__ep_t *ep = NULL;
  1502. CCI_ENTER;
  1503. if (!sglobals) {
  1504. CCI_EXIT;
  1505. return CCI_ENODEV;
  1506. }
  1507. endpoint = handle;
  1508. ep = container_of(endpoint, cci__ep_t, endpoint);
  1509. assert (ep);
  1510. switch (name) {
  1511. case CCI_OPT_ENDPT_RECV_BUF_COUNT:
  1512. {
  1513. uint32_t *cnt = val;
  1514. *cnt = ep->rx_buf_cnt;
  1515. break;
  1516. }
  1517. case CCI_OPT_ENDPT_SEND_BUF_COUNT:
  1518. {
  1519. uint32_t *cnt = val;
  1520. *cnt = ep->tx_buf_cnt;
  1521. break;
  1522. }
  1523. case CCI_OPT_ENDPT_KEEPALIVE_TIMEOUT:
  1524. {
  1525. uint32_t *timeout = val;
  1526. *timeout = ep->keepalive_timeout;
  1527. break;
  1528. }
  1529. default:
  1530. /* Invalid opt name */
  1531. ret = CCI_EINVAL;
  1532. }
  1533. CCI_EXIT;
  1534. return ret;
  1535. }
  1536. static int ctp_sock_arm_os_handle(cci_endpoint_t * endpoint, int flags)
  1537. {
  1538. CCI_ENTER;
  1539. UNUSED_PARAM (endpoint);
  1540. UNUSED_PARAM (flags);
  1541. if (!sglobals) {
  1542. CCI_EXIT;
  1543. return CCI_ENODEV;
  1544. }
  1545. CCI_EXIT;
  1546. return CCI_ERR_NOT_IMPLEMENTED;
  1547. }
/* Return the next available event to the application.
 *
 * Pokes the progress thread, then hands out the first eligible event on
 * the endpoint's queue (skipping blocking sends that sock_sendv() is
 * waiting on). Returns CCI_EAGAIN when no event is ready, or CCI_ENOBUFS
 * when the app also holds all rx buffers and must return events first.
 */
static int
ctp_sock_get_event(cci_endpoint_t * endpoint, cci_event_t ** const event)
{
	int ret = CCI_SUCCESS;
	cci__ep_t *ep;
	sock_ep_t *sep;
	cci__evt_t *ev = NULL, *e;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	ep = container_of(endpoint, cci__ep_t, endpoint);
	sep = ep->priv;

	/* try to progress sends... */
	if (!sep->closing) {
		pthread_mutex_lock(&sep->progress_mutex);
		pthread_cond_signal(&sep->wait_condition);
		pthread_mutex_unlock(&sep->progress_mutex);
	}

	pthread_mutex_lock(&ep->lock);

	/* give the user the first event */
	TAILQ_FOREACH(e, &ep->evts, entry) {
		if (e->event.type == CCI_EVENT_SEND) {
			/* NOTE: if it is blocking, skip it since sock_sendv()
			 * is waiting on it
			 */
			sock_tx_t *tx = container_of(e, sock_tx_t, evt);
			if (tx->flags & CCI_FLAG_BLOCKING) {
				continue;
			} else {
				ev = e;
				break;
			}
		} else {
			ev = e;
			break;
		}
	}

	if (ev) {
		TAILQ_REMOVE(&ep->evts, ev, entry);
		*event = &ev->event;
	} else {
		*event = NULL;
		/* No event is available and there are no available
		   receive buffers. The application must return events
		   before any more messages can be received. */
		if (TAILQ_EMPTY(&sep->idle_rxs)) {
			ret = CCI_ENOBUFS;
		} else {
			ret = CCI_EAGAIN;
		}
	}

	pthread_mutex_unlock(&ep->lock);

	/* We read on the fd to block again */
	if (ev && sep->event_fd) {
		char a[1];
		int rc;

		/* We block again only and only if there are no more
		   pending events */
		if (event_queue_is_empty (ep)) {
			/* Drain the pipe so the app thread can block on
			   the fd again */
			rc = read (sep->fd[0], a, sizeof (a));
			if (rc != sizeof (a)) {
				ret = CCI_ERROR;
			}
		}
	}

	CCI_EXIT;
	return ret;
}
/* Hand an event back to the transport, returning its underlying buffer
 * (tx or rx) to the corresponding idle list so it can be reused.
 * Returns CCI_SUCCESS, or CCI_ERROR for an event type that should never
 * be returned here. A NULL event is accepted as a no-op.
 */
static int ctp_sock_return_event(cci_event_t * event)
{
	cci__ep_t *ep;
	sock_ep_t *sep;
	cci__evt_t *evt;
	sock_tx_t *tx;
	sock_rx_t *rx;
	int ret = CCI_SUCCESS;

	CCI_ENTER;

	if (!sglobals) {
		CCI_EXIT;
		return CCI_ENODEV;
	}

	if (!event) {
		CCI_EXIT;
		return CCI_SUCCESS;
	}

	evt = container_of(event, cci__evt_t, event);

	ep = evt->ep;
	sep = ep->priv;

	/* enqueue the event */
	switch (event->type) {
	case CCI_EVENT_SEND:
	case CCI_EVENT_ACCEPT:
		/* send-side events ride on a tx buffer */
		tx = container_of(evt, sock_tx_t, evt);
		pthread_mutex_lock(&ep->lock);
		/* insert at head to keep it in cache */
		TAILQ_INSERT_HEAD(&sep->idle_txs, tx, dentry);
		pthread_mutex_unlock(&ep->lock);
		break;
	case CCI_EVENT_RECV:
	case CCI_EVENT_CONNECT_REQUEST:
		/* receive-side events ride on an rx buffer */
		rx = container_of(evt, sock_rx_t, evt);
		pthread_mutex_lock(&ep->lock);
		/* insert at head to keep it in cache */
		TAILQ_INSERT_HEAD(&sep->idle_rxs, rx, entry);
		pthread_mutex_unlock(&ep->lock);
		break;
	case CCI_EVENT_CONNECT:
		/* A connect event may be carried by either an rx or a tx;
		   the ctx field discriminates. NOTE(review): the rx→tx cast
		   assumes both structs embed ctx/evt at compatible offsets
		   — confirm against ctp_sock_internals.h. */
		rx = container_of (evt, sock_rx_t, evt);
		if (rx->ctx == SOCK_CTX_RX) {
			pthread_mutex_lock(&ep->lock);
			TAILQ_INSERT_HEAD (&sep->idle_rxs, rx, entry);
			pthread_mutex_unlock(&ep->lock);
		} else {
			tx = (sock_tx_t*)rx;
			pthread_mutex_lock(&ep->lock);
			TAILQ_INSERT_HEAD (&sep->idle_txs, tx, dentry);
			pthread_mutex_unlock(&ep->lock);
		}
		break;
	default:
		debug (CCI_DB_EP,
			"%s: unhandled %s event", __func__,
			cci_event_type_str(event->type));
		ret = CCI_ERROR;
		break;
	}

	CCI_EXIT;
	return ret;
}
  1680. static void sock_progress_pending(cci__ep_t * ep)
  1681. {
  1682. int ret;
  1683. uint64_t now;
  1684. sock_tx_t *tx;
  1685. cci__evt_t *evt, *tmp, *my_temp_evt;
  1686. union cci_event *event; /* generic CCI event */
  1687. cci__conn_t *conn;
  1688. sock_conn_t *sconn = NULL;
  1689. sock_ep_t *sep = ep->priv;
  1690. TAILQ_HEAD(s_idle_txs, sock_tx) idle_txs
  1691. = TAILQ_HEAD_INITIALIZER(idle_txs);
  1692. TAILQ_HEAD(s_evts, cci__evt) evts = TAILQ_HEAD_INITIALIZER(evts);
  1693. TAILQ_INIT(&idle_txs);
  1694. TAILQ_INIT(&evts);
  1695. CCI_ENTER;
  1696. now = sock_get_usecs();
  1697. /* This is only for reliable messages.
  1698. * Do not dequeue txs, just walk the list.
  1699. */
  1700. pthread_mutex_lock (&ep->lock);
  1701. TAILQ_FOREACH_SAFE(evt, &sep->pending, entry, tmp) {
  1702. sock_tx_t *tx = container_of (evt, sock_tx_t, evt);
  1703. conn = evt->conn;
  1704. if (conn)
  1705. sconn = conn->priv;
  1706. event = &evt->event;
  1707. assert(tx->last_attempt_us != 0ULL);
  1708. /* has it timed out? */
  1709. if (SOCK_U64_LT(tx->timeout_us, now)) {
  1710. /* dequeue */
  1711. debug_ep(ep, CCI_DB_WARN,
  1712. "%s: timeout of %s msg (seq %u)",
  1713. __func__, sock_msg_type(tx->msg_type),
  1714. tx->seq);
  1715. TAILQ_REMOVE(&sep->pending, &tx->evt, entry);
  1716. /* set status and add to completed events */
  1717. if (tx->msg_type == SOCK_MSG_SEND)
  1718. sconn->pending--;
  1719. switch (tx->msg_type) {
  1720. case SOCK_MSG_SEND:
  1721. event->send.status = CCI_ETIMEDOUT;
  1722. if (tx->rnr != 0) {
  1723. event->send.status = CCI_ERR_RNR;
  1724. /* If a message that is already marked
  1725. RNR times out, and if the connection
  1726. is reliable and ordered, we mark all
  1727. following messages as RNR */
  1728. if (conn->connection.attribute == CCI_CONN_ATTR_RO) {
  1729. sock_tx_t *my_temp_tx;
  1730. TAILQ_FOREACH_SAFE(my_temp_evt,
  1731. &sep->pending,
  1732. entry,
  1733. tmp)
  1734. {
  1735. my_temp_tx = container_of (my_temp_evt, sock_tx_t, evt);
  1736. if (my_temp_tx->seq > tx->seq)
  1737. my_temp_tx->rnr = 1;
  1738. }
  1739. }
  1740. }
  1741. break;
  1742. case SOCK_MSG_RMA_READ_REQUEST:
  1743. case SOCK_MSG_RMA_WRITE:
  1744. pthread_mutex_lock(&ep->lock);
  1745. tx->rma_op->pending--;
  1746. tx->rma_op->status = CCI_ETIMEDOUT;
  1747. pthread_mutex_unlock(&ep->lock);
  1748. break;
  1749. case SOCK_MSG_CONN_REQUEST: {
  1750. int i;
  1751. struct s_active *active_list;
  1752. event->connect.status = CCI_ETIMEDOUT;
  1753. event->connect.connection = NULL;
  1754. if (conn->uri)
  1755. free((char *)conn->uri);
  1756. sconn->status = SOCK_CONN_CLOSING;
  1757. i = sock_ip_hash(sconn->sin.sin_addr.s_addr,
  1758. 0);
  1759. active_list = &sep->active_hash[i];
  1760. pthread_mutex_lock(&ep->lock);
  1761. TAILQ_REMOVE(active_list, sconn, entry);
  1762. pthread_mutex_unlock(&ep->lock);
  1763. free(sconn);
  1764. free(conn);
  1765. sconn = NULL;
  1766. conn = NULL;
  1767. tx->evt.ep = ep;
  1768. tx->evt.conn = NULL;
  1769. break;
  1770. }
  1771. case SOCK_MSG_CONN_REPLY: {
  1772. /* The client is not requiered to ack a
  1773. conn_reply in the context of a reject, so
  1774. we just ignore the timeout in that
  1775. context */
  1776. if (tx->evt.event.connect.status
  1777. == CCI_ECONNREFUSED)
  1778. {
  1779. /* store locally until we can drop the
  1780. dev->lock */
  1781. debug_ep (ep, CCI_DB_CONN,
  1782. "%s: No ACK of the reject, "
  1783. "dropping pending msg",
  1784. __func__);
  1785. TAILQ_INSERT_HEAD(&idle_txs,
  1786. tx,
  1787. dentry);
  1788. break;
  1789. }
  1790. }
  1791. case SOCK_MSG_CONN_ACK:
  1792. default:
  1793. /* TODO */
  1794. CCI_EXIT;
  1795. return;
  1796. }
  1797. /* if SILENT, put idle tx */
  1798. if (tx->flags & CCI_FLAG_SILENT &&
  1799. (tx->msg_type == SOCK_MSG_SEND ||
  1800. tx->msg_type == SOCK_MSG_RMA_WRITE)) {
  1801. tx->state = SOCK_TX_IDLE;
  1802. /* store locally until we can drop the
  1803. dev->lock */
  1804. TAILQ_INSERT_HEAD(

Large files files are truncated, but you can click here to view the full file