PageRenderTime 60ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/bsd/sys/compat/linux/linux_socket.cc

https://gitlab.com/jforge/osv
C++ | 1284 lines | 957 code | 170 blank | 157 comment | 204 complexity | 19709262e59a0ec12941229b115657ab MD5 | raw file
Possible License(s): BSD-3-Clause, 0BSD, MPL-2.0-no-copyleft-exception
  1. /*-
  2. * Copyright (c) 1995 Soren Schmidt
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer
  10. * in this position and unchanged.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * 3. The name of the author may not be used to endorse or promote products
  15. * derived from this software without specific prior written permission
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. */
#include <sys/cdefs.h>

#include <errno.h>
#include <string.h>
#include <unistd.h> /* for close() */
/* XXX we use functions that might not exist. */
#include <bsd/sys/sys/param.h>

#include <fcntl.h>
#include <osv/fcntl.h>
#include <osv/file.h>
#include <osv/uio.h>

#include <bsd/uipc_syscalls.h>
#include <bsd/sys/sys/limits.h>
#include <bsd/sys/sys/mbuf.h>
#include <bsd/sys/sys/socket.h>
#include <bsd/sys/sys/socketvar.h>

#include <bsd/sys/net/if.h>
#include <bsd/sys/netinet/in.h>
#include <bsd/sys/netinet/in_systm.h>
#include <bsd/sys/netinet/ip.h>
#ifdef INET6
#include <bsd/sys/netinet/ip6.h>
#include <bsd/sys/netinet6/ip6_var.h>
#include <bsd/sys/netinet6/in6_var.h>
#endif

#include <bsd/sys/compat/linux/linux.h>
#include <bsd/sys/compat/linux/linux_socket.h>

#define __NEED_sa_family_t
#include <bits/alltypes.h>
  54. static int linux_to_bsd_domain(int);
  55. /*
  56. * Like fget() but loads the underlying socket, or returns an error if the
  57. * descriptor does not represent a socket.
  58. *
  59. * We bump the ref count on the returned socket. XXX Also obtain the SX lock
  60. * in the future.
  61. *
  62. * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
  63. * on their file descriptor reference to prevent the socket from being free'd
  64. * during use.
  65. */
  66. int
  67. fgetsock(int fd, struct socket **spp,
  68. u_int *fflagp)
  69. {
  70. struct file *fp;
  71. int error;
  72. *spp = NULL;
  73. if (fflagp != NULL)
  74. *fflagp = 0;
  75. if ((error = fget(fd, &fp)) != 0)
  76. return (error);
  77. if (file_type(fp) != DTYPE_SOCKET) {
  78. error = ENOTSOCK;
  79. } else {
  80. *spp = (socket*)file_data(fp);
  81. if (fflagp)
  82. *fflagp = file_flags(fp);
  83. SOCK_LOCK(*spp);
  84. soref(*spp);
  85. SOCK_UNLOCK(*spp);
  86. }
  87. fdrop(fp);
  88. return (error);
  89. }
  90. /*
  91. * Drop the reference count on the socket and XXX release the SX lock in the
  92. * future. The last reference closes the socket.
  93. *
  94. * Note: fputsock() is deprecated, see comment for fgetsock().
  95. */
  96. void
  97. fputsock(struct socket *so)
  98. {
  99. ACCEPT_LOCK();
  100. SOCK_LOCK(so);
  101. CURVNET_SET(so->so_vnet);
  102. sorele(so);
  103. CURVNET_RESTORE();
  104. }
  105. /*
  106. * Reads a linux bsd_sockaddr and does any necessary translation.
  107. * Linux bsd_sockaddrs don't have a length field, only a family.
  108. * Copy the bsd_osockaddr structure pointed to by osa to kernel, adjust
  109. * family and convert to bsd_sockaddr.
  110. */
  111. static int
  112. linux_getsockaddr(struct bsd_sockaddr **sap, const struct bsd_osockaddr *osa, int salen)
  113. {
  114. struct bsd_sockaddr *sa;
  115. struct bsd_osockaddr *kosa;
  116. #ifdef INET6
  117. struct bsd_sockaddr_in6 *sin6;
  118. int oldv6size;
  119. #endif
  120. #if 0
  121. char *name;
  122. int bdom, error, hdrlen, namelen;
  123. #else
  124. int error, bdom;
  125. #endif
  126. if (salen < 2 || salen > UCHAR_MAX || !osa)
  127. return (EINVAL);
  128. #ifdef INET6
  129. oldv6size = 0;
  130. /*
  131. * Check for old (pre-RFC2553) bsd_sockaddr_in6. We may accept it
  132. * if it's a v4-mapped address, so reserve the proper space
  133. * for it.
  134. */
  135. if (salen == sizeof(struct bsd_sockaddr_in6) - sizeof(uint32_t)) {
  136. salen += sizeof(uint32_t);
  137. oldv6size = 1;
  138. }
  139. #endif
  140. kosa = (bsd_osockaddr*)malloc(salen);
  141. if ((error = copyin(osa, kosa, salen)))
  142. goto out;
  143. bdom = linux_to_bsd_domain(kosa->sa_family);
  144. if (bdom == -1) {
  145. error = EAFNOSUPPORT;
  146. goto out;
  147. }
  148. #ifdef INET6
  149. /*
  150. * Older Linux IPv6 code uses obsolete RFC2133 struct bsd_sockaddr_in6,
  151. * which lacks the scope id compared with RFC2553 one. If we detect
  152. * the situation, reject the address and write a message to system log.
  153. *
  154. * Still accept addresses for which the scope id is not used.
  155. */
  156. if (oldv6size) {
  157. if (bdom == AF_INET6) {
  158. sin6 = (struct bsd_sockaddr_in6 *)kosa;
  159. if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
  160. (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
  161. !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
  162. !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
  163. !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
  164. !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
  165. sin6->sin6_scope_id = 0;
  166. } else {
  167. log(LOG_DEBUG,
  168. "obsolete pre-RFC2553 bsd_sockaddr_in6 rejected\n");
  169. error = EINVAL;
  170. goto out;
  171. }
  172. } else
  173. salen -= sizeof(uint32_t);
  174. }
  175. #endif
  176. if (bdom == AF_INET) {
  177. if ((size_t)salen < sizeof(struct bsd_sockaddr_in)) {
  178. error = EINVAL;
  179. goto out;
  180. }
  181. salen = sizeof(struct bsd_sockaddr_in);
  182. }
  183. /* FIXME: OSv - we don't support AD_LOCAL yet */
  184. assert(bdom != AF_LOCAL);
  185. #if 0
  186. if (bdom == AF_LOCAL && salen > sizeof(struct bsd_sockaddr_un)) {
  187. hdrlen = offsetof(struct bsd_sockaddr_un, sun_path);
  188. name = ((struct bsd_sockaddr_un *)kosa)->sun_path;
  189. if (*name == '\0') {
  190. /*
  191. * Linux abstract namespace starts with a NULL byte.
  192. * XXX We do not support abstract namespace yet.
  193. */
  194. namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
  195. } else
  196. namelen = strnlen(name, salen - hdrlen);
  197. salen = hdrlen + namelen;
  198. if (salen > sizeof(struct bsd_sockaddr_un)) {
  199. error = ENAMETOOLONG;
  200. goto out;
  201. }
  202. }
  203. #endif
  204. sa = (struct bsd_sockaddr *)kosa;
  205. sa->sa_family = bdom;
  206. sa->sa_len = salen;
  207. *sap = sa;
  208. return (0);
  209. out:
  210. free(kosa);
  211. return (error);
  212. }
  213. static int
  214. linux_to_bsd_domain(int domain)
  215. {
  216. switch (domain) {
  217. case LINUX_AF_UNSPEC:
  218. return (AF_UNSPEC);
  219. case LINUX_AF_UNIX:
  220. return (AF_LOCAL);
  221. case LINUX_AF_INET:
  222. return (AF_INET);
  223. case LINUX_AF_INET6:
  224. return (AF_INET6);
  225. case LINUX_AF_AX25:
  226. return (AF_CCITT);
  227. case LINUX_AF_IPX:
  228. return (AF_IPX);
  229. case LINUX_AF_APPLETALK:
  230. return (AF_APPLETALK);
  231. }
  232. return (-1);
  233. }
  234. static int
  235. bsd_to_linux_domain(int domain)
  236. {
  237. switch (domain) {
  238. case AF_UNSPEC:
  239. return (LINUX_AF_UNSPEC);
  240. case AF_LOCAL:
  241. return (LINUX_AF_UNIX);
  242. case AF_INET:
  243. return (LINUX_AF_INET);
  244. case AF_INET6:
  245. return (LINUX_AF_INET6);
  246. case AF_CCITT:
  247. return (LINUX_AF_AX25);
  248. case AF_IPX:
  249. return (LINUX_AF_IPX);
  250. case AF_APPLETALK:
  251. return (LINUX_AF_APPLETALK);
  252. }
  253. return (-1);
  254. }
  255. static int
  256. linux_to_bsd_ip_sockopt(int opt)
  257. {
  258. switch (opt) {
  259. case LINUX_IP_TOS:
  260. return (IP_TOS);
  261. case LINUX_IP_TTL:
  262. return (IP_TTL);
  263. case LINUX_IP_OPTIONS:
  264. return (IP_OPTIONS);
  265. case LINUX_IP_MULTICAST_IF:
  266. return (IP_MULTICAST_IF);
  267. case LINUX_IP_MULTICAST_TTL:
  268. return (IP_MULTICAST_TTL);
  269. case LINUX_IP_MULTICAST_LOOP:
  270. return (IP_MULTICAST_LOOP);
  271. case LINUX_IP_ADD_MEMBERSHIP:
  272. return (IP_ADD_MEMBERSHIP);
  273. case LINUX_IP_DROP_MEMBERSHIP:
  274. return (IP_DROP_MEMBERSHIP);
  275. case LINUX_IP_HDRINCL:
  276. return (IP_HDRINCL);
  277. }
  278. return (-1);
  279. }
  280. static int
  281. linux_to_bsd_so_sockopt(int opt)
  282. {
  283. assert(opt != LINUX_SO_PEERCRED);
  284. switch (opt) {
  285. case LINUX_SO_DEBUG:
  286. return (SO_DEBUG);
  287. case LINUX_SO_REUSEADDR:
  288. return (SO_REUSEADDR);
  289. case LINUX_SO_TYPE:
  290. return (SO_TYPE);
  291. case LINUX_SO_ERROR:
  292. return (SO_ERROR);
  293. case LINUX_SO_DONTROUTE:
  294. return (SO_DONTROUTE);
  295. case LINUX_SO_BROADCAST:
  296. return (SO_BROADCAST);
  297. case LINUX_SO_SNDBUF:
  298. return (SO_SNDBUF);
  299. case LINUX_SO_RCVBUF:
  300. return (SO_RCVBUF);
  301. case LINUX_SO_KEEPALIVE:
  302. return (SO_KEEPALIVE);
  303. case LINUX_SO_OOBINLINE:
  304. return (SO_OOBINLINE);
  305. case LINUX_SO_LINGER:
  306. return (SO_LINGER);
  307. case LINUX_SO_RCVLOWAT:
  308. return (SO_RCVLOWAT);
  309. case LINUX_SO_SNDLOWAT:
  310. return (SO_SNDLOWAT);
  311. case LINUX_SO_RCVTIMEO:
  312. return (SO_RCVTIMEO);
  313. case LINUX_SO_SNDTIMEO:
  314. return (SO_SNDTIMEO);
  315. case LINUX_SO_TIMESTAMP:
  316. return (SO_TIMESTAMP);
  317. case LINUX_SO_ACCEPTCONN:
  318. return (SO_ACCEPTCONN);
  319. }
  320. return (-1);
  321. }
  322. static int
  323. linux_to_bsd_msg_flags(int flags)
  324. {
  325. int ret_flags = 0;
  326. if (flags & LINUX_MSG_OOB)
  327. ret_flags |= MSG_OOB;
  328. if (flags & LINUX_MSG_PEEK)
  329. ret_flags |= MSG_PEEK;
  330. if (flags & LINUX_MSG_DONTROUTE)
  331. ret_flags |= MSG_DONTROUTE;
  332. if (flags & LINUX_MSG_CTRUNC)
  333. ret_flags |= MSG_CTRUNC;
  334. if (flags & LINUX_MSG_TRUNC)
  335. ret_flags |= MSG_TRUNC;
  336. if (flags & LINUX_MSG_DONTWAIT)
  337. ret_flags |= MSG_DONTWAIT;
  338. if (flags & LINUX_MSG_EOR)
  339. ret_flags |= MSG_EOR;
  340. if (flags & LINUX_MSG_WAITALL)
  341. ret_flags |= MSG_WAITALL;
  342. if (flags & LINUX_MSG_NOSIGNAL)
  343. ret_flags |= MSG_NOSIGNAL;
  344. #if 0 /* not handled */
  345. if (flags & LINUX_MSG_PROXY)
  346. ;
  347. if (flags & LINUX_MSG_FIN)
  348. ;
  349. if (flags & LINUX_MSG_SYN)
  350. ;
  351. if (flags & LINUX_MSG_CONFIRM)
  352. ;
  353. if (flags & LINUX_MSG_RST)
  354. ;
  355. if (flags & LINUX_MSG_ERRQUEUE)
  356. ;
  357. #endif
  358. return ret_flags;
  359. }
  360. static int
  361. bsd_to_linux_sockaddr(struct bsd_sockaddr *sa)
  362. {
  363. if (sa == NULL)
  364. return EINVAL;
  365. u_short family = sa->sa_family;
  366. *(u_short *)sa = family;
  367. return (0);
  368. }
  369. static int
  370. linux_to_bsd_sockaddr(struct bsd_sockaddr *sa, int len)
  371. {
  372. if (sa == NULL)
  373. return EINVAL;
  374. sa_family_t family = *(sa_family_t *)sa;
  375. sa->sa_family = family;
  376. sa->sa_len = len;
  377. return (0);
  378. }
  379. static int
  380. linux_sa_put(struct bsd_osockaddr *osa)
  381. {
  382. int bdom;
  383. bdom = bsd_to_linux_domain(osa->sa_family);
  384. if (bdom == -1)
  385. return (EINVAL);
  386. osa->sa_family = bdom;
  387. return (0);
  388. }
  389. #if 0
  390. static int
  391. linux_to_bsd_cmsg_type(int cmsg_type)
  392. {
  393. switch (cmsg_type) {
  394. case LINUX_SCM_RIGHTS:
  395. return (SCM_RIGHTS);
  396. case LINUX_SCM_CREDENTIALS:
  397. return (SCM_CREDS);
  398. }
  399. return (-1);
  400. }
  401. static int
  402. bsd_to_linux_cmsg_type(int cmsg_type)
  403. {
  404. switch (cmsg_type) {
  405. case SCM_RIGHTS:
  406. return (LINUX_SCM_RIGHTS);
  407. case SCM_CREDS:
  408. return (LINUX_SCM_CREDENTIALS);
  409. }
  410. return (-1);
  411. }
  412. #endif
  413. static int
  414. linux_to_bsd_msghdr(struct msghdr *hdr)
  415. {
  416. /* Ignore msg_control in OSv */
  417. hdr->msg_control = NULL;
  418. hdr->msg_flags = linux_to_bsd_msg_flags(hdr->msg_flags);
  419. return (0);
  420. }
  421. static int
  422. bsd_to_linux_msghdr(const struct msghdr *hdr)
  423. {
  424. /*
  425. * msg_controllen is skipped since BSD and LINUX control messages
  426. * are potentially different sizes (e.g. the cred structure used
  427. * by SCM_CREDS is different between the two operating system).
  428. *
  429. * The caller can set it (if necessary) after converting all the
  430. * control messages.
  431. */
  432. /* msg_flags skipped */
  433. return (0);
  434. }
  435. static int
  436. linux_set_socket_flags(int s, int flags)
  437. {
  438. int error;
  439. if (flags & LINUX_SOCK_NONBLOCK) {
  440. error = fcntl(s, F_SETFL, O_NONBLOCK);
  441. if (error)
  442. return (error);
  443. }
  444. if (flags & LINUX_SOCK_CLOEXEC) {
  445. error = fcntl(s, F_SETFD, FD_CLOEXEC);
  446. if (error)
  447. return (error);
  448. }
  449. return (0);
  450. }
  451. static int
  452. linux_sendit(int s, struct msghdr *mp, int flags,
  453. struct mbuf *control, ssize_t *bytes)
  454. {
  455. struct bsd_sockaddr *to;
  456. int error, bsd_flags;
  457. if (mp->msg_name != NULL) {
  458. error = linux_getsockaddr(&to, (const bsd_osockaddr*)mp->msg_name, mp->msg_namelen);
  459. if (error)
  460. return (error);
  461. mp->msg_name = to;
  462. } else
  463. to = NULL;
  464. bsd_flags = linux_to_bsd_msg_flags(flags);
  465. error = kern_sendit(s, mp, bsd_flags, control, bytes);
  466. if (to)
  467. free(to);
  468. return (error);
  469. }
  470. /* Return 0 if IP_HDRINCL is set for the given socket. */
  471. static int
  472. linux_check_hdrincl(int s)
  473. {
  474. int error, optval;
  475. socklen_t size_val;
  476. size_val = sizeof(optval);
  477. error = kern_getsockopt(s, IPPROTO_IP, IP_HDRINCL, &optval, &size_val);
  478. if (error)
  479. return (error);
  480. return (optval == 0);
  481. }
  482. /*
  483. * Updated sendto() when IP_HDRINCL is set:
  484. * tweak endian-dependent fields in the IP packet.
  485. */
  486. static int
  487. linux_sendto_hdrincl(int s, void *buf, int len, int flags, void *to,
  488. int tolen, ssize_t *bytes)
  489. {
  490. /*
  491. * linux_ip_copysize defines how many bytes we should copy
  492. * from the beginning of the IP packet before we customize it for BSD.
  493. * It should include all the fields we modify (ip_len and ip_off).
  494. */
  495. #define linux_ip_copysize 8
  496. struct ip *packet;
  497. struct msghdr bsd_msg;
  498. struct iovec aiov[1];
  499. int error;
  500. /* Check that the packet isn't too big or too small. */
  501. if (len < linux_ip_copysize ||
  502. len > IP_MAXSEGMENT)
  503. return (EINVAL);
  504. packet = (struct ip *)buf;
  505. /* Convert fields from Linux to BSD raw IP socket format */
  506. packet->ip_len = len;
  507. packet->ip_off = ntohs(packet->ip_off);
  508. /* Prepare the msghdr and iovec structures describing the new packet */
  509. bsd_msg.msg_name = to;
  510. bsd_msg.msg_namelen = tolen;
  511. bsd_msg.msg_iov = aiov;
  512. bsd_msg.msg_iovlen = 1;
  513. bsd_msg.msg_control = NULL;
  514. bsd_msg.msg_flags = 0;
  515. aiov[0].iov_base = (char *)packet;
  516. aiov[0].iov_len = len;
  517. error = linux_sendit(s, &bsd_msg, flags, NULL, bytes);
  518. return (error);
  519. }
  520. int
  521. linux_socket(int domain, int type, int protocol, int *out_fd)
  522. {
  523. int retval_socket, socket_flags;
  524. int s;
  525. socket_flags = type & ~LINUX_SOCK_TYPE_MASK;
  526. if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
  527. return (EINVAL);
  528. type = type & LINUX_SOCK_TYPE_MASK;
  529. if (type < 0 || type > LINUX_SOCK_MAX)
  530. return (EINVAL);
  531. domain = linux_to_bsd_domain(domain);
  532. if (domain == -1)
  533. return (EAFNOSUPPORT);
  534. retval_socket = sys_socket(domain, type, protocol, &s);
  535. if (retval_socket)
  536. return (retval_socket);
  537. retval_socket = linux_set_socket_flags(s, socket_flags);
  538. if (retval_socket) {
  539. close(s);
  540. goto out;
  541. }
  542. if (type == SOCK_RAW
  543. && (protocol == IPPROTO_RAW || protocol == 0)
  544. && domain == PF_INET) {
  545. /* It's a raw IP socket: set the IP_HDRINCL option. */
  546. int hdrincl;
  547. hdrincl = 1;
  548. /* We ignore any error returned by kern_setsockopt() */
  549. kern_setsockopt(s, IPPROTO_IP, IP_HDRINCL,
  550. &hdrincl, sizeof(hdrincl));
  551. }
  552. #ifdef INET6
  553. /*
  554. * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default
  555. * and some apps depend on this. So, set V6ONLY to 0 for Linux apps.
  556. * For simplicity we do this unconditionally of the net.inet6.ip6.v6only
  557. * sysctl value.
  558. */
  559. if (bsd_args.domain == PF_INET6) {
  560. int v6only;
  561. v6only = 0;
  562. /* We ignore any error returned by setsockopt() */
  563. kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY,
  564. &v6only, UIO_SYSSPACE, sizeof(v6only));
  565. }
  566. #endif
  567. /* return the file descriptor */
  568. *out_fd = s;
  569. out:
  570. return (retval_socket);
  571. }
  572. int
  573. linux_bind(int s, void *name, int namelen)
  574. {
  575. struct bsd_sockaddr *sa;
  576. int error;
  577. error = linux_getsockaddr(&sa, (const bsd_osockaddr*)name, namelen);
  578. if (error)
  579. return (error);
  580. error = kern_bind(s, sa);
  581. free(sa);
  582. if (error == EADDRNOTAVAIL && namelen != sizeof(struct bsd_sockaddr_in))
  583. return (EINVAL);
  584. return (error);
  585. }
  586. int
  587. linux_connect(int s, void *name, int namelen)
  588. {
  589. struct socket *so;
  590. struct bsd_sockaddr *sa;
  591. u_int fflag;
  592. int error;
  593. error = linux_getsockaddr(&sa, (const bsd_osockaddr*)name, namelen);
  594. if (error)
  595. return (error);
  596. error = kern_connect(s, sa);
  597. free(sa);
  598. if (error != EISCONN)
  599. return (error);
  600. /*
  601. * Linux doesn't return EISCONN the first time it occurs,
  602. * when on a non-blocking socket. Instead it returns the
  603. * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD.
  604. *
  605. * XXXRW: Instead of using fgetsock(), check that it is a
  606. * socket and use the file descriptor reference instead of
  607. * creating a new one.
  608. */
  609. error = fgetsock(s, &so, &fflag);
  610. if (error == 0) {
  611. error = EISCONN;
  612. if (fflag & FNONBLOCK) {
  613. SOCK_LOCK(so);
  614. if (so->so_emuldata == 0)
  615. error = so->so_error;
  616. so->so_emuldata = (void *)1;
  617. SOCK_UNLOCK(so);
  618. }
  619. fputsock(so);
  620. }
  621. return (error);
  622. }
  623. int
  624. linux_listen(int s, int backlog)
  625. {
  626. return (sys_listen(s, backlog));
  627. }
  628. static int
  629. linux_accept_common(int s, struct bsd_sockaddr * name,
  630. socklen_t * namelen, int *out_fd, int flags)
  631. {
  632. int error;
  633. if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
  634. return (EINVAL);
  635. error = sys_accept(s, name, namelen, out_fd);
  636. bsd_to_linux_sockaddr(name);
  637. if (error) {
  638. if (error == EFAULT && *namelen != sizeof(struct bsd_sockaddr_in))
  639. return (EINVAL);
  640. return (error);
  641. }
  642. /*
  643. * linux appears not to copy flags from the parent socket to the
  644. * accepted one, so we must clear the flags in the new descriptor
  645. * and apply the requested flags.
  646. */
  647. error = fcntl(*out_fd, F_SETFL, 0);
  648. if (error)
  649. goto out;
  650. error = linux_set_socket_flags(*out_fd, flags);
  651. if (error)
  652. goto out;
  653. if (name)
  654. error = linux_sa_put((struct bsd_osockaddr *)name);
  655. out:
  656. if (error) {
  657. close(*out_fd);
  658. *out_fd = 0;
  659. }
  660. return (error);
  661. }
  662. int linux_accept(int s, struct bsd_sockaddr * name,
  663. socklen_t * namelen, int *out_fd)
  664. {
  665. return (linux_accept_common(s, name, namelen, out_fd, 0));
  666. }
  667. int
  668. linux_accept4(int s, struct bsd_sockaddr * name,
  669. socklen_t * namelen, int *out_fd, int flags)
  670. {
  671. return (linux_accept_common(s, name, namelen, out_fd, flags));
  672. }
  673. int
  674. linux_getsockname(int s, struct bsd_sockaddr *addr, socklen_t *addrlen)
  675. {
  676. int error;
  677. error = sys_getsockname(s, addr, addrlen);
  678. bsd_to_linux_sockaddr(addr);
  679. if (error)
  680. return (error);
  681. error = linux_sa_put((struct bsd_osockaddr *)addr);
  682. if (error)
  683. return (error);
  684. return (0);
  685. }
  686. int
  687. linux_getpeername(int s, struct bsd_sockaddr *addr, socklen_t *namelen)
  688. {
  689. int error;
  690. error = sys_getpeername(s, addr, namelen);
  691. bsd_to_linux_sockaddr(addr);
  692. if (error)
  693. return (error);
  694. error = linux_sa_put((struct bsd_osockaddr *)addr);
  695. if (error)
  696. return (error);
  697. return (0);
  698. }
  699. int
  700. linux_socketpair(int domain, int type, int protocol, int* rsv)
  701. {
  702. int error, socket_flags;
  703. domain = linux_to_bsd_domain(domain);
  704. if (domain != PF_LOCAL)
  705. return (EAFNOSUPPORT);
  706. socket_flags = type & ~LINUX_SOCK_TYPE_MASK;
  707. if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
  708. return (EINVAL);
  709. type = type & LINUX_SOCK_TYPE_MASK;
  710. if (type < 0 || type > LINUX_SOCK_MAX)
  711. return (EINVAL);
  712. assert(protocol != PF_UNIX);
  713. if (protocol != 0 && protocol != PF_UNIX)
  714. /*
  715. * Use of PF_UNIX as protocol argument is not right,
  716. * but Linux does it.
  717. * Do not map PF_UNIX as its Linux value is identical
  718. * to FreeBSD one.
  719. */
  720. return (EPROTONOSUPPORT);
  721. else
  722. protocol = 0;
  723. error = kern_socketpair(domain, type, protocol, rsv);
  724. if (error)
  725. return (error);
  726. error = linux_set_socket_flags(rsv[0], socket_flags);
  727. if (error)
  728. goto out;
  729. error = linux_set_socket_flags(rsv[1], socket_flags);
  730. if (error)
  731. goto out;
  732. out:
  733. if (error) {
  734. (void)close(rsv[0]);
  735. (void)close(rsv[1]);
  736. }
  737. return (error);
  738. }
  739. int
  740. linux_send(int s, caddr_t buf, size_t len, int flags, ssize_t* bytes)
  741. {
  742. int bsd_flags = linux_to_bsd_msg_flags(flags);
  743. return sys_sendto(s, buf, len, bsd_flags, NULL, 0, bytes);
  744. }
  745. int
  746. linux_recv(int s, caddr_t buf, int len, int flags, ssize_t* bytes)
  747. {
  748. int bsd_flags = linux_to_bsd_msg_flags(flags);
  749. return (sys_recvfrom(s, buf, len, bsd_flags, NULL, 0, bytes));
  750. }
  751. int
  752. linux_sendto(int s, void* buf, int len, int flags,
  753. void* to, int tolen, ssize_t *bytes)
  754. {
  755. struct msghdr msg;
  756. struct iovec aiov;
  757. int error;
  758. if (linux_check_hdrincl(s) == 0)
  759. /* IP_HDRINCL set, tweak the packet before sending */
  760. return (linux_sendto_hdrincl(s, buf, len, flags, to, tolen, bytes));
  761. msg.msg_name = to;
  762. msg.msg_namelen = tolen;
  763. msg.msg_iov = &aiov;
  764. msg.msg_iovlen = 1;
  765. msg.msg_control = NULL;
  766. msg.msg_flags = 0;
  767. aiov.iov_base = buf;
  768. aiov.iov_len = len;
  769. error = linux_sendit(s, &msg, flags, NULL, bytes);
  770. return (error);
  771. }
  772. int
  773. linux_recvfrom(int s, void* buf, size_t len, int flags,
  774. struct bsd_sockaddr * from, socklen_t * fromlen, ssize_t* bytes)
  775. {
  776. int error;
  777. int bsd_flags = linux_to_bsd_msg_flags(flags);
  778. linux_to_bsd_sockaddr(from, len);
  779. error = sys_recvfrom(s, (caddr_t)buf, len, bsd_flags, from,
  780. fromlen, bytes);
  781. bsd_to_linux_sockaddr(from);
  782. if (error)
  783. return (error);
  784. if (from) {
  785. error = linux_sa_put((struct bsd_osockaddr *)from);
  786. if (error)
  787. return (error);
  788. }
  789. return (0);
  790. }
  791. int
  792. linux_sendmsg(int s, struct msghdr* msg, int flags, ssize_t* bytes)
  793. {
  794. #if 0
  795. struct cmsghdr *cmsg;
  796. struct mbuf *control;
  797. struct iovec *iov;
  798. socklen_t datalen;
  799. struct bsd_sockaddr *sa;
  800. sa_family_t sa_family;
  801. void *data;
  802. #endif
  803. int error;
  804. /*
  805. * Some Linux applications (ping) define a non-NULL control data
  806. * pointer, but a msg_controllen of 0, which is not allowed in the
  807. * FreeBSD system call interface. NULL the msg_control pointer in
  808. * order to handle this case. This should be checked, but allows the
  809. * Linux ping to work.
  810. */
  811. if (msg->msg_control != NULL && msg->msg_controllen == 0)
  812. msg->msg_control = NULL;
  813. /* FIXME: Translate msg control */
  814. assert(msg->msg_control == NULL);
  815. error = linux_to_bsd_msghdr(msg);
  816. if (error)
  817. return (error);
  818. /* FIXME: OSv - cmsgs translation is done credentials and rights,
  819. we ignore those in OSv. */
  820. #if 0
  821. if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
  822. error = kern_getsockname(td, args->s, &sa, &datalen);
  823. if (error)
  824. goto bad;
  825. sa_family = sa->sa_family;
  826. free(sa, M_SONAME);
  827. error = ENOBUFS;
  828. cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
  829. control = m_get(M_WAIT, MT_CONTROL);
  830. if (control == NULL)
  831. goto bad;
  832. do {
  833. error = copyin(ptr_cmsg, &linux_cmsg,
  834. sizeof(struct l_cmsghdr));
  835. if (error)
  836. goto bad;
  837. error = EINVAL;
  838. if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr))
  839. goto bad;
  840. /*
  841. * Now we support only SCM_RIGHTS and SCM_CRED,
  842. * so return EINVAL in any other cmsg_type
  843. */
  844. cmsg->cmsg_type =
  845. linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type);
  846. cmsg->cmsg_level =
  847. linux_cmsg.cmsg_level;
  848. if (cmsg->cmsg_type linux_sendmsg== -1
  849. || cmsg->cmsg_level != SOL_SOCKET)
  850. goto bad;
  851. /*
  852. * Some applications (e.g. pulseaudio) attempt to
  853. * send ancillary data even if the underlying protocol
  854. * doesn't support it which is not allowed in the
  855. * FreeBSD system call interface.
  856. */
  857. if (sa_family != AF_UNIX)
  858. continue;
  859. data = LINUX_CMSG_DATA(ptr_cmsg);
  860. datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ;
  861. switch (cmsg->cmsg_type)
  862. {
  863. case SCM_RIGHTS:
  864. break;
  865. case SCM_CREDS:
  866. data = &cmcred;
  867. datalen = sizeof(cmcred);
  868. /*
  869. * The lower levels will fill in the structure
  870. */
  871. bzero(data, datalen);
  872. break;
  873. }
  874. cmsg->cmsg_len = CMSG_LEN(datalen);
  875. error = ENOBUFS;
  876. if (!m_append(control, CMSG_HDRSZ, (c_caddr_t)cmsg))
  877. goto bad;
  878. if (!m_append(control, datalen, (c_caddr_t)data))
  879. goto bad;
  880. } while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg)));
  881. if (m_length(control, NULL) == 0) {
  882. m_freem(control);
  883. control = NULL;
  884. }
  885. }
  886. #endif
  887. error = linux_sendit(s, msg, flags, NULL, bytes);
  888. #if 0
  889. bad:
  890. free(iov);
  891. if (cmsg)
  892. free(cmsg);
  893. #endif
  894. return (error);
  895. }
  896. struct linux_recvmsg_args {
  897. int s;
  898. l_uintptr_t msg;
  899. int flags;
  900. };
  901. /* FIXME: OSv - flags are ignored, the flags
  902. * inside the msghdr are used instead */
  903. int
  904. linux_recvmsg(int s, struct msghdr *msg, int flags, ssize_t* bytes)
  905. {
  906. #if 0
  907. socklen_t datalen, outlen;
  908. struct mbuf *control = NULL;
  909. struct mbuf **controlp;
  910. caddr_t outbuf;
  911. void *data;
  912. int error, i, fd, fds, *fdp;
  913. #endif
  914. int error;
  915. error = linux_to_bsd_msghdr(msg);
  916. if (error)
  917. return (error);
  918. if (msg->msg_name) {
  919. error = linux_to_bsd_sockaddr((struct bsd_sockaddr *)msg->msg_name,
  920. msg->msg_namelen);
  921. if (error)
  922. goto bad;
  923. }
  924. assert(msg->msg_control == NULL);
  925. error = kern_recvit(s, msg, NULL, bytes);
  926. if (error)
  927. goto bad;
  928. error = bsd_to_linux_msghdr(msg);
  929. if (error)
  930. goto bad;
  931. if (msg->msg_name) {
  932. error = bsd_to_linux_sockaddr((struct bsd_sockaddr *)msg->msg_name);
  933. if (error)
  934. goto bad;
  935. }
  936. if (msg->msg_name && msg->msg_namelen > 2) {
  937. error = linux_sa_put((bsd_osockaddr*)msg->msg_name);
  938. if (error)
  939. goto bad;
  940. }
  941. assert(msg->msg_controllen == 0);
  942. assert(msg->msg_control == NULL);
  943. #if 0
  944. if (control) {
  945. linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
  946. msg.msg_control = mtod(control, struct cmsghdr *);
  947. msg.msg_controllen = control->m_hdr.mh_len;
  948. cm = CMSG_FIRSTHDR(&msg);
  949. while (cm != NULL) {
  950. linux_cmsg->cmsg_type =
  951. bsd_to_linux_cmsg_type(cm->cmsg_type);
  952. linux_cmsg->cmsg_level =
  953. bsd_to_linux_sockopt_level(cm->cmsg_level);
  954. if (linux_cmsg->cmsg_type == -1
  955. || cm->cmsg_level != SOL_SOCKET)
  956. {
  957. error = EINVAL;
  958. goto bad;
  959. }
  960. data = CMSG_DATA(cm);
  961. datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
  962. switch (cm->cmsg_type)
  963. {
  964. case SCM_RIGHTS:
  965. if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
  966. fds = datalen / sizeof(int);
  967. fdp = data;
  968. for (i = 0; i < fds; i++) {
  969. fd = *fdp++;
  970. (void)kern_fcntl(td, fd,
  971. F_SETFD, FD_CLOEXEC);
  972. }
  973. }
  974. break;
  975. case SCM_CREDS:
  976. /*
  977. * Currently LOCAL_CREDS is never in
  978. * effect for Linux so no need to worry
  979. * about sockcred
  980. */
  981. if (datalen != sizeof(*cmcred)) {
  982. error = EMSGSIZE;
  983. goto bad;
  984. }
  985. cmcred = (struct cmsgcred *)data;
  986. bzero(&linux_ucred, sizeof(linux_ucred));
  987. linux_ucred.pid = cmcred->cmcred_pid;
  988. linux_ucred.uid = cmcred->cmcred_uid;
  989. linux_ucred.gid = cmcred->cmcred_gid;
  990. data = &linux_ucred;
  991. datalen = sizeof(linux_ucred);
  992. break;
  993. }
  994. if (outlen + LINUX_CMSG_LEN(datalen) >
  995. linux_msg.msg_controllen) {
  996. if (outlen == 0) {
  997. error = EMSGSIZE;
  998. goto bad;
  999. } else {
  1000. linux_msg.msg_flags |=
  1001. LINUX_MSG_CTRUNC;
  1002. goto out;
  1003. }
  1004. }
  1005. linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen);
  1006. error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ);
  1007. if (error)
  1008. goto bad;
  1009. outbuf += L_CMSG_HDRSZ;
  1010. error = copyout(data, outbuf, datalen);
  1011. if (error)
  1012. goto bad;
  1013. outbuf += LINUX_CMSG_ALIGN(datalen);
  1014. outlen += LINUX_CMSG_LEN(datalen);
  1015. cm = CMSG_NXTHDR(&msg, cm);
  1016. }
  1017. }
  1018. out:
  1019. linux_msg.msg_controllen = outlen;
  1020. error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
  1021. bad:
  1022. free(iov);
  1023. if (control != NULL)
  1024. m_freem(control);
  1025. if (linux_cmsg != NULL)
  1026. free(linux_cmsg);
  1027. #endif
  1028. bad:
  1029. return (error);
  1030. }
  1031. int
  1032. linux_shutdown(int s, int how)
  1033. {
  1034. return (sys_shutdown(s, how));
  1035. }
  1036. int linux_to_bsd_tcp_sockopt(int name)
  1037. {
  1038. // Not using the constants because we never know what will the compiler
  1039. // will insert here. They are interface, so they shouldn't change.
  1040. switch (name) {
  1041. case 4: // TCP_KEEPIDLE
  1042. return 0x100;
  1043. case 5: // TCP_KEEPINTVL
  1044. return 0x200;
  1045. case 6: // TCP_KEEPCNT
  1046. return 0x400;
  1047. case 13: // TCP_CONGESTION
  1048. return 0x40;
  1049. }
  1050. return name;
  1051. }
  1052. int
  1053. linux_setsockopt(int s, int level, int name, caddr_t val, int valsize)
  1054. {
  1055. int error;
  1056. switch (level) {
  1057. case SOL_SOCKET:
  1058. name = linux_to_bsd_so_sockopt(name);
  1059. break;
  1060. case IPPROTO_IP:
  1061. name = linux_to_bsd_ip_sockopt(name);
  1062. break;
  1063. case IPPROTO_TCP:
  1064. name = linux_to_bsd_tcp_sockopt(name);
  1065. /* Linux TCP option values match BSD's */
  1066. break;
  1067. default:
  1068. name = -1;
  1069. break;
  1070. }
  1071. if (name == -1)
  1072. return (ENOPROTOOPT);
  1073. /* FIXME: OSv - enable when we have IPv6 */
  1074. #if 0
  1075. if (name == IPV6_NEXTHOP) {
  1076. linux_to_bsd_sockaddr((struct bsd_sockaddr *)bsd_args.val,
  1077. bsd_args.valsize);
  1078. error = sys_setsockopt(td, &bsd_args);
  1079. bsd_to_linux_sockaddr((struct bsd_sockaddr *)bsd_args.val);
  1080. } else
  1081. #endif
  1082. error = sys_setsockopt(s, level, name, val, valsize);
  1083. return (error);
  1084. }
  1085. int
  1086. linux_getsockopt(int s, int level, int name, void *val, socklen_t *valsize)
  1087. {
  1088. int error;
  1089. switch (level) {
  1090. case SOL_SOCKET:
  1091. name = linux_to_bsd_so_sockopt(name);
  1092. break;
  1093. case IPPROTO_IP:
  1094. name = linux_to_bsd_ip_sockopt(name);
  1095. break;
  1096. case IPPROTO_TCP:
  1097. name = linux_to_bsd_tcp_sockopt(name);
  1098. break;
  1099. default:
  1100. name = -1;
  1101. break;
  1102. }
  1103. if (name == -1)
  1104. return (EINVAL);
  1105. /* FIXME: OSv - enable when we have IPv6 */
  1106. #if 0
  1107. if (name == IPV6_NEXTHOP) {
  1108. error = sys_getsockopt(td, &bsd_args);
  1109. bsd_to_linux_sockaddr((struct bsd_sockaddr *)bsd_args.val);
  1110. } else
  1111. #endif
  1112. error = sys_getsockopt(s, level, name, val, valsize);
  1113. return (error);
  1114. }