PageRenderTime 9ms CodeModel.GetById 61ms app.highlight 56ms RepoModel.GetById 1ms app.codeStats 1ms

/openvswitch/lib/socket-util.c

https://github.com/kevinfhell/dpdk-ovs
C | 1395 lines | 1026 code | 161 blank | 208 comment | 228 complexity | 301dd0a4d2b7b47ca7e860691dbfd79b MD5 | raw file
   1/*
   2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
   3 *
   4 * Licensed under the Apache License, Version 2.0 (the "License");
   5 * you may not use this file except in compliance with the License.
   6 * You may obtain a copy of the License at:
   7 *
   8 *     http://www.apache.org/licenses/LICENSE-2.0
   9 *
  10 * Unless required by applicable law or agreed to in writing, software
  11 * distributed under the License is distributed on an "AS IS" BASIS,
  12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 * See the License for the specific language governing permissions and
  14 * limitations under the License.
  15 */
  16
  17#include <config.h>
  18#include "socket-util.h"
  19#include <arpa/inet.h>
  20#include <errno.h>
  21#include <fcntl.h>
  22#include <net/if.h>
  23#include <netdb.h>
  24#include <poll.h>
  25#include <stddef.h>
  26#include <stdio.h>
  27#include <stdlib.h>
  28#include <string.h>
  29#include <sys/ioctl.h>
  30#include <sys/resource.h>
  31#include <sys/socket.h>
  32#include <sys/stat.h>
  33#include <sys/uio.h>
  34#include <sys/un.h>
  35#include <unistd.h>
  36#include "dynamic-string.h"
  37#include "fatal-signal.h"
  38#include "packets.h"
  39#include "poll-loop.h"
  40#include "util.h"
  41#include "vlog.h"
  42#if AF_PACKET && LINUX_DATAPATH
  43#include <linux/if_packet.h>
  44#endif
  45#ifdef HAVE_NETLINK
  46#include "netlink-protocol.h"
  47#include "netlink-socket.h"
  48#endif
  49
  50VLOG_DEFINE_THIS_MODULE(socket_util);
  51
  /* Maintaining code behind #ifdefs is painful because every variant has to be
   * built to be checked.  This file therefore compiles all of its code on every
   * target and selects behavior with a plain "if (LINUX_DATAPATH)" rather than
   * "#ifdef __linux__".  When the build does not provide LINUX_DATAPATH it
   * defaults to 0 here. */
#if !defined(LINUX_DATAPATH)
#define LINUX_DATAPATH 0
#endif

/* Fallback for headers that lack O_DIRECTORY: a value of 0 makes OR-ing the
 * flag into open() flags a no-op. */
#if !defined(O_DIRECTORY)
#define O_DIRECTORY 0
#endif

/* Defined later in this file; declared here because get_socket_rcvbuf() uses
 * it before its definition. */
static int getsockopt_int(int fd, int level, int option, const char *optname,
                          int *valuep);
  65
  /* Sets 'fd' to non-blocking mode by adding O_NONBLOCK to its file status
   * flags.  Returns 0 if successful, otherwise a positive errno value.
   *
   * The errno value of a failed fcntl() call is captured before it is logged,
   * so that the logging call cannot change the value that is returned. */
int
set_nonblocking(int fd)
{
    int flags;
    int error;

    flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        error = errno;
        VLOG_ERR("fcntl(F_GETFL) failed: %s", ovs_strerror(error));
        return error;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        error = errno;
        VLOG_ERR("fcntl(F_SETFL) failed: %s", ovs_strerror(error));
        return error;
    }

    return 0;
}
  84
  /* Like set_nonblocking(), except that a failure terminates the process with
   * exit status EXIT_FAILURE instead of being reported to the caller. */
void
xset_nonblocking(int fd)
{
    int error = set_nonblocking(fd);

    if (error != 0) {
        exit(EXIT_FAILURE);
    }
}
  92
  /* Sets the IP_TOS socket option of 'fd' from 'dscp', which must be in the
   * range [0, 63]; the value is shifted left by two bits before it is handed to
   * setsockopt().
   *
   * Returns 0 if successful, EINVAL if 'dscp' is out of range, otherwise the
   * positive errno value from setsockopt(). */
int
set_dscp(int fd, uint8_t dscp)
{
    if (dscp <= 63) {
        int tos = dscp << 2;
        int rc = setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof tos);

        return rc ? errno : 0;
    }

    return EINVAL;
}
 109
 /* Returns true if 'limit' is an ordinary numeric resource limit, false if it
  * equals RLIM_INFINITY or, on systems that define them, RLIM_SAVED_CUR or
  * RLIM_SAVED_MAX.  (FreeBSD 8.0 lacks both RLIM_SAVED_* constants.) */
static bool
rlim_is_finite(rlim_t limit)
{
    bool finite = limit != RLIM_INFINITY;

#ifdef RLIM_SAVED_CUR
    finite = finite && limit != RLIM_SAVED_CUR;
#endif

#ifdef RLIM_SAVED_MAX
    finite = finite && limit != RLIM_SAVED_MAX;
#endif

    return finite;
}
 131
 132/* Returns the maximum valid FD value, plus 1. */
 133int
 134get_max_fds(void)
 135{
 136    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 137    static int max_fds;
 138
 139    if (ovsthread_once_start(&once)) {
 140        struct rlimit r;
 141        if (!getrlimit(RLIMIT_NOFILE, &r) && rlim_is_finite(r.rlim_cur)) {
 142            max_fds = r.rlim_cur;
 143        } else {
 144            VLOG_WARN("failed to obtain fd limit, defaulting to 1024");
 145            max_fds = 1024;
 146        }
 147        ovsthread_once_done(&once);
 148    }
 149
 150    return max_fds;
 151}
 152
 153/* Translates 'host_name', which must be a string representation of an IP
 154 * address, into a numeric IP address in '*addr'.  Returns 0 if successful,
 155 * otherwise a positive errno value. */
 156int
 157lookup_ip(const char *host_name, struct in_addr *addr)
 158{
 159    if (!inet_aton(host_name, addr)) {
 160        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 161        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IP address", host_name);
 162        return ENOENT;
 163    }
 164    return 0;
 165}
 166
 167/* Translates 'host_name', which must be a string representation of an IPv6
 168 * address, into a numeric IPv6 address in '*addr'.  Returns 0 if successful,
 169 * otherwise a positive errno value. */
 170int
 171lookup_ipv6(const char *host_name, struct in6_addr *addr)
 172{
 173    if (inet_pton(AF_INET6, host_name, addr) != 1) {
 174        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 175        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IPv6 address", host_name);
 176        return ENOENT;
 177    }
 178    return 0;
 179}
 180
 /* Resolves 'host_name', which may be either a host name or the string form of
  * an IP address, into a numeric IPv4 address stored in '*addr'.  Returns 0 if
  * successful, otherwise a positive errno value derived from the getaddrinfo()
  * error code.
  *
  * Most Open vSwitch code should not use this because it causes deadlocks:
  * getaddrinfo() sends out a DNS request, but that starts a new flow for which
  * OVS must set up a flow, which it cannot do while it is waiting for the DNS
  * reply.  The synchronous lookup also delays other activity.  (This could be
  * solved, but it has not seemed worthwhile yet.) */
int
lookup_hostname(const char *host_name, struct in_addr *addr)
{
    struct addrinfo *answer;
    struct addrinfo hints;
    int gai_error;

    /* A literal address needs no resolver round trip. */
    if (inet_aton(host_name, addr)) {
        return 0;
    }

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;

    gai_error = getaddrinfo(host_name, NULL, &hints, &answer);
    if (!gai_error) {
        *addr = ALIGNED_CAST(struct sockaddr_in *,
                             answer->ai_addr)->sin_addr;
        freeaddrinfo(answer);
        return 0;
    }

    /* Map the resolver's error code onto the closest errno value. */
    switch (gai_error) {
#ifdef EAI_ADDRFAMILY
    case EAI_ADDRFAMILY:
#endif
    case EAI_NONAME:
    case EAI_SERVICE:
        return ENOENT;

    case EAI_AGAIN:
        return EAGAIN;

    case EAI_BADFLAGS:
    case EAI_FAMILY:
    case EAI_SOCKTYPE:
        return EINVAL;

    case EAI_FAIL:
        return EIO;

    case EAI_MEMORY:
        return ENOMEM;

#ifdef EAI_NODATA
    case EAI_NODATA:
        return ENXIO;
#endif

    case EAI_SYSTEM:
        return errno;

    default:
        return EPROTO;
    }
}
 243
 244int
 245check_connection_completion(int fd)
 246{
 247    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
 248    struct pollfd pfd;
 249    int retval;
 250
 251    pfd.fd = fd;
 252    pfd.events = POLLOUT;
 253    do {
 254        retval = poll(&pfd, 1, 0);
 255    } while (retval < 0 && errno == EINTR);
 256    if (retval == 1) {
 257        if (pfd.revents & POLLERR) {
 258            ssize_t n = send(fd, "", 1, MSG_DONTWAIT);
 259            if (n < 0) {
 260                return errno;
 261            } else {
 262                VLOG_ERR_RL(&rl, "poll return POLLERR but send succeeded");
 263                return EPROTO;
 264            }
 265        }
 266        return 0;
 267    } else if (retval < 0) {
 268        VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(errno));
 269        return errno;
 270    } else {
 271        return EAGAIN;
 272    }
 273}
 274
 275/* Drain all the data currently in the receive queue of a datagram socket (and
 276 * possibly additional data).  There is no way to know how many packets are in
 277 * the receive queue, but we do know that the total number of bytes queued does
 278 * not exceed the receive buffer size, so we pull packets until none are left
 279 * or we've read that many bytes. */
 280int
 281drain_rcvbuf(int fd)
 282{
 283    int rcvbuf;
 284
 285    rcvbuf = get_socket_rcvbuf(fd);
 286    if (rcvbuf < 0) {
 287        return -rcvbuf;
 288    }
 289
 290    while (rcvbuf > 0) {
 291        /* In Linux, specifying MSG_TRUNC in the flags argument causes the
 292         * datagram length to be returned, even if that is longer than the
 293         * buffer provided.  Thus, we can use a 1-byte buffer to discard the
 294         * incoming datagram and still be able to account how many bytes were
 295         * removed from the receive buffer.
 296         *
 297         * On other Unix-like OSes, MSG_TRUNC has no effect in the flags
 298         * argument. */
 299        char buffer[LINUX_DATAPATH ? 1 : 2048];
 300        ssize_t n_bytes = recv(fd, buffer, sizeof buffer,
 301                               MSG_TRUNC | MSG_DONTWAIT);
 302        if (n_bytes <= 0 || n_bytes >= rcvbuf) {
 303            break;
 304        }
 305        rcvbuf -= n_bytes;
 306    }
 307    return 0;
 308}
 309
 /* Queries SO_RCVBUF on socket 'sock'.  Returns the receive buffer size on
  * success, or a negative errno value if the query fails. */
int
get_socket_rcvbuf(int sock)
{
    int size;
    int error = getsockopt_int(sock, SOL_SOCKET, SO_RCVBUF, "SO_RCVBUF",
                               &size);

    if (error) {
        return -error;
    }
    return size;
}
 321
 /* Reads and discards up to 'n_packets' datagrams from 'fd', stopping as soon
  * as a read() returns no data or fails.  ('fd' should therefore be in
  * non-blocking mode.) */
void
drain_fd(int fd, size_t n_packets)
{
    /* One byte would do in most circumstances.  The larger size is defensive
     * against someday using a Linux tap device without TUN_NO_PI, in which
     * case a buffer smaller than sizeof(struct tun_pi) makes read() fail with
     * EINVAL. */
    char buffer[128];

    while (n_packets > 0 && read(fd, buffer, sizeof buffer) > 0) {
        n_packets--;
    }
}
 339
 /* Fills in '*un' as an AF_UNIX address for the file 'name', copying 'name'
  * into the path field with ovs_strzcpy().  Stores in '*un_len' the offset of
  * the path field plus the length of the stored path, including its null
  * terminator. */
static void
make_sockaddr_un__(const char *name, struct sockaddr_un *un, socklen_t *un_len)
{
    const size_t path_ofs = offsetof(struct sockaddr_un, sun_path);

    un->sun_family = AF_UNIX;
    ovs_strzcpy(un->sun_path, name, sizeof un->sun_path);

    *un_len = path_ofs + strlen(un->sun_path) + 1;
}
 350
 351/* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
 352 * '*un_len' the size of the sockaddr_un.
 353 *
 354 * Returns 0 on success, otherwise a positive errno value.  On success,
 355 * '*dirfdp' is either -1 or a nonnegative file descriptor that the caller
 356 * should close after using '*un' to bind or connect.  On failure, '*dirfdp' is
 357 * -1. */
 358static int
 359make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len,
 360                 int *dirfdp)
 361{
 362    enum { MAX_UN_LEN = sizeof un->sun_path - 1 };
 363
 364    *dirfdp = -1;
 365    if (strlen(name) > MAX_UN_LEN) {
 366        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
 367
 368        if (LINUX_DATAPATH) {
 369            /* 'name' is too long to fit in a sockaddr_un, but we have a
 370             * workaround for that on Linux: shorten it by opening a file
 371             * descriptor for the directory part of the name and indirecting
 372             * through /proc/self/fd/<dirfd>/<basename>. */
 373            char *dir, *base;
 374            char *short_name;
 375            int dirfd;
 376
 377            dir = dir_name(name);
 378            base = base_name(name);
 379
 380            dirfd = open(dir, O_DIRECTORY | O_RDONLY);
 381            if (dirfd < 0) {
 382                free(base);
 383                free(dir);
 384                return errno;
 385            }
 386
 387            short_name = xasprintf("/proc/self/fd/%d/%s", dirfd, base);
 388            free(dir);
 389            free(base);
 390
 391            if (strlen(short_name) <= MAX_UN_LEN) {
 392                make_sockaddr_un__(short_name, un, un_len);
 393                free(short_name);
 394                *dirfdp = dirfd;
 395                return 0;
 396            }
 397            free(short_name);
 398            close(dirfd);
 399
 400            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
 401                         "%d bytes (even shortened)", name, MAX_UN_LEN);
 402        } else {
 403            /* 'name' is too long and we have no workaround. */
 404            VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum "
 405                         "%d bytes", name, MAX_UN_LEN);
 406        }
 407
 408        return ENAMETOOLONG;
 409    } else {
 410        make_sockaddr_un__(name, un, un_len);
 411        return 0;
 412    }
 413}
 414
 /* Binds Unix domain socket 'fd' to the address 'sun' of length 'sun_len'
  * while the process umask is temporarily set to 0077, then restores the
  * previous umask.  (According to _Unix Network Programming_, umask should
  * affect bind(), so the socket file ends up with permissions 0700.)
  *
  * Returns 0 if successful, otherwise the positive errno value from bind(). */
static int
bind_unix_socket(int fd, struct sockaddr *sun, socklen_t sun_len)
{
    mode_t saved_umask = umask(0077);
    int error = 0;

    if (bind(fd, sun, sun_len)) {
        error = errno;
    }
    umask(saved_umask);

    return error;
}
 425
 /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or
  * SOCK_STREAM).  If 'bind_path' is non-null, the socket is bound to it; any
  * existing file of that name is unlinked first and the path is registered to
  * be unlinked by the fatal-signal machinery.  If 'connect_path' is non-null,
  * the socket is connected to it.  If 'nonblock' is true, the socket is made
  * non-blocking.
  *
  * Returns the socket's fd if successful, otherwise a negative errno value
  * (EAGAIN is reported as EPROTO).  On failure the socket is closed and
  * 'bind_path', if any, is unlinked. */
int
make_unix_socket(int style, bool nonblock,
                 const char *bind_path, const char *connect_path)
{
    int error;
    int fd = socket(PF_UNIX, style, 0);

    if (fd < 0) {
        return -errno;
    }

    /* Set nonblocking mode right away, if we want it.  This prevents blocking
     * in connect(), if connect_path != NULL.  (In turn, that's a corner case:
     * it will only happen if style is SOCK_STREAM or SOCK_SEQPACKET, and only
     * if a backlog of un-accepted connections has built up in the kernel.)  */
    if (nonblock) {
        error = set_nonblocking(fd);
        if (error) {
            goto fail;
        }
    }

    if (bind_path) {
        struct sockaddr_un local;
        socklen_t local_len;
        int dirfd;

        if (unlink(bind_path) && errno != ENOENT) {
            VLOG_WARN("unlinking \"%s\": %s\n",
                      bind_path, ovs_strerror(errno));
        }
        fatal_signal_add_file_to_unlink(bind_path);

        error = make_sockaddr_un(bind_path, &local, &local_len, &dirfd);
        if (!error) {
            error = bind_unix_socket(fd, (struct sockaddr *) &local,
                                     local_len);
        }
        /* The directory fd is only needed while the address is in use. */
        if (dirfd >= 0) {
            close(dirfd);
        }
        if (error) {
            goto fail;
        }
    }

    if (connect_path) {
        struct sockaddr_un remote;
        socklen_t remote_len;
        int dirfd;

        error = make_sockaddr_un(connect_path, &remote, &remote_len, &dirfd);
        if (!error) {
            /* EINPROGRESS is not a failure here. */
            if (connect(fd, (struct sockaddr *) &remote, remote_len)
                && errno != EINPROGRESS) {
                error = errno;
            }
        }
        if (dirfd >= 0) {
            close(dirfd);
        }
        if (error) {
            goto fail;
        }
    }

    return fd;

fail:
    if (error == EAGAIN) {
        error = EPROTO;
    }
    if (bind_path) {
        fatal_signal_unlink_file_now(bind_path);
    }
    close(fd);
    return -error;
}
 509
 /* Given 'sun_len', the length of a sockaddr_un, returns the number of bytes
  * that follow the start of its sun_path member, or 0 if 'sun_len' is too small
  * to reach sun_path at all. */
int
get_unix_name_len(socklen_t sun_len)
{
    const size_t path_ofs = offsetof(struct sockaddr_un, sun_path);

    if (sun_len < path_ofs) {
        return 0;
    }
    return sun_len - path_ofs;
}
 517
 518ovs_be32
 519guess_netmask(ovs_be32 ip_)
 520{
 521    uint32_t ip = ntohl(ip_);
 522    return ((ip >> 31) == 0 ? htonl(0xff000000)   /* Class A */
 523            : (ip >> 30) == 2 ? htonl(0xffff0000) /* Class B */
 524            : (ip >> 29) == 6 ? htonl(0xffffff00) /* Class C */
 525            : htonl(0));                          /* ??? */
 526}
 527
 /* Parses 'target_', which should be a string in the format "<host>[:<port>]".
  * <host> is required and must be the string form of an IP address.  If
  * 'default_port' is nonzero then <port> is optional and defaults to
  * 'default_port'; a <port> that parses as 0 is treated as omitted.
  *
  * A <port> that is negative or greater than 65535 is rejected rather than
  * being silently truncated to 16 bits.
  *
  * On success, returns true and stores the parsed remote address into '*sinp'.
  * On failure, logs an error, stores zeros into '*sinp', and returns false. */
bool
inet_parse_active(const char *target_, uint16_t default_port,
                  struct sockaddr_in *sinp)
{
    enum { MAX_PORT = 65535 };

    char *target = xstrdup(target_);
    char *save_ptr = NULL;
    const char *host_name;
    const char *port_string;
    long int port;
    bool ok = false;

    /* Defaults.  Zero the whole structure first so that no member (such as
     * sin_zero) is left indeterminate on the success path. */
    memset(sinp, 0, sizeof *sinp);
    sinp->sin_family = AF_INET;
    sinp->sin_port = htons(default_port);

    /* Tokenize. */
    host_name = strtok_r(target, ":", &save_ptr);
    port_string = strtok_r(NULL, ":", &save_ptr);
    if (!host_name) {
        VLOG_ERR("%s: bad peer name format", target_);
        goto exit;
    }

    /* Look up IP, port. */
    if (lookup_ip(host_name, &sinp->sin_addr)) {
        goto exit;
    }

    /* strtol() saturates on overflow, so oversized input lands outside the
     * valid range and is rejected below. */
    port = port_string ? strtol(port_string, NULL, 10) : 0;
    if (port < 0 || port > MAX_PORT) {
        VLOG_ERR("%s: port number out of range", target_);
        goto exit;
    } else if (port) {
        sinp->sin_port = htons(port);
    } else if (!default_port) {
        VLOG_ERR("%s: port number must be specified", target_);
        goto exit;
    }

    ok = true;

exit:
    if (!ok) {
        memset(sinp, 0, sizeof *sinp);
    }
    free(target);
    return ok;
}
 576
 /* Opens a non-blocking IPv4 socket of the specified 'style' and connects to
  * 'target', which should be a string in the format "<host>[:<port>]".  <host>
  * is required.  If 'default_port' is nonzero then <port> is optional and
  * defaults to 'default_port'.
  *
  * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
  *
  * On success, returns 0 (indicating connection complete) or EAGAIN (indicating
  * connection in progress), in which case the new file descriptor is stored
  * into '*fdp'.  On failure, returns a positive errno value other than EAGAIN
  * and stores -1 into '*fdp'.  (A 'target' that cannot be parsed is reported as
  * EAFNOSUPPORT.)
  *
  * If 'sinp' is non-null, then on success the target address is stored into
  * '*sinp'.
  *
  * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
  * should be in the range [0, 63] and will automatically be shifted to the
  * appropriate place in the IP tos field. */
int
inet_open_active(int style, const char *target, uint16_t default_port,
                 struct sockaddr_in *sinp, int *fdp, uint8_t dscp)
{
    struct sockaddr_in sin;
    int fd = -1;
    int error;

    /* Parse. */
    if (!inet_parse_active(target, default_port, &sin)) {
        error = EAFNOSUPPORT;
        goto exit;
    }

    /* Create non-blocking socket. */
    fd = socket(AF_INET, style, 0);
    if (fd < 0) {
        /* Save errno before logging so the logging call cannot change the
         * value that is returned. */
        error = errno;
        VLOG_ERR("%s: socket: %s", target, ovs_strerror(error));
        goto exit;
    }
    error = set_nonblocking(fd);
    if (error) {
        goto exit;
    }

    /* The dscp bits must be configured before connect() to ensure that the TOS
     * field is set during the connection establishment.  If set after
     * connect(), the handshake SYN frames will be sent with a TOS of 0. */
    error = set_dscp(fd, dscp);
    if (error) {
        VLOG_ERR("%s: socket: %s", target, ovs_strerror(error));
        goto exit;
    }

    /* Connect. */
    error = connect(fd, (struct sockaddr *) &sin, sizeof sin) == 0 ? 0 : errno;
    if (error == EINPROGRESS) {
        error = EAGAIN;
    }

exit:
    if (!error || error == EAGAIN) {
        if (sinp) {
            *sinp = sin;
        }
    } else if (fd >= 0) {
        close(fd);
        fd = -1;
    }
    *fdp = fd;
    return error;
}
 648
 /* Parses 'target_', which should be a string in the format "[<port>][:<ip>]":
  *
  *      - If 'default_port' is -1, then <port> is required.  Otherwise, if
  *        <port> is omitted, then 'default_port' is used instead.
  *
  *      - If <port> (or 'default_port', if used) is 0, then no port is bound
  *        and the TCP/IP stack will select a port.
  *
  *      - If <ip> is omitted then the IP address is wildcarded.
  *
  * If successful, stores the address into '*sinp' and returns true; otherwise
  * zeros '*sinp' and returns false. */
bool
inet_parse_passive(const char *target_, int default_port,
                   struct sockaddr_in *sinp)
{
    char *copy = xstrdup(target_);
    char *cursor = copy;
    const char *port_field;
    const char *ip_field;
    bool ok = false;
    int port;

    /* Start from a wildcard address on the default port. */
    memset(sinp, 0, sizeof *sinp);
    sinp->sin_family = AF_INET;
    sinp->sin_addr.s_addr = htonl(INADDR_ANY);
    sinp->sin_port = htons(default_port);

    /* First field: optional port number. */
    port_field = strsep(&cursor, ":");
    if (port_field && str_to_int(port_field, 10, &port)) {
        sinp->sin_port = htons(port);
    } else if (default_port < 0) {
        VLOG_ERR("%s: port number must be specified", target_);
        goto done;
    }

    /* Second field: optional bind IP.  An absent or empty field keeps the
     * wildcard; otherwise the field must be a valid IP address. */
    ip_field = strsep(&cursor, ":");
    if (!ip_field || !ip_field[0] || !lookup_ip(ip_field, &sinp->sin_addr)) {
        ok = true;
    }

done:
    if (!ok) {
        memset(sinp, 0, sizeof *sinp);
    }
    free(copy);
    return ok;
}
 702
 703
 /* Opens a non-blocking IPv4 socket of the specified 'style', binds it to
  * 'target', and (for SOCK_STREAM) listens for incoming connections.  Parses
  * 'target' in the same way as inet_parse_passive().
  *
  * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
  *
  * For TCP, the socket will have SO_REUSEADDR turned on.
  *
  * On success, returns a non-negative file descriptor.  On failure, returns a
  * negative errno value (-EAFNOSUPPORT if 'target' cannot be parsed).
  *
  * If 'sinp' is non-null, then on success the bound address is stored into
  * '*sinp'.
  *
  * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
  * should be in the range [0, 63] and will automatically be shifted to the
  * appropriate place in the IP tos field. */
int
inet_open_passive(int style, const char *target, int default_port,
                  struct sockaddr_in *sinp, uint8_t dscp)
{
    struct sockaddr_in sin;
    unsigned int reuse = 1;
    bool kernel_chooses_port;
    int error;
    int fd;

    if (!inet_parse_passive(target, default_port, &sin)) {
        return -EAFNOSUPPORT;
    }

    /* Create non-blocking socket, set SO_REUSEADDR. */
    fd = socket(AF_INET, style, 0);
    if (fd < 0) {
        error = errno;
        VLOG_ERR("%s: socket: %s", target, ovs_strerror(error));
        return -error;
    }
    error = set_nonblocking(fd);
    if (error) {
        goto fail;
    }
    if (style == SOCK_STREAM) {
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                       &reuse, sizeof reuse) < 0) {
            error = errno;
            VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s",
                     target, ovs_strerror(error));
            goto fail;
        }
    }

    /* Bind. */
    if (bind(fd, (struct sockaddr *) &sin, sizeof sin) < 0) {
        error = errno;
        VLOG_ERR("%s: bind: %s", target, ovs_strerror(error));
        goto fail;
    }

    /* Configure the DSCP bits before the socket starts listening. */
    error = set_dscp(fd, dscp);
    if (error) {
        VLOG_ERR("%s: socket: %s", target, ovs_strerror(error));
        goto fail;
    }

    /* Listen. */
    if (style == SOCK_STREAM && listen(fd, 10) < 0) {
        error = errno;
        VLOG_ERR("%s: listen: %s", target, ovs_strerror(error));
        goto fail;
    }

    /* The bound address is read back when the caller wants it or when port 0
     * was requested, so that the port actually in use can be logged. */
    kernel_chooses_port = sin.sin_port == htons(0);
    if (sinp || kernel_chooses_port) {
        socklen_t sin_len = sizeof sin;

        if (getsockname(fd, (struct sockaddr *) &sin, &sin_len) < 0) {
            error = errno;
            VLOG_ERR("%s: getsockname: %s", target, ovs_strerror(error));
            goto fail;
        }
        if (sin.sin_family != AF_INET || sin_len != sizeof sin) {
            error = EAFNOSUPPORT;
            VLOG_ERR("%s: getsockname: invalid socket name", target);
            goto fail;
        }
        if (sinp) {
            *sinp = sin;
        }
        if (kernel_chooses_port) {
            VLOG_INFO("%s: listening on port %"PRIu16,
                      target, ntohs(sin.sin_port));
        }
    }

    return fd;

fail:
    close(fd);
    return -error;
}
 804
 805/* Returns a readable and writable fd for /dev/null, if successful, otherwise
 806 * a negative errno value.  The caller must not close the returned fd (because
 807 * the same fd will be handed out to subsequent callers). */
 808int
 809get_null_fd(void)
 810{
 811    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 812    static int null_fd;
 813
 814    if (ovsthread_once_start(&once)) {
 815        null_fd = open("/dev/null", O_RDWR);
 816        if (null_fd < 0) {
 817            int error = errno;
 818            VLOG_ERR("could not open /dev/null: %s", ovs_strerror(error));
 819            null_fd = -error;
 820        }
 821        ovsthread_once_done(&once);
 822    }
 823
 824    return null_fd;
 825}
 826
 /* Reads exactly 'size' bytes from 'fd' into 'p_', retrying reads that are
  * interrupted by EINTR and continuing after short reads.
  *
  * Stores the number of bytes actually read in '*bytes_read'.  Returns 0 if all
  * 'size' bytes were read, EOF if end of file was reached first, otherwise the
  * positive errno value from read(). */
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
    uint8_t *cursor = p_;

    *bytes_read = 0;
    while (size > 0) {
        ssize_t n = read(fd, cursor, size);

        if (n == 0) {
            return EOF;
        }
        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }

        cursor += n;
        size -= n;
        *bytes_read += n;
    }
    return 0;
}
 847
 /* Writes all 'size' bytes at 'p_' to 'fd', retrying writes that are
  * interrupted by EINTR and continuing after short writes.
  *
  * Stores the number of bytes actually written in '*bytes_written'.  Returns 0
  * if everything was written, EPROTO (after logging a warning) if write()
  * returns 0, otherwise the positive errno value from write(). */
int
write_fully(int fd, const void *p_, size_t size, size_t *bytes_written)
{
    const uint8_t *cursor = p_;

    *bytes_written = 0;
    while (size > 0) {
        ssize_t n = write(fd, cursor, size);

        if (n == 0) {
            VLOG_WARN("write returned 0");
            return EPROTO;
        }
        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }

        cursor += n;
        size -= n;
        *bytes_written += n;
    }
    return 0;
}
 869
 /* Given file name 'file_name', fsyncs the directory in which it is contained.
  * Returns 0 if successful, otherwise a positive errno value.
  *
  * An fsync() failure with EINVAL or EROFS is taken to mean that the directory
  * does not support synchronization and is not reported as an error. */
int
fsync_parent_dir(const char *file_name)
{
    char *dir = dir_name(file_name);
    int error = 0;
    int fd;

    fd = open(dir, O_RDONLY);
    if (fd < 0) {
        error = errno;
        VLOG_ERR("%s: open failed (%s)", dir, ovs_strerror(error));
    } else {
        if (fsync(fd) && errno != EINVAL && errno != EROFS) {
            error = errno;
            VLOG_ERR("%s: fsync failed (%s)", dir, ovs_strerror(error));
        }
        close(fd);
    }
    free(dir);

    return error;
}
 900
 /* Obtains the modification time of the file named 'file_name' to the greatest
  * precision the build supports: nanoseconds come from st_mtim.tv_nsec or
  * st_mtimensec when the corresponding HAVE_STRUCT_STAT_* macro is set, and are
  * 0 otherwise.
  *
  * If successful, stores the mtime in '*mtime' and returns 0.  On error,
  * returns a positive errno value and stores zeros in '*mtime'. */
int
get_mtime(const char *file_name, struct timespec *mtime)
{
    struct stat st;

    if (stat(file_name, &st)) {
        mtime->tv_sec = mtime->tv_nsec = 0;
        return errno;
    }

    mtime->tv_sec = st.st_mtime;

#if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
    mtime->tv_nsec = st.st_mtim.tv_nsec;
#elif HAVE_STRUCT_STAT_ST_MTIMENSEC
    mtime->tv_nsec = st.st_mtimensec;
#else
    mtime->tv_nsec = 0;
#endif

    return 0;
}
 927
 /* Creates a pipe and stores its two file descriptors in 'fds'.  If pipe()
  * fails, reports the error through VLOG_FATAL. */
void
xpipe(int fds[2])
{
    int rc = pipe(fds);

    if (rc != 0) {
        VLOG_FATAL("failed to create pipe (%s)", ovs_strerror(errno));
    }
}
 935
 /* Creates a pipe with xpipe() and then puts both of its ends, 'fds[0]' and
  * 'fds[1]', into non-blocking mode with xset_nonblocking(). */
void
xpipe_nonblocking(int fds[2])
{
    int i;

    xpipe(fds);
    for (i = 0; i < 2; i++) {
        xset_nonblocking(fds[i]);
    }
}
 943
 /* Creates a pair of connected sockets with socketpair(), passing 'domain',
  * 'type', and 'protocol' through unchanged, and stores them in 'fds'.  If
  * socketpair() fails, reports the error through VLOG_FATAL. */
void
xsocketpair(int domain, int type, int protocol, int fds[2])
{
    int rc = socketpair(domain, type, protocol, fds);

    if (rc != 0) {
        VLOG_FATAL("failed to create socketpair (%s)", ovs_strerror(errno));
    }
}
 951
 952static int
 953getsockopt_int(int fd, int level, int option, const char *optname, int *valuep)
 954{
 955    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
 956    socklen_t len;
 957    int value;
 958    int error;
 959
 960    len = sizeof value;
 961    if (getsockopt(fd, level, option, &value, &len)) {
 962        error = errno;
 963        VLOG_ERR_RL(&rl, "getsockopt(%s): %s", optname, ovs_strerror(error));
 964    } else if (len != sizeof value) {
 965        error = EINVAL;
 966        VLOG_ERR_RL(&rl, "getsockopt(%s): value is %u bytes (expected %zu)",
 967                    optname, (unsigned int) len, sizeof value);
 968    } else {
 969        error = 0;
 970    }
 971
 972    *valuep = error ? 0 : value;
 973    return error;
 974}
 975
 976static void
 977describe_sockaddr(struct ds *string, int fd,
 978                  int (*getaddr)(int, struct sockaddr *, socklen_t *))
 979{
 980    struct sockaddr_storage ss;
 981    socklen_t len = sizeof ss;
 982
 983    if (!getaddr(fd, (struct sockaddr *) &ss, &len)) {
 984        if (ss.ss_family == AF_INET) {
 985            struct sockaddr_in sin;
 986
 987            memcpy(&sin, &ss, sizeof sin);
 988            ds_put_format(string, IP_FMT":%"PRIu16,
 989                          IP_ARGS(sin.sin_addr.s_addr), ntohs(sin.sin_port));
 990        } else if (ss.ss_family == AF_UNIX) {
 991            struct sockaddr_un sun;
 992            const char *null;
 993            size_t maxlen;
 994
 995            memcpy(&sun, &ss, sizeof sun);
 996            maxlen = len - offsetof(struct sockaddr_un, sun_path);
 997            null = memchr(sun.sun_path, '\0', maxlen);
 998            ds_put_buffer(string, sun.sun_path,
 999                          null ? null - sun.sun_path : maxlen);
1000        }
1001#ifdef HAVE_NETLINK
1002        else if (ss.ss_family == AF_NETLINK) {
1003            int protocol;
1004
1005/* SO_PROTOCOL was introduced in 2.6.32.  Support it regardless of the version
1006 * of the Linux kernel headers in use at build time. */
1007#ifndef SO_PROTOCOL
1008#define SO_PROTOCOL 38
1009#endif
1010
1011            if (!getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, "SO_PROTOCOL",
1012                                &protocol)) {
1013                switch (protocol) {
1014                case NETLINK_ROUTE:
1015                    ds_put_cstr(string, "NETLINK_ROUTE");
1016                    break;
1017
1018                case NETLINK_GENERIC:
1019                    ds_put_cstr(string, "NETLINK_GENERIC");
1020                    break;
1021
1022                default:
1023                    ds_put_format(string, "AF_NETLINK family %d", protocol);
1024                    break;
1025                }
1026            } else {
1027                ds_put_cstr(string, "AF_NETLINK");
1028            }
1029        }
1030#endif
1031#if AF_PACKET && LINUX_DATAPATH
1032        else if (ss.ss_family == AF_PACKET) {
1033            struct sockaddr_ll sll;
1034
1035            memcpy(&sll, &ss, sizeof sll);
1036            ds_put_cstr(string, "AF_PACKET");
1037            if (sll.sll_ifindex) {
1038                char name[IFNAMSIZ];
1039
1040                if (if_indextoname(sll.sll_ifindex, name)) {
1041                    ds_put_format(string, "(%s)", name);
1042                } else {
1043                    ds_put_format(string, "(ifindex=%d)", sll.sll_ifindex);
1044                }
1045            }
1046            if (sll.sll_protocol) {
1047                ds_put_format(string, "(protocol=0x%"PRIu16")",
1048                              ntohs(sll.sll_protocol));
1049            }
1050        }
1051#endif
1052        else if (ss.ss_family == AF_UNSPEC) {
1053            ds_put_cstr(string, "AF_UNSPEC");
1054        } else {
1055            ds_put_format(string, "AF_%d", (int) ss.ss_family);
1056        }
1057    }
1058}
1059
1060
#ifdef LINUX_DATAPATH
/* Appends to 'string' a space followed by the target of the
 * /proc/self/fd/<fd> symbolic link, that is, the name the kernel reports for
 * 'fd'.  Appends nothing if the link cannot be read.
 *
 * readlink() neither null-terminates its output nor reports truncation: it
 * simply returns at most the buffer size.  A result that fills the buffer
 * completely is therefore treated as (probably) truncated and marked with a
 * trailing "...". */
static void
put_fd_filename(struct ds *string, int fd)
{
    char buf[1024];
    char *linkname;
    ssize_t n;

    linkname = xasprintf("/proc/self/fd/%d", fd);
    n = readlink(linkname, buf, sizeof buf);
    if (n > 0) {
        ds_put_char(string, ' ');
        ds_put_buffer(string, buf, n);
        if ((size_t) n >= sizeof buf) {
            ds_put_cstr(string, "...");
        }
    }
    free(linkname);
}
#endif
1081
1082/* Returns a malloc()'d string describing 'fd', for use in logging. */
1083char *
1084describe_fd(int fd)
1085{
1086    struct ds string;
1087    struct stat s;
1088
1089    ds_init(&string);
1090    if (fstat(fd, &s)) {
1091        ds_put_format(&string, "fstat failed (%s)", ovs_strerror(errno));
1092    } else if (S_ISSOCK(s.st_mode)) {
1093        describe_sockaddr(&string, fd, getsockname);
1094        ds_put_cstr(&string, "<->");
1095        describe_sockaddr(&string, fd, getpeername);
1096    } else {
1097        ds_put_cstr(&string, (isatty(fd) ? "tty"
1098                              : S_ISDIR(s.st_mode) ? "directory"
1099                              : S_ISCHR(s.st_mode) ? "character device"
1100                              : S_ISBLK(s.st_mode) ? "block device"
1101                              : S_ISREG(s.st_mode) ? "file"
1102                              : S_ISFIFO(s.st_mode) ? "FIFO"
1103                              : S_ISLNK(s.st_mode) ? "symbolic link"
1104                              : "unknown"));
1105#ifdef LINUX_DATAPATH
1106        put_fd_filename(&string, fd);
1107#endif
1108    }
1109    return ds_steal_cstr(&string);
1110}
1111
/* Returns the sum of the 'iov_len' members of the first 'n_iovs' elements of
 * 'iovs'.  The caller must ensure that the sum does not exceed SIZE_MAX. */
size_t
iovec_len(const struct iovec iovs[], size_t n_iovs)
{
    size_t total = 0;

    while (n_iovs > 0) {
        total += iovs->iov_len;
        iovs++;
        n_iovs--;
    }
    return total;
}
1125
/* Returns true if every one of the first 'n_iovs' elements of 'iovs' has an
 * 'iov_len' of zero (which includes the case 'n_iovs' == 0), otherwise
 * false. */
bool
iovec_is_empty(const struct iovec iovs[], size_t n_iovs)
{
    size_t idx = 0;

    while (idx < n_iovs && !iovs[idx].iov_len) {
        idx++;
    }
    return idx == n_iovs;
}
1139
1140/* Sends the 'n_iovs' iovecs of data in 'iovs' and the 'n_fds' file descriptors
1141 * in 'fds' on Unix domain socket 'sock'.  Returns the number of bytes
1142 * successfully sent or -1 if an error occurred.  On error, sets errno
1143 * appropriately.  */
1144int
1145send_iovec_and_fds(int sock,
1146                   const struct iovec *iovs, size_t n_iovs,
1147                   const int fds[], size_t n_fds)
1148{
1149    ovs_assert(sock >= 0);
1150    if (n_fds > 0) {
1151        union {
1152            struct cmsghdr cm;
1153            char control[CMSG_SPACE(SOUTIL_MAX_FDS * sizeof *fds)];
1154        } cmsg;
1155        struct msghdr msg;
1156
1157        ovs_assert(!iovec_is_empty(iovs, n_iovs));
1158        ovs_assert(n_fds <= SOUTIL_MAX_FDS);
1159
1160        memset(&cmsg, 0, sizeof cmsg);
1161        cmsg.cm.cmsg_len = CMSG_LEN(n_fds * sizeof *fds);
1162        cmsg.cm.cmsg_level = SOL_SOCKET;
1163        cmsg.cm.cmsg_type = SCM_RIGHTS;
1164        memcpy(CMSG_DATA(&cmsg.cm), fds, n_fds * sizeof *fds);
1165
1166        msg.msg_name = NULL;
1167        msg.msg_namelen = 0;
1168        msg.msg_iov = CONST_CAST(struct iovec *, iovs);
1169        msg.msg_iovlen = n_iovs;
1170        msg.msg_control = &cmsg.cm;
1171        msg.msg_controllen = CMSG_SPACE(n_fds * sizeof *fds);
1172        msg.msg_flags = 0;
1173
1174        return sendmsg(sock, &msg, 0);
1175    } else {
1176        return writev(sock, iovs, n_iovs);
1177    }
1178}
1179
/* Sends the 'n_iovs' iovecs of data in 'iovs' and the 'n_fds' file descriptors
 * in 'fds' on Unix domain socket 'sock', retrying until everything has been
 * sent or an error other than EINTR occurs.
 *
 * If 'skip_bytes' is nonzero, the first 'skip_bytes' bytes of data in the
 * iovecs are treated as already sent: they are not transmitted again, and none
 * of the file descriptors are sent.  'skip_bytes' must be less than or equal
 * to iovec_len(iovs, n_iovs).
 *
 * Returns 0 if all the data and fds were successfully sent, otherwise a
 * positive errno value.  Regardless of success, stores the number of bytes
 * sent (always at least 'skip_bytes') in '*bytes_sent'.  (If at least one byte
 * is sent, then all the fds have been sent.) */
int
send_iovec_and_fds_fully(int sock,
                         const struct iovec iovs[], size_t n_iovs,
                         const int fds[], size_t n_fds,
                         size_t skip_bytes, size_t *bytes_sent)
{
    *bytes_sent = 0;
    while (n_iovs > 0) {
        int n;

        if (skip_bytes) {
            /* Pretend the skipped prefix was just sent. */
            n = skip_bytes;
            skip_bytes = 0;
        } else if (*bytes_sent) {
            /* The fds went out with the first bytes; only data remains. */
            n = writev(sock, iovs, n_iovs);
        } else {
            n = send_iovec_and_fds(sock, iovs, n_iovs, fds, n_fds);
        }

        if (n < 0) {
            if (errno != EINTR) {
                return errno;
            }
            continue;
        }

        if (n == 0) {
            if (iovec_is_empty(iovs, n_iovs)) {
                break;
            }
            VLOG_WARN("send returned 0");
            return EPROTO;
        }

        /* Advance past every iovec that the 'n' bytes cover.  If the bytes
         * end partway through an iovec, flush the rest of that iovec now so
         * that the next attempt starts on an iovec boundary. */
        *bytes_sent += n;
        while (n > 0) {
            const uint8_t *base = iovs->iov_base;
            size_t len = iovs->iov_len;

            if (n < len) {
                size_t sent;
                int error;

                error = write_fully(sock, base + n, len - n, &sent);
                *bytes_sent += sent;
                n += sent;
                if (error) {
                    return error;
                }
            }
            n -= len;
            iovs++;
            n_iovs--;
        }
    }

    return 0;
}
1246
/* Sends the 'n_iovs' iovecs of data in 'iovs' and the 'n_fds' file descriptors
 * in 'fds' on Unix domain socket 'sock', retrying until everything has been
 * sent or an error other than EAGAIN or EINTR occurs.  Upon EAGAIN, blocks
 * until 'sock' becomes writable and then resumes after the bytes that were
 * already sent.
 *
 * Returns 0 if all the data and fds were successfully sent, otherwise a
 * positive errno value. */
int
send_iovec_and_fds_fully_block(int sock,
                               const struct iovec iovs[], size_t n_iovs,
                               const int fds[], size_t n_fds)
{
    size_t sent = 0;
    int error;

    /* 'sent' is both the number of bytes to skip on this attempt and the
     * place where the running total is stored for the next one. */
    while ((error = send_iovec_and_fds_fully(sock, iovs, n_iovs,
                                             fds, n_fds, sent, &sent))
           == EAGAIN) {
        poll_fd_wait(sock, POLLOUT);
        poll_block();
    }
    return error;
}
1274
1275/* Attempts to receive from Unix domain socket 'sock' up to 'size' bytes of
1276 * data into 'data' and up to SOUTIL_MAX_FDS file descriptors into 'fds'.
1277 *
1278 *      - Upon success, returns the number of bytes of data copied into 'data'
1279 *        and stores the number of received file descriptors into '*n_fdsp'.
1280 *
1281 *      - On failure, returns a negative errno value and stores 0 in
1282 *        '*n_fdsp'.
1283 *
1284 *      - On EOF, returns 0 and stores 0 in '*n_fdsp'. */
1285int
1286recv_data_and_fds(int sock,
1287                  void *data, size_t size,
1288                  int fds[SOUTIL_MAX_FDS], size_t *n_fdsp)
1289{
1290    union {
1291        struct cmsghdr cm;
1292        char control[CMSG_SPACE(SOUTIL_MAX_FDS * sizeof *fds)];
1293    } cmsg;
1294    struct msghdr msg;
1295    int retval;
1296    struct cmsghdr *p;
1297    size_t i;
1298
1299    *n_fdsp = 0;
1300
1301    do {
1302        struct iovec iov;
1303
1304        iov.iov_base = data;
1305        iov.iov_len = size;
1306
1307        msg.msg_name = NULL;
1308        msg.msg_namelen = 0;
1309        msg.msg_iov = &iov;
1310        msg.msg_iovlen = 1;
1311        msg.msg_control = &cmsg.cm;
1312        msg.msg_controllen = sizeof cmsg.control;
1313        msg.msg_flags = 0;
1314
1315        retval = recvmsg(sock, &msg, 0);
1316    } while (retval < 0 && errno == EINTR);
1317    if (retval <= 0) {
1318        return retval < 0 ? -errno : 0;
1319    }
1320
1321    for (p = CMSG_FIRSTHDR(&msg); p; p = CMSG_NXTHDR(&msg, p)) {
1322        if (p->cmsg_level != SOL_SOCKET || p->cmsg_type != SCM_RIGHTS) {
1323            VLOG_ERR("unexpected control message %d:%d",
1324                     p->cmsg_level, p->cmsg_type);
1325            goto error;
1326        } else if (*n_fdsp) {
1327            VLOG_ERR("multiple SCM_RIGHTS received");
1328            goto error;
1329        } else {
1330            size_t n_fds = (p->cmsg_len - CMSG_LEN(0)) / sizeof *fds;
1331            const int *fds_data = ALIGNED_CAST(const int *, CMSG_DATA(p));
1332
1333            ovs_assert(n_fds > 0);
1334            if (n_fds > SOUTIL_MAX_FDS) {
1335                VLOG_ERR("%zu fds received but only %d supported",
1336                         n_fds, SOUTIL_MAX_FDS);
1337                for (i = 0; i < n_fds; i++) {
1338                    close(fds_data[i]);
1339                }
1340                goto error;
1341            }
1342
1343            *n_fdsp = n_fds;
1344            memcpy(fds, fds_data, n_fds * sizeof *fds);
1345        }
1346    }
1347
1348    return retval;
1349
1350error:
1351    for (i = 0; i < *n_fdsp; i++) {
1352        close(fds[i]);
1353    }
1354    *n_fdsp = 0;
1355    return EPROTO;
1356}
1357
1358/* Calls ioctl() on an AF_INET sock, passing the specified 'command' and
1359 * 'arg'.  Returns 0 if successful, otherwise a positive errno value. */
1360int
1361af_inet_ioctl(unsigned long int command, const void *arg)
1362{
1363    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
1364    static int sock;
1365
1366    if (ovsthread_once_start(&once)) {
1367        sock = socket(AF_INET, SOCK_DGRAM, 0);
1368        if (sock < 0) {
1369            sock = -errno;
1370            VLOG_ERR("failed to create inet socket: %s", ovs_strerror(errno));
1371        }
1372        ovsthread_once_done(&once);
1373    }
1374
1375    return (sock < 0 ? -sock
1376            : ioctl(sock, command, arg) == -1 ? errno
1377            : 0);
1378}
1379
1380int
1381af_inet_ifreq_ioctl(const char *name, struct ifreq *ifr, unsigned long int cmd,
1382                    const char *cmd_name)
1383{
1384    int error;
1385
1386    ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name);
1387    error = af_inet_ioctl(cmd, ifr);
1388    if (error) {
1389        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
1390        VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name,
1391                    ovs_strerror(error));
1392    }
1393    return error;
1394}
1395