/src/unix/stream.c

http://github.com/joyent/libuv

/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <unistd.h>


static void uv__stream_connect(uv_stream_t*);
static void uv__write(uv_stream_t* stream);
static void uv__read(uv_stream_t* stream);
static void uv__stream_io(uv_loop_t* loop, uv__io_t* w, unsigned int events);


/* Used by the accept() EMFILE party trick. */
static int uv__open_cloexec(const char* path, int flags) {
  int fd;

#if defined(__linux__)
  fd = open(path, flags | UV__O_CLOEXEC);
  if (fd != -1)
    return fd;

  if (errno != EINVAL)
    return -1;

  /* O_CLOEXEC not supported. */
#endif

  fd = open(path, flags);
  if (fd != -1)
    uv__cloexec(fd, 1);

  return fd;
}

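/* Sum the byte lengths of `bufcnt` buffers; used to account
 * stream->write_queue_size.
 */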
static size_t uv__buf_count(uv_buf_t bufs[], int bufcnt) {
  size_t total = 0;
  int i;

  for (i = 0; i < bufcnt; i++) {
    total += bufs[i].len;
  }

  return total;
}

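/* Common initialization for all stream types (TCP, pipe, TTY). Also lazily
 * reserves the spare descriptor used by the accept() EMFILE trick below.
 */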
void uv__stream_init(uv_loop_t* loop,
                     uv_stream_t* stream,
                     uv_handle_type type) {
  uv__handle_init(loop, (uv_handle_t*)stream, type);
  stream->read_cb = NULL;
  stream->read2_cb = NULL;
  stream->alloc_cb = NULL;
  stream->close_cb = NULL;
  stream->connection_cb = NULL;
  stream->connect_req = NULL;
  stream->shutdown_req = NULL;
  stream->accepted_fd = -1;
  stream->delayed_error = 0;
  ngx_queue_init(&stream->write_queue);
  ngx_queue_init(&stream->write_completed_queue);
  stream->write_queue_size = 0;

  if (loop->emfile_fd == -1)
    loop->emfile_fd = uv__open_cloexec("/", O_RDONLY);

  uv__io_init(&stream->io_watcher, uv__stream_io, -1);
}

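/* Associate `fd` with the stream and apply the socket options implied by the
 * handle's flags. `flags` is a bitmask of stream flags, typically
 * UV_STREAM_READABLE and/or UV_STREAM_WRITABLE.
 */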
int uv__stream_open(uv_stream_t* stream, int fd, int flags) {
  socklen_t yes;

  assert(fd >= 0);
  stream->flags |= flags;

  if (stream->type == UV_TCP) {
    /* Reuse the local address if applicable. */
    yes = 1;

    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) == -1)
      return uv__set_sys_error(stream->loop, errno);

    if ((stream->flags & UV_TCP_NODELAY) && uv__tcp_nodelay(fd, 1))
      return uv__set_sys_error(stream->loop, errno);

    /* TODO: use the delay the user passed in. */
    if ((stream->flags & UV_TCP_KEEPALIVE) && uv__tcp_keepalive(fd, 1, 60))
      return uv__set_sys_error(stream->loop, errno);
  }

  stream->io_watcher.fd = fd;

  return 0;
}

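/* Tear down a stream after close: cancel the pending connect, write and
 * shutdown requests with UV_ECANCELED and run the callbacks of writes that
 * had already completed.
 */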
void uv__stream_destroy(uv_stream_t* stream) {
  uv_write_t* req;
  ngx_queue_t* q;

  assert(!uv__io_active(&stream->io_watcher, UV__POLLIN | UV__POLLOUT));
  assert(stream->flags & UV_CLOSED);

  if (stream->connect_req) {
    uv__req_unregister(stream->loop, stream->connect_req);
    uv__set_artificial_error(stream->loop, UV_ECANCELED);
    stream->connect_req->cb(stream->connect_req, -1);
    stream->connect_req = NULL;
  }

  while (!ngx_queue_empty(&stream->write_queue)) {
    q = ngx_queue_head(&stream->write_queue);
    ngx_queue_remove(q);

    req = ngx_queue_data(q, uv_write_t, queue);
    uv__req_unregister(stream->loop, req);

    if (req->bufs != req->bufsml)
      free(req->bufs);

    if (req->cb) {
      uv__set_artificial_error(req->handle->loop, UV_ECANCELED);
      req->cb(req, -1);
    }
  }

  while (!ngx_queue_empty(&stream->write_completed_queue)) {
    q = ngx_queue_head(&stream->write_completed_queue);
    ngx_queue_remove(q);

    req = ngx_queue_data(q, uv_write_t, queue);
    uv__req_unregister(stream->loop, req);

    if (req->cb) {
      uv__set_sys_error(stream->loop, req->error);
      req->cb(req, req->error ? -1 : 0);
    }
  }

  if (stream->shutdown_req) {
    uv__req_unregister(stream->loop, stream->shutdown_req);
    uv__set_artificial_error(stream->loop, UV_ECANCELED);
    stream->shutdown_req->cb(stream->shutdown_req, -1);
    stream->shutdown_req = NULL;
  }
}


/* Implements a best effort approach to mitigating accept() EMFILE errors.
 * We have a spare file descriptor stashed away that we close to get below
 * the EMFILE limit. Next, we accept all pending connections and close them
 * immediately to signal the clients that we're overloaded - and we are, but
 * we still keep on trucking.
 *
 * There is one caveat: it's not reliable in a multi-threaded environment.
 * The file descriptor limit is per process. Our party trick fails if another
 * thread opens a file or creates a socket in the time window between us
 * calling close() and accept().
 */
static int uv__emfile_trick(uv_loop_t* loop, int accept_fd) {
  int fd;
  int r;

  if (loop->emfile_fd == -1)
    return -1;

  close(loop->emfile_fd);

  for (;;) {
    fd = uv__accept(accept_fd);

    if (fd != -1) {
      close(fd);
      continue;
    }

    if (errno == EINTR)
      continue;

    if (errno == EAGAIN || errno == EWOULDBLOCK)
      r = 0;
    else
      r = -1;

    loop->emfile_fd = uv__open_cloexec("/", O_RDONLY);

    return r;
  }
}

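/* I/O watcher callback for listening sockets: accept as many pending
 * connections as possible and hand each one to connection_cb via
 * stream->accepted_fd.
 */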
void uv__server_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
  static int use_emfile_trick = -1;
  uv_stream_t* stream;
  int fd;
  int r;

  stream = container_of(w, uv_stream_t, io_watcher);
  assert(events == UV__POLLIN);
  assert(stream->accepted_fd == -1);
  assert(!(stream->flags & UV_CLOSING));

  if (stream->accepted_fd == -1)
    uv__io_start(stream->loop, &stream->io_watcher, UV__POLLIN);

  /* connection_cb can close the server socket while we're
   * in the loop so check it on each iteration.
   */
  while (stream->io_watcher.fd != -1) {
    assert(stream->accepted_fd == -1);
    fd = uv__accept(stream->io_watcher.fd);

    if (fd == -1) {
      switch (errno) {
#if EWOULDBLOCK != EAGAIN
      case EWOULDBLOCK:
#endif
      case EAGAIN:
        return; /* Not an error. */

      case ECONNABORTED:
        continue; /* Ignore. */

      case EMFILE:
      case ENFILE:
        if (use_emfile_trick == -1) {
          const char* val = getenv("UV_ACCEPT_EMFILE_TRICK");
          use_emfile_trick = (val == NULL || atoi(val) != 0);
        }

        if (use_emfile_trick) {
          SAVE_ERRNO(r = uv__emfile_trick(loop, stream->io_watcher.fd));
          if (r == 0)
            continue;
        }

        /* Fall through. */

      default:
        uv__set_sys_error(loop, errno);
        stream->connection_cb(stream, -1);
        continue;
      }
    }

    stream->accepted_fd = fd;
    stream->connection_cb(stream, 0);

    if (stream->accepted_fd != -1) {
      /* The user hasn't yet called uv_accept(). */
      uv__io_stop(loop, &stream->io_watcher, UV__POLLIN);
      return;
    }

    if (stream->type == UV_TCP && (stream->flags & UV_TCP_SINGLE_ACCEPT)) {
      /* Give other processes a chance to accept connections. */
      struct timespec timeout = { 0, 1 };
      nanosleep(&timeout, NULL);
    }
  }
}


int uv_accept(uv_stream_t* server, uv_stream_t* client) {
  uv_stream_t* streamServer;
  uv_stream_t* streamClient;
  int saved_errno;
  int status;

  /* TODO document this */
  assert(server->loop == client->loop);

  saved_errno = errno;
  status = -1;

  streamServer = (uv_stream_t*)server;
  streamClient = (uv_stream_t*)client;

  if (streamServer->accepted_fd < 0) {
    uv__set_sys_error(server->loop, EAGAIN);
    goto out;
  }

  if (uv__stream_open(streamClient, streamServer->accepted_fd,
        UV_STREAM_READABLE | UV_STREAM_WRITABLE)) {
    /* TODO handle error */
    close(streamServer->accepted_fd);
    streamServer->accepted_fd = -1;
    goto out;
  }

  uv__io_start(streamServer->loop, &streamServer->io_watcher, UV__POLLIN);
  streamServer->accepted_fd = -1;
  status = 0;

out:
  errno = saved_errno;
  return status;
}


int uv_listen(uv_stream_t* stream, int backlog, uv_connection_cb cb) {
  int r;

  switch (stream->type) {
    case UV_TCP:
      r = uv_tcp_listen((uv_tcp_t*)stream, backlog, cb);
      break;

    case UV_NAMED_PIPE:
      r = uv_pipe_listen((uv_pipe_t*)stream, backlog, cb);
      break;

    default:
      assert(0);
      return -1;
  }

  if (r == 0)
    uv__handle_start(stream);

  return r;
}
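
/* Usage sketch (illustrative, not part of this file): a connection_cb that
 * pairs uv_listen() with uv_accept(). Callback names and error handling are
 * hypothetical; `status` is -1 when the pending accept failed.
 *
 *   static void on_connection(uv_stream_t* server, int status) {
 *     uv_tcp_t* client;
 *
 *     if (status != 0)
 *       return;
 *
 *     client = malloc(sizeof(*client));
 *     uv_tcp_init(server->loop, client);
 *
 *     if (uv_accept(server, (uv_stream_t*) client) == 0)
 *       uv_read_start((uv_stream_t*) client, on_alloc, on_read);
 *     else
 *       uv_close((uv_handle_t*) client, on_close);
 *   }
 *
 *   uv_listen((uv_stream_t*) &server, 128, on_connection);
 */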


uv_write_t* uv_write_queue_head(uv_stream_t* stream) {
  ngx_queue_t* q;
  uv_write_t* req;

  if (ngx_queue_empty(&stream->write_queue)) {
    return NULL;
  }

  q = ngx_queue_head(&stream->write_queue);
  if (!q) {
    return NULL;
  }

  req = ngx_queue_data(q, struct uv_write_s, queue);
  assert(req);

  return req;
}

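/* Called when the write queue is empty: stop watching for writability and,
 * if a shutdown was requested with uv_shutdown(), perform it now.
 */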
static void uv__drain(uv_stream_t* stream) {
  uv_shutdown_t* req;

  assert(!uv_write_queue_head(stream));
  assert(stream->write_queue_size == 0);

  uv__io_stop(stream->loop, &stream->io_watcher, UV__POLLOUT);

  /* Shutdown? */
  if ((stream->flags & UV_STREAM_SHUTTING) &&
      !(stream->flags & UV_CLOSING) &&
      !(stream->flags & UV_STREAM_SHUT)) {
    assert(stream->shutdown_req);

    req = stream->shutdown_req;
    stream->shutdown_req = NULL;
    uv__req_unregister(stream->loop, req);

    if (shutdown(stream->io_watcher.fd, SHUT_WR)) {
      /* Error. Report it. User should call uv_close(). */
      uv__set_sys_error(stream->loop, errno);
      if (req->cb) {
        req->cb(req, -1);
      }
    } else {
      uv__set_sys_error(stream->loop, 0);
      ((uv_handle_t*) stream)->flags |= UV_STREAM_SHUT;
      if (req->cb) {
        req->cb(req, 0);
      }
    }
  }
}


static size_t uv__write_req_size(uv_write_t* req) {
  size_t size;

  size = uv__buf_count(req->bufs + req->write_index,
                       req->bufcnt - req->write_index);
  assert(req->handle->write_queue_size >= size);

  return size;
}


static void uv__write_req_finish(uv_write_t* req) {
  uv_stream_t* stream = req->handle;

  /* Pop the req off tcp->write_queue. */
  ngx_queue_remove(&req->queue);
  if (req->bufs != req->bufsml) {
    free(req->bufs);
  }
  req->bufs = NULL;

  /* Add it to the write_completed_queue where it will have its
   * callback called in the near future.
   */
  ngx_queue_insert_tail(&stream->write_completed_queue, &req->queue);
  uv__io_feed(stream->loop, &stream->io_watcher);
}

/* Flush the write queue. On error, the errno value is stored on the write
 * request and the request is moved to the write_completed_queue; its
 * callback runs later from uv__write_callbacks().
 */
static void uv__write(uv_stream_t* stream) {
  uv_write_t* req;
  struct iovec* iov;
  int iovcnt;
  ssize_t n;

  if (stream->flags & UV_CLOSING) {
    /* Handle was closed this tick. We've received a stale
     * 'is writable' callback from the event loop, ignore.
     */
    return;
  }

start:

  assert(stream->io_watcher.fd >= 0);

  /* Get the request at the head of the queue. */
  req = uv_write_queue_head(stream);
  if (!req) {
    assert(stream->write_queue_size == 0);
    return;
  }

  assert(req->handle == stream);

  /*
   * Cast to iovec. We had to have our own uv_buf_t instead of iovec
   * because Windows's WSABUF is not an iovec.
   */
  assert(sizeof(uv_buf_t) == sizeof(struct iovec));
  iov = (struct iovec*) &(req->bufs[req->write_index]);
  iovcnt = req->bufcnt - req->write_index;

  /*
   * Now do the actual writev. Note that we've been updating the pointers
   * inside the iov each time we write. So there is no need to offset it.
   */

  if (req->send_handle) {
    struct msghdr msg;
    char scratch[64];
    struct cmsghdr *cmsg;
    int fd_to_send = req->send_handle->io_watcher.fd;

    assert(fd_to_send >= 0);

    msg.msg_name = NULL;
    msg.msg_namelen = 0;
    msg.msg_iov = iov;
    msg.msg_iovlen = iovcnt;
    msg.msg_flags = 0;

    msg.msg_control = (void*) scratch;
    msg.msg_controllen = CMSG_LEN(sizeof(fd_to_send));

    cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = msg.msg_controllen;

    /* silence aliasing warning */
    {
      void* pv = CMSG_DATA(cmsg);
      int* pi = pv;
      *pi = fd_to_send;
    }

    do {
      n = sendmsg(stream->io_watcher.fd, &msg, 0);
    }
    while (n == -1 && errno == EINTR);
  } else {
    do {
      if (iovcnt == 1) {
        n = write(stream->io_watcher.fd, iov[0].iov_base, iov[0].iov_len);
      } else {
        n = writev(stream->io_watcher.fd, iov, iovcnt);
      }
    }
    while (n == -1 && errno == EINTR);
  }

  if (n < 0) {
    if (errno != EAGAIN && errno != EWOULDBLOCK) {
      /* Error */
      req->error = errno;
      stream->write_queue_size -= uv__write_req_size(req);
      uv__write_req_finish(req);
      return;
    } else if (stream->flags & UV_STREAM_BLOCKING) {
      /* If this is a blocking stream, try again. */
      goto start;
    }
  } else {
    /* Successful write */

    while (n >= 0) {
      uv_buf_t* buf = &(req->bufs[req->write_index]);
      size_t len = buf->len;

      assert(req->write_index < req->bufcnt);

      if ((size_t)n < len) {
        buf->base += n;
        buf->len -= n;
        stream->write_queue_size -= n;
        n = 0;

        /* There is more to write. */
        if (stream->flags & UV_STREAM_BLOCKING) {
          /*
           * If we're blocking then we should not be enabling the write
           * watcher - instead we need to try again.
           */
          goto start;
        } else {
          /* Break loop and ensure the watcher is pending. */
          break;
        }

      } else {
        /* Finished writing the buf at index req->write_index. */
        req->write_index++;

        assert((size_t)n >= len);
        n -= len;

        assert(stream->write_queue_size >= len);
        stream->write_queue_size -= len;

        if (req->write_index == req->bufcnt) {
          /* Then we're done! */
          assert(n == 0);
          uv__write_req_finish(req);
          /* TODO: start trying to write the next request. */
          return;
        }
      }
    }
  }

  /* Either we've counted n down to zero or we've got EAGAIN. */
  assert(n == 0 || n == -1);

  /* Only non-blocking streams should use the write_watcher. */
  assert(!(stream->flags & UV_STREAM_BLOCKING));

  /* We're not done. */
  uv__io_start(stream->loop, &stream->io_watcher, UV__POLLOUT);
}

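/* Invoke the callbacks of write requests that uv__write() moved to the
 * write_completed_queue, then drain the stream if the write queue is empty.
 */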
static void uv__write_callbacks(uv_stream_t* stream) {
  uv_write_t* req;
  ngx_queue_t* q;

  while (!ngx_queue_empty(&stream->write_completed_queue)) {
    /* Pop a req off write_completed_queue. */
    q = ngx_queue_head(&stream->write_completed_queue);
    req = ngx_queue_data(q, uv_write_t, queue);
    ngx_queue_remove(q);
    uv__req_unregister(stream->loop, req);

    /* NOTE: call callback AFTER freeing the request data. */
    if (req->cb) {
      uv__set_sys_error(stream->loop, req->error);
      req->cb(req, req->error ? -1 : 0);
    }
  }

  assert(ngx_queue_empty(&stream->write_completed_queue));

  /* Write queue drained. */
  if (!uv_write_queue_head(stream)) {
    uv__drain(stream);
  }
}

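/* Infer the libuv handle type of a socket descriptor from its address
 * family; used to classify descriptors received over an IPC pipe.
 */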
static uv_handle_type uv__handle_type(int fd) {
  struct sockaddr_storage ss;
  socklen_t len;

  memset(&ss, 0, sizeof(ss));
  len = sizeof(ss);

  if (getsockname(fd, (struct sockaddr*)&ss, &len))
    return UV_UNKNOWN_HANDLE;

  switch (ss.ss_family) {
  case AF_UNIX:
    return UV_NAMED_PIPE;
  case AF_INET:
  case AF_INET6:
    return UV_TCP;
  }

  return UV_UNKNOWN_HANDLE;
}

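/* Read from the stream until it would block or the loop-starvation limit is
 * hit. Plain reads use read(2); IPC pipes with a read2_cb use recvmsg(2) so
 * that SCM_RIGHTS file descriptors can be received alongside the data.
 */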
static void uv__read(uv_stream_t* stream) {
  uv_buf_t buf;
  ssize_t nread;
  struct msghdr msg;
  struct cmsghdr* cmsg;
  char cmsg_space[64];
  int count;

  /* Prevent loop starvation when the data comes in as fast as (or faster than)
   * we can read it. XXX Need to rearm fd if we switch to edge-triggered I/O.
   */
  count = 32;

  /* XXX: Maybe instead of having UV_STREAM_READING we just test if
   * tcp->read_cb is NULL or not?
   */
  while ((stream->read_cb || stream->read2_cb)
      && (stream->flags & UV_STREAM_READING)
      && (count-- > 0)) {
    assert(stream->alloc_cb);
    buf = stream->alloc_cb((uv_handle_t*)stream, 64 * 1024);

    assert(buf.len > 0);
    assert(buf.base);
    assert(stream->io_watcher.fd >= 0);

    if (stream->read_cb) {
      do {
        nread = read(stream->io_watcher.fd, buf.base, buf.len);
      }
      while (nread < 0 && errno == EINTR);
    } else {
      assert(stream->read2_cb);
      /* read2_cb uses recvmsg */
      msg.msg_flags = 0;
      msg.msg_iov = (struct iovec*) &buf;
      msg.msg_iovlen = 1;
      msg.msg_name = NULL;
      msg.msg_namelen = 0;
      /* Set up to receive a descriptor even if one isn't in the message */
      msg.msg_controllen = 64;
      msg.msg_control = (void *) cmsg_space;

      do {
        nread = recvmsg(stream->io_watcher.fd, &msg, 0);
      }
      while (nread < 0 && errno == EINTR);
    }


    if (nread < 0) {
      /* Error */
      if (errno == EAGAIN || errno == EWOULDBLOCK) {
        /* Wait for the next one. */
        if (stream->flags & UV_STREAM_READING) {
          uv__io_start(stream->loop, &stream->io_watcher, UV__POLLIN);
        }
        uv__set_sys_error(stream->loop, EAGAIN);

        if (stream->read_cb) {
          stream->read_cb(stream, 0, buf);
        } else {
          stream->read2_cb((uv_pipe_t*)stream, 0, buf, UV_UNKNOWN_HANDLE);
        }

        return;
      } else {
        /* Error. User should call uv_close(). */
        uv__set_sys_error(stream->loop, errno);

        if (stream->read_cb) {
          stream->read_cb(stream, -1, buf);
        } else {
          stream->read2_cb((uv_pipe_t*)stream, -1, buf, UV_UNKNOWN_HANDLE);
        }

        assert(!uv__io_active(&stream->io_watcher, UV__POLLIN));
        return;
      }

    } else if (nread == 0) {
      /* EOF */
      uv__set_artificial_error(stream->loop, UV_EOF);
      uv__io_stop(stream->loop, &stream->io_watcher, UV__POLLIN);

      if (!uv__io_active(&stream->io_watcher, UV__POLLOUT))
        uv__handle_stop(stream);

      if (stream->read_cb) {
        stream->read_cb(stream, -1, buf);
      } else {
        stream->read2_cb((uv_pipe_t*)stream, -1, buf, UV_UNKNOWN_HANDLE);
      }
      return;
    } else {
      /* Successful read */
      ssize_t buflen = buf.len;

      if (stream->read_cb) {
        stream->read_cb(stream, nread, buf);
      } else {
        assert(stream->read2_cb);

        /*
         * XXX: Some implementations can send multiple file descriptors in a
         * single message. We should be using CMSG_NXTHDR() to walk the
         * chain to get at them all. This would require changing the API to
         * hand them back up to the caller, which is a pain.
         */

        for (cmsg = CMSG_FIRSTHDR(&msg);
             msg.msg_controllen > 0 && cmsg != NULL;
             cmsg = CMSG_NXTHDR(&msg, cmsg)) {

          if (cmsg->cmsg_type == SCM_RIGHTS) {
            if (stream->accepted_fd != -1) {
              fprintf(stderr, "(libuv) ignoring extra FD received\n");
            }

            /* silence aliasing warning */
            {
              void* pv = CMSG_DATA(cmsg);
              int* pi = pv;
              stream->accepted_fd = *pi;
            }

          } else {
            fprintf(stderr, "ignoring non-SCM_RIGHTS ancillary data: %d\n",
                cmsg->cmsg_type);
          }
        }


        if (stream->accepted_fd >= 0) {
          stream->read2_cb((uv_pipe_t*)stream, nread, buf,
              uv__handle_type(stream->accepted_fd));
        } else {
          stream->read2_cb((uv_pipe_t*)stream, nread, buf, UV_UNKNOWN_HANDLE);
        }
      }

      /* Return if we didn't fill the buffer; there is no more data to read. */
      if (nread < buflen) {
        return;
      }
    }
  }
}


int uv_shutdown(uv_shutdown_t* req, uv_stream_t* stream, uv_shutdown_cb cb) {
  assert((stream->type == UV_TCP || stream->type == UV_NAMED_PIPE) &&
         "uv_shutdown (unix) only supports uv_handle_t right now");
  assert(stream->io_watcher.fd >= 0);

  if (!(stream->flags & UV_STREAM_WRITABLE) ||
      stream->flags & UV_STREAM_SHUT ||
      stream->flags & UV_CLOSED ||
      stream->flags & UV_CLOSING) {
    uv__set_artificial_error(stream->loop, UV_ENOTCONN);
    return -1;
  }

  /* Initialize request */
  uv__req_init(stream->loop, req, UV_SHUTDOWN);
  req->handle = stream;
  req->cb = cb;
  stream->shutdown_req = req;
  stream->flags |= UV_STREAM_SHUTTING;

  uv__io_start(stream->loop, &stream->io_watcher, UV__POLLOUT);

  return 0;
}

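/* Usage sketch (illustrative, not part of this file): shut down the write
 * side once queued writes have drained, then close. The callback name is
 * hypothetical.
 *
 *   static void on_shutdown(uv_shutdown_t* req, int status) {
 *     uv_close((uv_handle_t*) req->handle, NULL);
 *     free(req);
 *   }
 *
 *   uv_shutdown_t* req = malloc(sizeof(*req));
 *   if (uv_shutdown(req, stream, on_shutdown) != 0)
 *     free(req);  // stream not writable or already shut down
 */


/* General-purpose stream I/O callback: completes a pending connect, then
 * dispatches readable/writable events to uv__read() and uv__write().
 */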
static void uv__stream_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
  uv_stream_t* stream;

  stream = container_of(w, uv_stream_t, io_watcher);

  assert(stream->type == UV_TCP ||
         stream->type == UV_NAMED_PIPE ||
         stream->type == UV_TTY);
  assert(!(stream->flags & UV_CLOSING));

  if (stream->connect_req) {
    uv__stream_connect(stream);
    return;
  }

  if (events & UV__POLLIN) {
    assert(stream->io_watcher.fd >= 0);

    uv__read(stream);

    if (stream->io_watcher.fd == -1)
      return; /* read_cb closed stream. */
  }

  if (events & UV__POLLOUT) {
    assert(stream->io_watcher.fd >= 0);
    uv__write(stream);
    uv__write_callbacks(stream);
  }
}

/**
 * We get called here when the socket becomes writable after an earlier call
 * to connect(2). To determine whether the connect succeeded or failed we
 * must call getsockopt(2).
 */
static void uv__stream_connect(uv_stream_t* stream) {
  int error;
  uv_connect_t* req = stream->connect_req;
  socklen_t errorsize = sizeof(int);

  assert(stream->type == UV_TCP || stream->type == UV_NAMED_PIPE);
  assert(req);

  if (stream->delayed_error) {
    /* To smooth over the differences between unixes, errors that were
     * reported synchronously on the first connect can be delayed until
     * the next tick, which is now.
     */
    error = stream->delayed_error;
    stream->delayed_error = 0;
  } else {
    /* Normal situation: we need to get the socket error from the kernel. */
    assert(stream->io_watcher.fd >= 0);
    getsockopt(stream->io_watcher.fd, SOL_SOCKET, SO_ERROR, &error, &errorsize);
  }

  if (error == EINPROGRESS)
    return;

  stream->connect_req = NULL;
  uv__req_unregister(stream->loop, req);

  if (req->cb) {
    uv__set_sys_error(stream->loop, error);
    req->cb(req, error ? -1 : 0);
  }
}


int uv_write2(uv_write_t* req,
              uv_stream_t* stream,
              uv_buf_t bufs[],
              int bufcnt,
              uv_stream_t* send_handle,
              uv_write_cb cb) {
  int empty_queue;

  assert(bufcnt > 0);

  assert((stream->type == UV_TCP || stream->type == UV_NAMED_PIPE ||
      stream->type == UV_TTY) &&
      "uv_write (unix) does not yet support other types of streams");

  if (stream->io_watcher.fd < 0) {
    uv__set_sys_error(stream->loop, EBADF);
    return -1;
  }

  if (send_handle) {
    if (stream->type != UV_NAMED_PIPE || !((uv_pipe_t*)stream)->ipc) {
      uv__set_sys_error(stream->loop, EOPNOTSUPP);
      return -1;
    }
  }

  empty_queue = (stream->write_queue_size == 0);

  /* Initialize the req */
  uv__req_init(stream->loop, req, UV_WRITE);
  req->cb = cb;
  req->handle = stream;
  req->error = 0;
  req->send_handle = send_handle;
  ngx_queue_init(&req->queue);

  if (bufcnt <= (int) ARRAY_SIZE(req->bufsml))
    req->bufs = req->bufsml;
  else
    req->bufs = malloc(sizeof(uv_buf_t) * bufcnt);

  memcpy(req->bufs, bufs, bufcnt * sizeof(uv_buf_t));
  req->bufcnt = bufcnt;
  req->write_index = 0;
  stream->write_queue_size += uv__buf_count(bufs, bufcnt);

  /* Append the request to write_queue. */
  ngx_queue_insert_tail(&stream->write_queue, &req->queue);

  /* If the queue was empty when this function began, we should attempt to
   * do the write immediately. Otherwise start the write_watcher and wait
   * for the fd to become writable.
   */
  if (stream->connect_req) {
    /* Still connecting, do nothing. */
  }
  else if (empty_queue) {
    uv__write(stream);
  }
  else {
    /*
     * blocking streams should never have anything in the queue.
     * if this assert fires then somehow the blocking stream isn't being
     * sufficiently flushed in uv__write.
     */
    assert(!(stream->flags & UV_STREAM_BLOCKING));
    uv__io_start(stream->loop, &stream->io_watcher, UV__POLLOUT);
  }

  return 0;
}

/* The memory pointed to by the buffers must remain valid until the callback
 * is called. This is not required for the uv_buf_t array itself, which is
 * copied.
 */
int uv_write(uv_write_t* req, uv_stream_t* stream, uv_buf_t bufs[], int bufcnt,
    uv_write_cb cb) {
  return uv_write2(req, stream, bufs, bufcnt, NULL, cb);
}
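
/* Usage sketch (illustrative, not part of this file): queue a one-buffer
 * write. The `on_write` callback name is hypothetical. The string literal
 * has static storage duration, which satisfies the buffer-lifetime rule
 * stated above.
 *
 *   static void on_write(uv_write_t* req, int status) {
 *     if (status != 0)
 *       fprintf(stderr, "write error\n");
 *     free(req);
 *   }
 *
 *   uv_write_t* req = malloc(sizeof(*req));
 *   uv_buf_t buf = uv_buf_init("hello\n", 6);
 *   if (uv_write(req, stream, &buf, 1, on_write) != 0)
 *     free(req);  // write could not be queued
 */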


int uv__read_start_common(uv_stream_t* stream, uv_alloc_cb alloc_cb,
    uv_read_cb read_cb, uv_read2_cb read2_cb) {
  assert(stream->type == UV_TCP || stream->type == UV_NAMED_PIPE ||
      stream->type == UV_TTY);

  if (stream->flags & UV_CLOSING) {
    uv__set_sys_error(stream->loop, EINVAL);
    return -1;
  }

  /* The UV_STREAM_READING flag is independent of the state of the tcp - it
   * just expresses the user's desired state.
   */
  stream->flags |= UV_STREAM_READING;

  /* TODO: try to do the read inline? */
  /* TODO: keep track of tcp state. If we've gotten an EOF then we should
   * not start the IO watcher.
   */
  assert(stream->io_watcher.fd >= 0);
  assert(alloc_cb);

  stream->read_cb = read_cb;
  stream->read2_cb = read2_cb;
  stream->alloc_cb = alloc_cb;

  uv__io_start(stream->loop, &stream->io_watcher, UV__POLLIN);
  uv__handle_start(stream);

  return 0;
}


int uv_read_start(uv_stream_t* stream, uv_alloc_cb alloc_cb,
    uv_read_cb read_cb) {
  return uv__read_start_common(stream, alloc_cb, read_cb, NULL);
}
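
/* Usage sketch (illustrative, not part of this file): the pre-1.0 callback
 * style this file implements - alloc_cb returns a uv_buf_t by value and
 * read_cb receives the buffer by value. Callback names are hypothetical.
 *
 *   static uv_buf_t on_alloc(uv_handle_t* handle, size_t suggested_size) {
 *     return uv_buf_init(malloc(suggested_size), suggested_size);
 *   }
 *
 *   static void on_read(uv_stream_t* stream, ssize_t nread, uv_buf_t buf) {
 *     if (nread < 0)
 *       uv_close((uv_handle_t*) stream, NULL);  // error or EOF
 *     else if (nread > 0)
 *       handle_data(buf.base, nread);           // hypothetical consumer
 *     free(buf.base);
 *   }
 *
 *   uv_read_start(stream, on_alloc, on_read);
 */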


int uv_read2_start(uv_stream_t* stream, uv_alloc_cb alloc_cb,
    uv_read2_cb read_cb) {
  return uv__read_start_common(stream, alloc_cb, NULL, read_cb);
}


int uv_read_stop(uv_stream_t* stream) {
  uv__io_stop(stream->loop, &stream->io_watcher, UV__POLLIN);
  uv__handle_stop(stream);
  stream->flags &= ~UV_STREAM_READING;
  stream->read_cb = NULL;
  stream->read2_cb = NULL;
  stream->alloc_cb = NULL;
  return 0;
}


int uv_is_readable(const uv_stream_t* stream) {
  return stream->flags & UV_STREAM_READABLE;
}


int uv_is_writable(const uv_stream_t* stream) {
  return stream->flags & UV_STREAM_WRITABLE;
}


void uv__stream_close(uv_stream_t* handle) {
  uv_read_stop(handle);
  uv__io_stop(handle->loop, &handle->io_watcher, UV__POLLOUT);

  close(handle->io_watcher.fd);
  handle->io_watcher.fd = -1;

  if (handle->accepted_fd >= 0) {
    close(handle->accepted_fd);
    handle->accepted_fd = -1;
  }

  assert(!uv__io_active(&handle->io_watcher, UV__POLLIN | UV__POLLOUT));
}