# libs/partners/openai/langchain_openai/chat_models/_client_utils.py · langchain-ai/langchain

1"""Helpers for OpenAI httpx client construction, transport tuning, and streaming.23Covers cached default client builders, proxy-aware variants for the4`openai_proxy` path, kernel-level TCP keepalive / `TCP_USER_TIMEOUT` socket5options, and the `_astream_with_chunk_timeout` wrapper that bounds per-chunk6wall-clock time on async SSE streams.78Client-builder boilerplate mirrors the patterns in `openai._base_client`;9socket-option tuning and the streaming timeout are original to this module.10"""1112from __future__ import annotations1314import asyncio15import inspect16import logging17import os18import socket19import sys20import urllib.request21from collections.abc import AsyncIterator, Awaitable, Callable, Sequence22from functools import lru_cache23from typing import Any, TypeVar, cast2425import httpx26import openai27from pydantic import SecretStr2829logger = logging.getLogger(__name__)3031SocketOption = tuple[int, int, int]3233# socket.TCP_KEEPIDLE etc. are absent on darwin/win32; use raw UAPI constants.34_LINUX_TCP_KEEPIDLE = 435_LINUX_TCP_KEEPINTVL = 536_LINUX_TCP_KEEPCNT = 637_LINUX_TCP_USER_TIMEOUT = 183839# macOS: same semantics, different constants from <netinet/tcp.h>.40_DARWIN_TCP_KEEPALIVE = 0x10  # idle seconds before first probe41_DARWIN_TCP_KEEPINTVL = 0x10142_DARWIN_TCP_KEEPCNT = 0x1024344# Mirrors the openai SDK's pool defaults. Hardcoded to avoid depending on45# an internal module path (openai._constants) that can move across SDK versions.46_DEFAULT_CONNECTION_LIMITS = httpx.Limits(47    max_connections=1000,48    max_keepalive_connections=100,49    keepalive_expiry=5.0,50)515253def _int_env(name: str, default: int, *, allow_negative: bool = False) -> int:54    """Read an int env var with graceful fallback + discoverable warning.5556    Unparseable or (by default) negative values fall back to `default` and57    emit a single `WARNING` naming the offending variable. 
A misconfigured58    environment still loads, but operators see the fallback in their logs59    rather than silently getting a surprising default.60    """61    raw = os.environ.get(name)62    if raw is None:63        return default64    try:65        value = int(raw)66    except (TypeError, ValueError):67        logger.warning(68            "Invalid value for %s=%r (not an int); falling back to %d.",69            name,70            raw,71            default,72        )73        return default74    if not allow_negative and value < 0:75        logger.warning(76            "Invalid value for %s=%r (negative); falling back to %d.",77            name,78            raw,79            default,80        )81        return default82    return value838485def _float_env(name: str, default: float, *, allow_negative: bool = False) -> float:86    """Read a float env var with graceful fallback + discoverable warning.8788    See `_int_env`. Negative values are rejected by default so a typo in89    `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S=-10` can't silently disable the90    wrapper it was meant to configure.91    """92    raw = os.environ.get(name)93    if raw is None:94        return default95    try:96        value = float(raw)97    except (TypeError, ValueError):98        logger.warning(99            "Invalid value for %s=%r (not a float); falling back to %s.",100            name,101            raw,102            default,103        )104        return default105    if not allow_negative and value < 0:106        logger.warning(107            "Invalid value for %s=%r (negative); falling back to %s.",108            name,109            raw,110            default,111        )112        return default113    return value114115116def _filter_supported(opts: list[SocketOption]) -> list[SocketOption]:117    """Drop socket options the running platform rejects.118119    Probes each option against a throwaway socket via `setsockopt` and keeps120    only those the kernel accepts. 
This keeps the library-computed defaults121    non-fatal across platforms that don't implement every Linux option —122    `TCP_USER_TIMEOUT` in particular is Linux-only and silently missing on123    macOS, some minimal kernels, and older gVisor builds. Dropped options124    are logged at `DEBUG` so an operator can confirm whether a kernel-level125    knob took effect on their platform.126127    If the probe socket cannot be created (sandboxed runtimes, `pytest-socket`128    under `--disable-socket`, tight seccomp policies), the input list is129    returned unfiltered. This preserves the pass-through behavior used for130    explicit user overrides: unsupported options will surface as a clear131    `OSError` at the first real `connect()` rather than being silently132    dropped during `ChatOpenAI` construction.133    """134    try:135        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)136    except Exception:137        # Broad catch is deliberate: `pytest_socket` under `--disable-socket`138        # raises `SocketBlockedError` (a `RuntimeError`, not `OSError`), and139        # seccomp/sandboxed runtimes have been observed to raise other140        # `OSError` subclasses and `PermissionError`. 
The intent is "any141        # inability to create a probe socket -> pass through unfiltered,"142        # and narrowing the type would silently regress sandboxed CI.143        return list(opts)144    try:145        supported: list[SocketOption] = []146        dropped: list[SocketOption] = []147        for level, optname, optval in opts:148            try:149                probe.setsockopt(level, optname, optval)150            except OSError:151                dropped.append((level, optname, optval))152                continue153            supported.append((level, optname, optval))154        if dropped:155            logger.debug(156                "Dropped %d unsupported socket option(s) on %s: %s",157                len(dropped),158                sys.platform,159                dropped,160            )161        return supported162    finally:163        probe.close()164165166def _default_socket_options() -> tuple[SocketOption, ...]:167    """Return default TCP socket options, or `()` if disabled via env.168169    Always returns a tuple (never None) so callers and `@lru_cache` keys170    remain uniform: `()` is the single shape for "no options".171172    Target behavior on Linux/gVisor with the full option set: silent peers173    are surfaced within ~90-120s via `SO_KEEPALIVE` + `TCP_USER_TIMEOUT`174    (keepalive path gives a ~90s floor at the defaults; `TCP_USER_TIMEOUT`175    caps at 120s). 
On platforms that reject some options,176    `_filter_supported` drops them and the bound degrades to whatever the177    remaining options provide.178    """179    if os.environ.get("LANGCHAIN_OPENAI_TCP_KEEPALIVE", "1") == "0":180        return ()181182    keepidle = _int_env("LANGCHAIN_OPENAI_TCP_KEEPIDLE", 60)183    keepintvl = _int_env("LANGCHAIN_OPENAI_TCP_KEEPINTVL", 10)184    keepcnt = _int_env("LANGCHAIN_OPENAI_TCP_KEEPCNT", 3)185    user_timeout_ms = _int_env("LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS", 120000)186187    opts: list[SocketOption] = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]188    if sys.platform == "linux":189        opts += [190            (socket.IPPROTO_TCP, _LINUX_TCP_KEEPIDLE, keepidle),191            (socket.IPPROTO_TCP, _LINUX_TCP_KEEPINTVL, keepintvl),192            (socket.IPPROTO_TCP, _LINUX_TCP_KEEPCNT, keepcnt),193            (socket.IPPROTO_TCP, _LINUX_TCP_USER_TIMEOUT, user_timeout_ms),194        ]195    elif sys.platform == "darwin":196        opts += [197            (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPALIVE, keepidle),198            (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPINTVL, keepintvl),199            (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPCNT, keepcnt),200        ]201    # Windows (win32): SO_KEEPALIVE only; per-option tuning requires WSAIoctl.202    return tuple(_filter_supported(opts))203204205_PROXY_ENV_VARS = (206    "HTTP_PROXY",207    "HTTPS_PROXY",208    "ALL_PROXY",209    "http_proxy",210    "https_proxy",211    "all_proxy",212)213_proxy_env_warning_emitted = False214_proxy_env_bypass_info_emitted = False215216217def _proxy_env_detected() -> bool:218    """True when httpx would pick up a proxy from env or system config.219220    Mirrors the surface httpx reads (`urllib.request.getproxies()` plus the221    uppercase env var names) so a positive result means env-proxy222    auto-detection is live on pre-PR code paths.223    """224    if any(os.environ.get(name) for name in _PROXY_ENV_VARS):225        return True226    
try:227        return bool(urllib.request.getproxies())228    except Exception:229        return False230231232def _should_bypass_socket_options_for_proxy_env(233    *,234    http_socket_options: Sequence[SocketOption] | None,235    http_client: Any,236    http_async_client: Any,237    openai_proxy: str | None,238) -> bool:239    """True when default shape + env proxy detected → skip transport injection.240241    Preserves pre-PR behavior for apps relying on httpx's env-proxy242    auto-detection. Only triggers when the user has made no explicit choice243    that would signal they want the custom transport:244245    - `http_socket_options` left at `None` (default, not `()` or a sequence)246    - `LANGCHAIN_OPENAI_TCP_KEEPALIVE` is not `0` (kill-switch is its own path)247    - No `http_client` or `http_async_client` supplied248    - No `openai_proxy` supplied249    - A proxy env var / system proxy is visible to httpx250251    If any of those are set, the user has opted in to the transport path252    (directly or via `openai_proxy`) and normal behavior — including the253    shadowed-proxy WARNING — applies. When the kill-switch is set,254    `_default_socket_options` already returns `()`, so the bypass INFO255    would be noise; route through the normal path instead.256    """257    if http_socket_options is not None:258        return False259    if os.environ.get("LANGCHAIN_OPENAI_TCP_KEEPALIVE", "1") == "0":260        return False261    if http_client is not None or http_async_client is not None:262        return False263    if openai_proxy:264        return False265    return _proxy_env_detected()266267268def _log_proxy_env_bypass_once() -> None:269    """Emit a one-time INFO when the proxy-env bypass triggers.270271    Visibility for operators running with a custom log pipeline: the bypass272    is the *safe* outcome (env-proxy auto-detection preserved), but it means273    socket-level keepalive / `TCP_USER_TIMEOUT` aren't applied on this274    instance. 
INFO-level, since it's not a problem — just a diagnostic.275    """276    global _proxy_env_bypass_info_emitted277    if _proxy_env_bypass_info_emitted:278        return279    _proxy_env_bypass_info_emitted = True280    active = [name for name in _PROXY_ENV_VARS if os.environ.get(name)]281    source = ", ".join(active) if active else "system proxy configuration"282    logger.info(283        "langchain-openai detected %s and no explicit `http_socket_options` / "284        "`http_client` / `http_async_client` / `openai_proxy`; skipping the "285        "custom `httpx` transport so httpx's env-proxy auto-detection applies. "286        "Pass `http_socket_options=[...]` to opt back into kernel-level TCP "287        "keepalive tuning on top of the env proxy.",288        source,289    )290291292def _warn_if_proxy_env_shadowed(293    socket_options: tuple[SocketOption, ...],294    *,295    openai_proxy: str | None,296) -> None:297    """Warn once if a custom transport will shadow httpx's proxy auto-detection.298299    When `socket_options` is non-empty we pass a custom `httpx` transport,300    which disables httpx's native proxy auto-detection — both the uppercase301    `HTTP_PROXY` / `HTTPS_PROXY` / `ALL_PROXY` env vars and their lowercase302    equivalents, plus macOS/Windows system proxy config. If the user303    supplies `openai_proxy` explicitly we route through it and the env-var304    handling is moot. 
Otherwise, a user whose app was transparently relying305    on any of those sources will silently stop using them on upgrade —306    emit a single WARNING so the behavior change is discoverable.307308    Detection uses `urllib.request.getproxies()` — the same surface httpx309    reads — so lowercase env vars and macOS/Windows system proxy settings310    are caught alongside the uppercase names.311    """312    global _proxy_env_warning_emitted313    if _proxy_env_warning_emitted or not socket_options or openai_proxy:314        return315    active = [name for name in _PROXY_ENV_VARS if os.environ.get(name)]316    try:317        detected = bool(urllib.request.getproxies())318    except Exception:319        detected = False320    if not active and not detected:321        return322    _proxy_env_warning_emitted = True323    if active:324        source = ", ".join(active) + " set in environment"325    else:326        source = "system proxy configuration detected"327    logger.warning(328        "langchain-openai injected a custom httpx transport to apply "329        "`http_socket_options`, which disables httpx's proxy "330        "auto-detection (%s). Set "331        "`LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` or pass `http_socket_options=()` "332        "to restore default proxy behavior, or supply `openai_proxy` / your "333        "own `http_client` / `http_async_client` to take full control.",334        source,335    )336337338def _resolve_socket_options(339    value: Sequence[SocketOption] | None,340) -> tuple[SocketOption, ...]:341    """Normalize the user-facing field to the tuple form builders expect.342343    - `None` => env-driven defaults (may itself be `()` if the user set344        `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0`). 
This path runs through345        `_filter_supported()` inside `_default_socket_options()` because346        the library-computed option set is aspirational and silent degradation347        is the right posture.348    - Any other sequence (including empty) => retupled for cache hashability.349        An empty tuple is the explicit "disabled" signal. A non-empty sequence350        is passed verbatim — **not** filtered. The user chose these options351        explicitly, so an unsupported constant should surface as a clear352        `OSError` at connect time, not be silently dropped.353354    Always returns a tuple — never `None` — so downstream signatures take355    `tuple[SocketOption, ...]` with `()` as the single "no options" shape.356    """357    if value is None:358        return _default_socket_options()359    return tuple(value)360361362class _SyncHttpxClientWrapper(openai.DefaultHttpxClient):363    """Borrowed from openai._base_client."""364365    def __del__(self) -> None:366        try:367            if self.is_closed:368                return369            self.close()370        except Exception:  # noqa: S110371            pass372373374class _AsyncHttpxClientWrapper(openai.DefaultAsyncHttpxClient):375    """Borrowed from openai._base_client."""376377    def __del__(self) -> None:378        try:379            if self.is_closed:380                return381            # TODO(someday): support non asyncio runtimes here382            asyncio.get_running_loop().create_task(self.aclose())383        except Exception:  # noqa: S110384            pass385386387def _build_sync_httpx_client(388    base_url: str | None,389    timeout: Any,390    socket_options: tuple[SocketOption, ...] 
= (),391) -> _SyncHttpxClientWrapper:392    kwargs: dict[str, Any] = {393        "base_url": base_url394        or os.environ.get("OPENAI_BASE_URL")395        or "https://api.openai.com/v1",396        "timeout": timeout,397    }398    if socket_options:399        # httpx ignores limits= when transport= is provided; set it explicitly400        # on the transport to avoid silently shrinking the connection pool.401        kwargs["transport"] = httpx.HTTPTransport(402            socket_options=list(socket_options),403            limits=_DEFAULT_CONNECTION_LIMITS,404        )405    return _SyncHttpxClientWrapper(**kwargs)406407408def _build_async_httpx_client(409    base_url: str | None,410    timeout: Any,411    socket_options: tuple[SocketOption, ...] = (),412) -> _AsyncHttpxClientWrapper:413    kwargs: dict[str, Any] = {414        "base_url": base_url415        or os.environ.get("OPENAI_BASE_URL")416        or "https://api.openai.com/v1",417        "timeout": timeout,418    }419    if socket_options:420        # See _build_sync_httpx_client for the limits= rationale.421        kwargs["transport"] = httpx.AsyncHTTPTransport(422            socket_options=list(socket_options),423            limits=_DEFAULT_CONNECTION_LIMITS,424        )425    return _AsyncHttpxClientWrapper(**kwargs)426427428def _build_proxied_sync_httpx_client(429    proxy: str,430    verify: Any,431    socket_options: tuple[SocketOption, ...] = (),432) -> httpx.Client:433    """httpx.Client for the openai_proxy code path.434435    When socket options are disabled (`()`), returns a plain436    `httpx.Client(proxy=..., verify=...)` with no transport injected.437    """438    if not socket_options:439        return httpx.Client(proxy=proxy, verify=verify)440    # Mount under `all://` (not `transport=`) so `Client._mounts` mirrors the441    # shape produced by httpx's own `proxy=` path — a single-entry dict keyed442    # by `URLPattern("all://")`. 
Callers (and the existing proxy integration443    # test) reach into `_mounts` to introspect the proxy URL; a bare444    # `transport=` leaves `_mounts` empty.445    #446    # `httpx.HTTPTransport(proxy=...)` is stricter about string coercion than447    # `httpx.Client(proxy=...)`; wrap in the public `httpx.Proxy` type for448    # version-stable behavior.449    transport = httpx.HTTPTransport(450        proxy=httpx.Proxy(proxy),451        verify=verify,452        socket_options=list(socket_options),453        limits=_DEFAULT_CONNECTION_LIMITS,454    )455    return httpx.Client(mounts={"all://": transport})456457458def _build_proxied_async_httpx_client(459    proxy: str,460    verify: Any,461    socket_options: tuple[SocketOption, ...] = (),462) -> httpx.AsyncClient:463    """httpx.AsyncClient for the openai_proxy code path.464465    See `_build_proxied_sync_httpx_client` for the opt-out fallback,466    the `mounts={"all://": ...}` shape, and the `httpx.Proxy` wrapping467    rationale.468    """469    if not socket_options:470        return httpx.AsyncClient(proxy=proxy, verify=verify)471    transport = httpx.AsyncHTTPTransport(472        proxy=httpx.Proxy(proxy),473        verify=verify,474        socket_options=list(socket_options),475        limits=_DEFAULT_CONNECTION_LIMITS,476    )477    return httpx.AsyncClient(mounts={"all://": transport})478479480@lru_cache481def _cached_sync_httpx_client(482    base_url: str | None,483    timeout: Any,484    socket_options: tuple[SocketOption, ...] = (),485) -> _SyncHttpxClientWrapper:486    return _build_sync_httpx_client(base_url, timeout, socket_options)487488489@lru_cache490def _cached_async_httpx_client(491    base_url: str | None,492    timeout: Any,493    socket_options: tuple[SocketOption, ...] 
= (),494) -> _AsyncHttpxClientWrapper:495    return _build_async_httpx_client(base_url, timeout, socket_options)496497498def _get_default_httpx_client(499    base_url: str | None,500    timeout: Any,501    socket_options: tuple[SocketOption, ...] = (),502) -> _SyncHttpxClientWrapper:503    """Get default httpx client.504505    Uses cached client unless timeout is `httpx.Timeout`, which is not hashable.506    """507    try:508        hash(timeout)509    except TypeError:510        return _build_sync_httpx_client(base_url, timeout, socket_options)511    else:512        return _cached_sync_httpx_client(base_url, timeout, socket_options)513514515def _get_default_async_httpx_client(516    base_url: str | None,517    timeout: Any,518    socket_options: tuple[SocketOption, ...] = (),519) -> _AsyncHttpxClientWrapper:520    """Get default httpx client.521522    Uses cached client unless timeout is `httpx.Timeout`, which is not hashable.523    """524    try:525        hash(timeout)526    except TypeError:527        return _build_async_httpx_client(base_url, timeout, socket_options)528    else:529        return _cached_async_httpx_client(base_url, timeout, socket_options)530531532def _resolve_sync_and_async_api_keys(533    api_key: SecretStr | Callable[[], str] | Callable[[], Awaitable[str]],534) -> tuple[str | None | Callable[[], str], str | Callable[[], Awaitable[str]]]:535    """Resolve sync and async API key values.536537    Because OpenAI and AsyncOpenAI clients support either sync or async callables for538    the API key, we need to resolve separate values here.539    """540    if isinstance(api_key, SecretStr):541        sync_api_key_value: str | None | Callable[[], str] = api_key.get_secret_value()542        async_api_key_value: str | Callable[[], Awaitable[str]] = (543            api_key.get_secret_value()544        )545    elif callable(api_key):546        if inspect.iscoroutinefunction(api_key):547            async_api_key_value = api_key548            
sync_api_key_value = None549        else:550            sync_api_key_value = cast(Callable, api_key)551552            async def async_api_key_wrapper() -> str:553                return await asyncio.get_event_loop().run_in_executor(554                    None, cast(Callable, api_key)555                )556557            async_api_key_value = async_api_key_wrapper558559    return sync_api_key_value, async_api_key_value560561562T = TypeVar("T")563564# On Python ≤3.10, asyncio.TimeoutError and builtins.TimeoutError are distinct565# hierarchies, so subclassing only asyncio.TimeoutError would not be caught by566# `except TimeoutError:`. On Python ≥3.11 they are the same object, so listing567# both bases would raise TypeError: duplicate base class. We resolve this at568# class-definition time.569_StreamChunkTimeoutBases: tuple[type, ...] = (570    (asyncio.TimeoutError,)571    if issubclass(asyncio.TimeoutError, TimeoutError)572    else (asyncio.TimeoutError, TimeoutError)573)574575576class StreamChunkTimeoutError(*_StreamChunkTimeoutBases):  # type: ignore[misc]577    """Raised when no streaming chunk arrives within `stream_chunk_timeout`.578579    `issubclass(StreamChunkTimeoutError, asyncio.TimeoutError)` and580    `issubclass(StreamChunkTimeoutError, TimeoutError)` both hold on all581    supported Python versions, so existing `except asyncio.TimeoutError:`582    and `except TimeoutError:` handlers keep catching the exception. 
On583    Python 3.11+ the two exceptions are the same object, so only584    `asyncio.TimeoutError` appears in `__bases__`.585586    Structured attributes (`timeout_s`, `model_name`, `chunks_received`)587    mirror the WARNING log's `extra=` payload so diagnostic code doesn't588    need to regex the message.589    """590591    def __init__(592        self,593        timeout_s: float,594        *,595        model_name: str | None = None,596        chunks_received: int = 0,597    ) -> None:598        self.timeout_s = timeout_s599        self.model_name = model_name600        self.chunks_received = chunks_received601        context = []602        if model_name:603            context.append(f"model={model_name}")604        context.append(f"chunks_received={chunks_received}")605        suffix = f" ({', '.join(context)})"606        super().__init__(607            f"No streaming chunk received for {timeout_s:.1f}s{suffix}. The "608            f"connection may be alive at the TCP layer but is not producing "609            f"content. Tune or disable via the `stream_chunk_timeout` "610            f"constructor kwarg (set to None or 0 to disable) or the "611            f"`LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. See also "612            f"`http_socket_options` for the kernel-level TCP timeout that "613            f"catches dead TCP peers."614        )615616617async def _astream_with_chunk_timeout(618    source: AsyncIterator[T],619    timeout: float | None,620    *,621    model_name: str | None = None,622) -> AsyncIterator[T]:623    """Yield from `source` but bound the per-chunk wait time.624625    If `timeout` is None or <=0, yields directly with no wall-clock bound.626    Otherwise, each `__anext__` is wrapped in627    `asyncio.wait_for(..., timeout)`. A timeout raises628    `StreamChunkTimeoutError` (a `TimeoutError` subclass) whose message629    names the knob, the env-var override, the model, and how many chunks630    were received before the stall. 
A single-line structured log also631    fires at WARNING so the signal is visible in aggregate logging systems632    even when the exception is caught upstream.633634    When the timeout is active, the source iterator is explicitly635    `aclose()`-d on early exit (timeout, consumer break, any exception) so636    the underlying httpx streaming connection is released promptly. The637    pass-through branch (timeout disabled) relies on httpx's GC-driven638    cleanup instead — matching the behavior of unwrapped streams.639    """640    if not timeout or timeout <= 0:641        async for item in source:642            yield item643        return644645    chunks_received = 0646    it = source.__aiter__()647    try:648        while True:649            try:650                chunk = await asyncio.wait_for(it.__anext__(), timeout=timeout)651            except StopAsyncIteration:652                return653            except asyncio.TimeoutError as e:654                logger.warning(655                    "langchain_openai.stream_chunk_timeout fired",656                    extra={657                        "source": "stream_chunk_timeout",658                        "timeout_s": timeout,659                        "model_name": model_name,660                        "chunks_received": chunks_received,661                    },662                )663                raise StreamChunkTimeoutError(664                    timeout,665                    model_name=model_name,666                    chunks_received=chunks_received,667                ) from e668            chunks_received += 1669            yield chunk670    finally:671        aclose = getattr(it, "aclose", None)672        if aclose is not None:673            try:674                await aclose()675            except Exception as cleanup_exc:676                # Best-effort cleanup; don't mask the original exception,677                # but leave a DEBUG trace so pool/transport bugs stay678                # discoverable 
at the right log level.679                logger.debug(680                    "aclose() during _astream_with_chunk_timeout cleanup "681                    "raised; ignoring",682                    exc_info=cleanup_exc,683                )