Catch specific exceptions instead of Exception to avoid masking bugs
except Exception:
1"""Helpers for OpenAI httpx client construction, transport tuning, and streaming.23Covers cached default client builders, proxy-aware variants for the4`openai_proxy` path, kernel-level TCP keepalive / `TCP_USER_TIMEOUT` socket5options, and the `_astream_with_chunk_timeout` wrapper that bounds per-chunk6wall-clock time on async SSE streams.78Client-builder boilerplate mirrors the patterns in `openai._base_client`;9socket-option tuning and the streaming timeout are original to this module.10"""1112from __future__ import annotations1314import asyncio15import inspect16import logging17import os18import socket19import sys20import urllib.request21from collections.abc import AsyncIterator, Awaitable, Callable, Sequence22from functools import lru_cache23from typing import Any, TypeVar, cast2425import httpx26import openai27from pydantic import SecretStr2829logger = logging.getLogger(__name__)3031SocketOption = tuple[int, int, int]3233# socket.TCP_KEEPIDLE etc. are absent on darwin/win32; use raw UAPI constants.34_LINUX_TCP_KEEPIDLE = 435_LINUX_TCP_KEEPINTVL = 536_LINUX_TCP_KEEPCNT = 637_LINUX_TCP_USER_TIMEOUT = 183839# macOS: same semantics, different constants from <netinet/tcp.h>.40_DARWIN_TCP_KEEPALIVE = 0x10 # idle seconds before first probe41_DARWIN_TCP_KEEPINTVL = 0x10142_DARWIN_TCP_KEEPCNT = 0x1024344# Mirrors the openai SDK's pool defaults. Hardcoded to avoid depending on45# an internal module path (openai._constants) that can move across SDK versions.46_DEFAULT_CONNECTION_LIMITS = httpx.Limits(47 max_connections=1000,48 max_keepalive_connections=100,49 keepalive_expiry=5.0,50)515253def _int_env(name: str, default: int, *, allow_negative: bool = False) -> int:54 """Read an int env var with graceful fallback + discoverable warning.5556 Unparseable or (by default) negative values fall back to `default` and57 emit a single `WARNING` naming the offending variable. A misconfigured58 environment still loads, but operators see the fallback in their logs59 rather than silently getting a surprising default.60 """61 raw = os.environ.get(name)62 if raw is None:63 return default64 try:65 value = int(raw)66 except (TypeError, ValueError):67 logger.warning(68 "Invalid value for %s=%r (not an int); falling back to %d.",69 name,70 raw,71 default,72 )73 return default74 if not allow_negative and value < 0:75 logger.warning(76 "Invalid value for %s=%r (negative); falling back to %d.",77 name,78 raw,79 default,80 )81 return default82 return value838485def _float_env(name: str, default: float, *, allow_negative: bool = False) -> float:86 """Read a float env var with graceful fallback + discoverable warning.8788 See `_int_env`. Negative values are rejected by default so a typo in89 `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S=-10` can't silently disable the90 wrapper it was meant to configure.91 """92 raw = os.environ.get(name)93 if raw is None:94 return default95 try:96 value = float(raw)97 except (TypeError, ValueError):98 logger.warning(99 "Invalid value for %s=%r (not a float); falling back to %s.",100 name,101 raw,102 default,103 )104 return default105 if not allow_negative and value < 0:106 logger.warning(107 "Invalid value for %s=%r (negative); falling back to %s.",108 name,109 raw,110 default,111 )112 return default113 return value114115116def _filter_supported(opts: list[SocketOption]) -> list[SocketOption]:117 """Drop socket options the running platform rejects.118119 Probes each option against a throwaway socket via `setsockopt` and keeps120 only those the kernel accepts. This keeps the library-computed defaults121 non-fatal across platforms that don't implement every Linux option —122 `TCP_USER_TIMEOUT` in particular is Linux-only and silently missing on123 macOS, some minimal kernels, and older gVisor builds. Dropped options124 are logged at `DEBUG` so an operator can confirm whether a kernel-level125 knob took effect on their platform.126127 If the probe socket cannot be created (sandboxed runtimes, `pytest-socket`128 under `--disable-socket`, tight seccomp policies), the input list is129 returned unfiltered. This preserves the pass-through behavior used for130 explicit user overrides: unsupported options will surface as a clear131 `OSError` at the first real `connect()` rather than being silently132 dropped during `ChatOpenAI` construction.133 """134 try:135 probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)136 except Exception:137 # Broad catch is deliberate: `pytest_socket` under `--disable-socket`138 # raises `SocketBlockedError` (a `RuntimeError`, not `OSError`), and139 # seccomp/sandboxed runtimes have been observed to raise other140 # `OSError` subclasses and `PermissionError`. The intent is "any141 # inability to create a probe socket -> pass through unfiltered,"142 # and narrowing the type would silently regress sandboxed CI.143 return list(opts)144 try:145 supported: list[SocketOption] = []146 dropped: list[SocketOption] = []147 for level, optname, optval in opts:148 try:149 probe.setsockopt(level, optname, optval)150 except OSError:151 dropped.append((level, optname, optval))152 continue153 supported.append((level, optname, optval))154 if dropped:155 logger.debug(156 "Dropped %d unsupported socket option(s) on %s: %s",157 len(dropped),158 sys.platform,159 dropped,160 )161 return supported162 finally:163 probe.close()164165166def _default_socket_options() -> tuple[SocketOption, ...]:167 """Return default TCP socket options, or `()` if disabled via env.168169 Always returns a tuple (never None) so callers and `@lru_cache` keys170 remain uniform: `()` is the single shape for "no options".171172 Target behavior on Linux/gVisor with the full option set: silent peers173 are surfaced within ~90-120s via `SO_KEEPALIVE` + `TCP_USER_TIMEOUT`174 (keepalive path gives a ~90s floor at the defaults; `TCP_USER_TIMEOUT`175 caps at 120s). On platforms that reject some options,176 `_filter_supported` drops them and the bound degrades to whatever the177 remaining options provide.178 """179 if os.environ.get("LANGCHAIN_OPENAI_TCP_KEEPALIVE", "1") == "0":180 return ()181182 keepidle = _int_env("LANGCHAIN_OPENAI_TCP_KEEPIDLE", 60)183 keepintvl = _int_env("LANGCHAIN_OPENAI_TCP_KEEPINTVL", 10)184 keepcnt = _int_env("LANGCHAIN_OPENAI_TCP_KEEPCNT", 3)185 user_timeout_ms = _int_env("LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS", 120000)186187 opts: list[SocketOption] = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]188 if sys.platform == "linux":189 opts += [190 (socket.IPPROTO_TCP, _LINUX_TCP_KEEPIDLE, keepidle),191 (socket.IPPROTO_TCP, _LINUX_TCP_KEEPINTVL, keepintvl),192 (socket.IPPROTO_TCP, _LINUX_TCP_KEEPCNT, keepcnt),193 (socket.IPPROTO_TCP, _LINUX_TCP_USER_TIMEOUT, user_timeout_ms),194 ]195 elif sys.platform == "darwin":196 opts += [197 (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPALIVE, keepidle),198 (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPINTVL, keepintvl),199 (socket.IPPROTO_TCP, _DARWIN_TCP_KEEPCNT, keepcnt),200 ]201 # Windows (win32): SO_KEEPALIVE only; per-option tuning requires WSAIoctl.202 return tuple(_filter_supported(opts))203204205_PROXY_ENV_VARS = (206 "HTTP_PROXY",207 "HTTPS_PROXY",208 "ALL_PROXY",209 "http_proxy",210 "https_proxy",211 "all_proxy",212)213_proxy_env_warning_emitted = False214_proxy_env_bypass_info_emitted = False215216217def _proxy_env_detected() -> bool:218 """True when httpx would pick up a proxy from env or system config.219220 Mirrors the surface httpx reads (`urllib.request.getproxies()` plus the221 uppercase env var names) so a positive result means env-proxy222 auto-detection is live on pre-PR code paths.223 """224 if any(os.environ.get(name) for name in _PROXY_ENV_VARS):225 return True226 try:227 return bool(urllib.request.getproxies())228 except Exception:229 return False230231232def _should_bypass_socket_options_for_proxy_env(233 *,234 http_socket_options: Sequence[SocketOption] | None,235 http_client: Any,236 http_async_client: Any,237 openai_proxy: str | None,238) -> bool:239 """True when default shape + env proxy detected → skip transport injection.240241 Preserves pre-PR behavior for apps relying on httpx's env-proxy242 auto-detection. Only triggers when the user has made no explicit choice243 that would signal they want the custom transport:244245 - `http_socket_options` left at `None` (default, not `()` or a sequence)246 - `LANGCHAIN_OPENAI_TCP_KEEPALIVE` is not `0` (kill-switch is its own path)247 - No `http_client` or `http_async_client` supplied248 - No `openai_proxy` supplied249 - A proxy env var / system proxy is visible to httpx250251 If any of those are set, the user has opted in to the transport path252 (directly or via `openai_proxy`) and normal behavior — including the253 shadowed-proxy WARNING — applies. When the kill-switch is set,254 `_default_socket_options` already returns `()`, so the bypass INFO255 would be noise; route through the normal path instead.256 """257 if http_socket_options is not None:258 return False259 if os.environ.get("LANGCHAIN_OPENAI_TCP_KEEPALIVE", "1") == "0":260 return False261 if http_client is not None or http_async_client is not None:262 return False263 if openai_proxy:264 return False265 return _proxy_env_detected()266267268def _log_proxy_env_bypass_once() -> None:269 """Emit a one-time INFO when the proxy-env bypass triggers.270271 Visibility for operators running with a custom log pipeline: the bypass272 is the *safe* outcome (env-proxy auto-detection preserved), but it means273 socket-level keepalive / `TCP_USER_TIMEOUT` aren't applied on this274 instance. INFO-level, since it's not a problem — just a diagnostic.275 """276 global _proxy_env_bypass_info_emitted277 if _proxy_env_bypass_info_emitted:278 return279 _proxy_env_bypass_info_emitted = True280 active = [name for name in _PROXY_ENV_VARS if os.environ.get(name)]281 source = ", ".join(active) if active else "system proxy configuration"282 logger.info(283 "langchain-openai detected %s and no explicit `http_socket_options` / "284 "`http_client` / `http_async_client` / `openai_proxy`; skipping the "285 "custom `httpx` transport so httpx's env-proxy auto-detection applies. "286 "Pass `http_socket_options=[...]` to opt back into kernel-level TCP "287 "keepalive tuning on top of the env proxy.",288 source,289 )290291292def _warn_if_proxy_env_shadowed(293 socket_options: tuple[SocketOption, ...],294 *,295 openai_proxy: str | None,296) -> None:297 """Warn once if a custom transport will shadow httpx's proxy auto-detection.298299 When `socket_options` is non-empty we pass a custom `httpx` transport,300 which disables httpx's native proxy auto-detection — both the uppercase301 `HTTP_PROXY` / `HTTPS_PROXY` / `ALL_PROXY` env vars and their lowercase302 equivalents, plus macOS/Windows system proxy config. If the user303 supplies `openai_proxy` explicitly we route through it and the env-var304 handling is moot. Otherwise, a user whose app was transparently relying305 on any of those sources will silently stop using them on upgrade —306 emit a single WARNING so the behavior change is discoverable.307308 Detection uses `urllib.request.getproxies()` — the same surface httpx309 reads — so lowercase env vars and macOS/Windows system proxy settings310 are caught alongside the uppercase names.311 """312 global _proxy_env_warning_emitted313 if _proxy_env_warning_emitted or not socket_options or openai_proxy:314 return315 active = [name for name in _PROXY_ENV_VARS if os.environ.get(name)]316 try:317 detected = bool(urllib.request.getproxies())318 except Exception:319 detected = False320 if not active and not detected:321 return322 _proxy_env_warning_emitted = True323 if active:324 source = ", ".join(active) + " set in environment"325 else:326 source = "system proxy configuration detected"327 logger.warning(328 "langchain-openai injected a custom httpx transport to apply "329 "`http_socket_options`, which disables httpx's proxy "330 "auto-detection (%s). Set "331 "`LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` or pass `http_socket_options=()` "332 "to restore default proxy behavior, or supply `openai_proxy` / your "333 "own `http_client` / `http_async_client` to take full control.",334 source,335 )336337338def _resolve_socket_options(339 value: Sequence[SocketOption] | None,340) -> tuple[SocketOption, ...]:341 """Normalize the user-facing field to the tuple form builders expect.342343 - `None` => env-driven defaults (may itself be `()` if the user set344 `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0`). This path runs through345 `_filter_supported()` inside `_default_socket_options()` because346 the library-computed option set is aspirational and silent degradation347 is the right posture.348 - Any other sequence (including empty) => retupled for cache hashability.349 An empty tuple is the explicit "disabled" signal. A non-empty sequence350 is passed verbatim — **not** filtered. The user chose these options351 explicitly, so an unsupported constant should surface as a clear352 `OSError` at connect time, not be silently dropped.353354 Always returns a tuple — never `None` — so downstream signatures take355 `tuple[SocketOption, ...]` with `()` as the single "no options" shape.356 """357 if value is None:358 return _default_socket_options()359 return tuple(value)360361362class _SyncHttpxClientWrapper(openai.DefaultHttpxClient):363 """Borrowed from openai._base_client."""364365 def __del__(self) -> None:366 try:367 if self.is_closed:368 return369 self.close()370 except Exception: # noqa: S110371 pass372373374class _AsyncHttpxClientWrapper(openai.DefaultAsyncHttpxClient):375 """Borrowed from openai._base_client."""376377 def __del__(self) -> None:378 try:379 if self.is_closed:380 return381 # TODO(someday): support non asyncio runtimes here382 asyncio.get_running_loop().create_task(self.aclose())383 except Exception: # noqa: S110384 pass385386387def _build_sync_httpx_client(388 base_url: str | None,389 timeout: Any,390 socket_options: tuple[SocketOption, ...] = (),391) -> _SyncHttpxClientWrapper:392 kwargs: dict[str, Any] = {393 "base_url": base_url394 or os.environ.get("OPENAI_BASE_URL")395 or "https://api.openai.com/v1",396 "timeout": timeout,397 }398 if socket_options:399 # httpx ignores limits= when transport= is provided; set it explicitly400 # on the transport to avoid silently shrinking the connection pool.401 kwargs["transport"] = httpx.HTTPTransport(402 socket_options=list(socket_options),403 limits=_DEFAULT_CONNECTION_LIMITS,404 )405 return _SyncHttpxClientWrapper(**kwargs)406407408def _build_async_httpx_client(409 base_url: str | None,410 timeout: Any,411 socket_options: tuple[SocketOption, ...] = (),412) -> _AsyncHttpxClientWrapper:413 kwargs: dict[str, Any] = {414 "base_url": base_url415 or os.environ.get("OPENAI_BASE_URL")416 or "https://api.openai.com/v1",417 "timeout": timeout,418 }419 if socket_options:420 # See _build_sync_httpx_client for the limits= rationale.421 kwargs["transport"] = httpx.AsyncHTTPTransport(422 socket_options=list(socket_options),423 limits=_DEFAULT_CONNECTION_LIMITS,424 )425 return _AsyncHttpxClientWrapper(**kwargs)426427428def _build_proxied_sync_httpx_client(429 proxy: str,430 verify: Any,431 socket_options: tuple[SocketOption, ...] = (),432) -> httpx.Client:433 """httpx.Client for the openai_proxy code path.434435 When socket options are disabled (`()`), returns a plain436 `httpx.Client(proxy=..., verify=...)` with no transport injected.437 """438 if not socket_options:439 return httpx.Client(proxy=proxy, verify=verify)440 # Mount under `all://` (not `transport=`) so `Client._mounts` mirrors the441 # shape produced by httpx's own `proxy=` path — a single-entry dict keyed442 # by `URLPattern("all://")`. Callers (and the existing proxy integration443 # test) reach into `_mounts` to introspect the proxy URL; a bare444 # `transport=` leaves `_mounts` empty.445 #446 # `httpx.HTTPTransport(proxy=...)` is stricter about string coercion than447 # `httpx.Client(proxy=...)`; wrap in the public `httpx.Proxy` type for448 # version-stable behavior.449 transport = httpx.HTTPTransport(450 proxy=httpx.Proxy(proxy),451 verify=verify,452 socket_options=list(socket_options),453 limits=_DEFAULT_CONNECTION_LIMITS,454 )455 return httpx.Client(mounts={"all://": transport})456457458def _build_proxied_async_httpx_client(459 proxy: str,460 verify: Any,461 socket_options: tuple[SocketOption, ...] = (),462) -> httpx.AsyncClient:463 """httpx.AsyncClient for the openai_proxy code path.464465 See `_build_proxied_sync_httpx_client` for the opt-out fallback,466 the `mounts={"all://": ...}` shape, and the `httpx.Proxy` wrapping467 rationale.468 """469 if not socket_options:470 return httpx.AsyncClient(proxy=proxy, verify=verify)471 transport = httpx.AsyncHTTPTransport(472 proxy=httpx.Proxy(proxy),473 verify=verify,474 socket_options=list(socket_options),475 limits=_DEFAULT_CONNECTION_LIMITS,476 )477 return httpx.AsyncClient(mounts={"all://": transport})478479480@lru_cache481def _cached_sync_httpx_client(482 base_url: str | None,483 timeout: Any,484 socket_options: tuple[SocketOption, ...] = (),485) -> _SyncHttpxClientWrapper:486 return _build_sync_httpx_client(base_url, timeout, socket_options)487488489@lru_cache490def _cached_async_httpx_client(491 base_url: str | None,492 timeout: Any,493 socket_options: tuple[SocketOption, ...] = (),494) -> _AsyncHttpxClientWrapper:495 return _build_async_httpx_client(base_url, timeout, socket_options)496497498def _get_default_httpx_client(499 base_url: str | None,500 timeout: Any,501 socket_options: tuple[SocketOption, ...] = (),502) -> _SyncHttpxClientWrapper:503 """Get default httpx client.504505 Uses cached client unless timeout is `httpx.Timeout`, which is not hashable.506 """507 try:508 hash(timeout)509 except TypeError:510 return _build_sync_httpx_client(base_url, timeout, socket_options)511 else:512 return _cached_sync_httpx_client(base_url, timeout, socket_options)513514515def _get_default_async_httpx_client(516 base_url: str | None,517 timeout: Any,518 socket_options: tuple[SocketOption, ...] = (),519) -> _AsyncHttpxClientWrapper:520 """Get default httpx client.521522 Uses cached client unless timeout is `httpx.Timeout`, which is not hashable.523 """524 try:525 hash(timeout)526 except TypeError:527 return _build_async_httpx_client(base_url, timeout, socket_options)528 else:529 return _cached_async_httpx_client(base_url, timeout, socket_options)530531532def _resolve_sync_and_async_api_keys(533 api_key: SecretStr | Callable[[], str] | Callable[[], Awaitable[str]],534) -> tuple[str | None | Callable[[], str], str | Callable[[], Awaitable[str]]]:535 """Resolve sync and async API key values.536537 Because OpenAI and AsyncOpenAI clients support either sync or async callables for538 the API key, we need to resolve separate values here.539 """540 if isinstance(api_key, SecretStr):541 sync_api_key_value: str | None | Callable[[], str] = api_key.get_secret_value()542 async_api_key_value: str | Callable[[], Awaitable[str]] = (543 api_key.get_secret_value()544 )545 elif callable(api_key):546 if inspect.iscoroutinefunction(api_key):547 async_api_key_value = api_key548 sync_api_key_value = None549 else:550 sync_api_key_value = cast(Callable, api_key)551552 async def async_api_key_wrapper() -> str:553 return await asyncio.get_event_loop().run_in_executor(554 None, cast(Callable, api_key)555 )556557 async_api_key_value = async_api_key_wrapper558559 return sync_api_key_value, async_api_key_value560561562T = TypeVar("T")563564# On Python ≤3.10, asyncio.TimeoutError and builtins.TimeoutError are distinct565# hierarchies, so subclassing only asyncio.TimeoutError would not be caught by566# `except TimeoutError:`. On Python ≥3.11 they are the same object, so listing567# both bases would raise TypeError: duplicate base class. We resolve this at568# class-definition time.569_StreamChunkTimeoutBases: tuple[type, ...] = (570 (asyncio.TimeoutError,)571 if issubclass(asyncio.TimeoutError, TimeoutError)572 else (asyncio.TimeoutError, TimeoutError)573)574575576class StreamChunkTimeoutError(*_StreamChunkTimeoutBases): # type: ignore[misc]577 """Raised when no streaming chunk arrives within `stream_chunk_timeout`.578579 `issubclass(StreamChunkTimeoutError, asyncio.TimeoutError)` and580 `issubclass(StreamChunkTimeoutError, TimeoutError)` both hold on all581 supported Python versions, so existing `except asyncio.TimeoutError:`582 and `except TimeoutError:` handlers keep catching the exception. On583 Python 3.11+ the two exceptions are the same object, so only584 `asyncio.TimeoutError` appears in `__bases__`.585586 Structured attributes (`timeout_s`, `model_name`, `chunks_received`)587 mirror the WARNING log's `extra=` payload so diagnostic code doesn't588 need to regex the message.589 """590591 def __init__(592 self,593 timeout_s: float,594 *,595 model_name: str | None = None,596 chunks_received: int = 0,597 ) -> None:598 self.timeout_s = timeout_s599 self.model_name = model_name600 self.chunks_received = chunks_received601 context = []602 if model_name:603 context.append(f"model={model_name}")604 context.append(f"chunks_received={chunks_received}")605 suffix = f" ({', '.join(context)})"606 super().__init__(607 f"No streaming chunk received for {timeout_s:.1f}s{suffix}. The "608 f"connection may be alive at the TCP layer but is not producing "609 f"content. Tune or disable via the `stream_chunk_timeout` "610 f"constructor kwarg (set to None or 0 to disable) or the "611 f"`LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. See also "612 f"`http_socket_options` for the kernel-level TCP timeout that "613 f"catches dead TCP peers."614 )615616617async def _astream_with_chunk_timeout(618 source: AsyncIterator[T],619 timeout: float | None,620 *,621 model_name: str | None = None,622) -> AsyncIterator[T]:623 """Yield from `source` but bound the per-chunk wait time.624625 If `timeout` is None or <=0, yields directly with no wall-clock bound.626 Otherwise, each `__anext__` is wrapped in627 `asyncio.wait_for(..., timeout)`. A timeout raises628 `StreamChunkTimeoutError` (a `TimeoutError` subclass) whose message629 names the knob, the env-var override, the model, and how many chunks630 were received before the stall. A single-line structured log also631 fires at WARNING so the signal is visible in aggregate logging systems632 even when the exception is caught upstream.633634 When the timeout is active, the source iterator is explicitly635 `aclose()`-d on early exit (timeout, consumer break, any exception) so636 the underlying httpx streaming connection is released promptly. The637 pass-through branch (timeout disabled) relies on httpx's GC-driven638 cleanup instead — matching the behavior of unwrapped streams.639 """640 if not timeout or timeout <= 0:641 async for item in source:642 yield item643 return644645 chunks_received = 0646 it = source.__aiter__()647 try:648 while True:649 try:650 chunk = await asyncio.wait_for(it.__anext__(), timeout=timeout)651 except StopAsyncIteration:652 return653 except asyncio.TimeoutError as e:654 logger.warning(655 "langchain_openai.stream_chunk_timeout fired",656 extra={657 "source": "stream_chunk_timeout",658 "timeout_s": timeout,659 "model_name": model_name,660 "chunks_received": chunks_received,661 },662 )663 raise StreamChunkTimeoutError(664 timeout,665 model_name=model_name,666 chunks_received=chunks_received,667 ) from e668 chunks_received += 1669 yield chunk670 finally:671 aclose = getattr(it, "aclose", None)672 if aclose is not None:673 try:674 await aclose()675 except Exception as cleanup_exc:676 # Best-effort cleanup; don't mask the original exception,677 # but leave a DEBUG trace so pool/transport bugs stay678 # discoverable at the right log level.679 logger.debug(680 "aclose() during _astream_with_chunk_timeout cleanup "681 "raised; ignoring",682 exc_info=cleanup_exc,683 )
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.