libs/partners/openai/langchain_openai/chat_models/base.py · langchain-ai/langchain

1"""OpenAI chat wrapper.23!!! warning "API scope"45        `ChatOpenAI` targets6        [official OpenAI API specifications](https://github.com/openai/openai-openapi)7        only. Non-standard response fields added by third-party providers (e.g.,8        `reasoning_content`, `reasoning_details`) are **not** extracted or9        preserved. If you are pointing `base_url` at a provider such as10        OpenRouter, vLLM, or DeepSeek, use the corresponding provider-specific11        LangChain package instead (e.g., `ChatDeepSeek`, `ChatOpenRouter`).12"""1314from __future__ import annotations1516import base6417import json18import logging19import os20import re21import ssl22import sys23import warnings24from collections.abc import (25    AsyncIterator,26    Awaitable,27    Callable,28    Iterator,29    Mapping,30    Sequence,31)32from functools import partial33from io import BytesIO34from json import JSONDecodeError35from math import ceil36from operator import itemgetter37from typing import (38    TYPE_CHECKING,39    Any,40    Literal,41    TypeAlias,42    TypeVar,43    cast,44)45from urllib.parse import urlparse4647import certifi48import openai49import tiktoken50from langchain_core.callbacks import (51    AsyncCallbackManagerForLLMRun,52    CallbackManagerForLLMRun,53)54from langchain_core.exceptions import ContextOverflowError55from langchain_core.language_models import (56    LanguageModelInput,57    ModelProfileRegistry,58)59from langchain_core.language_models.chat_models import (60    BaseChatModel,61    LangSmithParams,62)63from langchain_core.messages import (64    AIMessage,65    AIMessageChunk,66    BaseMessage,67    BaseMessageChunk,68    ChatMessage,69    ChatMessageChunk,70    FunctionMessage,71    FunctionMessageChunk,72    HumanMessage,73    HumanMessageChunk,74    InvalidToolCall,75    SystemMessage,76    SystemMessageChunk,77    ToolCall,78    ToolMessage,79    ToolMessageChunk,80    is_data_content_block,81)82from langchain_core.messages import content as types83from langchain_core.messages.ai import (84    InputTokenDetails,85    OutputTokenDetails,86    UsageMetadata,87)88from langchain_core.messages.block_translators.openai import (89    _convert_from_v03_ai_message,90    convert_to_openai_data_block,91)92from langchain_core.messages.tool import tool_call_chunk93from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser94from langchain_core.output_parsers.openai_tools import (95    JsonOutputKeyToolsParser,96    PydanticToolsParser,97    make_invalid_tool_call,98    parse_tool_call,99)100from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult101from langchain_core.runnables import (102    Runnable,103    RunnableLambda,104    RunnableMap,105    RunnablePassthrough,106)107from langchain_core.runnables.config import run_in_executor108from langchain_core.tools import BaseTool109from langchain_core.tools.base import _stringify110from langchain_core.utils import get_pydantic_field_names111from langchain_core.utils.function_calling import (112    convert_to_openai_function,113    convert_to_openai_tool,114)115from langchain_core.utils.pydantic import (116    PydanticBaseModel,117    TypeBaseModel,118    is_basemodel_subclass,119)120from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env121from pydantic import (122    BaseModel,123    ConfigDict,124    Field,125    SecretStr,126    ValidationError,127    field_validator,128    model_validator,129)130from pydantic.v1 import BaseModel as BaseModelV1131from typing_extensions import Self132133from langchain_openai.chat_models._client_utils import (134    _astream_with_chunk_timeout,135    _build_proxied_async_httpx_client,136    _build_proxied_sync_httpx_client,137    _float_env,138    _get_default_async_httpx_client,139    _get_default_httpx_client,140    _log_proxy_env_bypass_once,141    _resolve_socket_options,142    _resolve_sync_and_async_api_keys,143    _should_bypass_socket_options_for_proxy_env,144    _warn_if_proxy_env_shadowed,145)146from langchain_openai.chat_models._compat import (147    _convert_from_v1_to_chat_completions,148    _convert_from_v1_to_responses,149    _convert_to_v03_ai_message,150)151from langchain_openai.data._profiles import _PROFILES152153if TYPE_CHECKING:154    import httpx155    from langchain_core.language_models import ModelProfile156    from openai.types.responses import Response157158logger = logging.getLogger(__name__)159160# This SSL context is equivalent to the default `verify=True`.161# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances162global_ssl_context = ssl.create_default_context(cafile=certifi.where())163164_ssrf_client: httpx.Client | None = None165166167def _get_ssrf_safe_client() -> httpx.Client:168    global _ssrf_client169    if _ssrf_client is None:170        from langchain_core._security._transport import ssrf_safe_client171172        _ssrf_client = ssrf_safe_client(173            verify=global_ssl_context, follow_redirects=False174        )175    return _ssrf_client176177178_MODEL_PROFILES = cast(ModelProfileRegistry, _PROFILES)179180181def _get_default_model_profile(model_name: str) -> ModelProfile:182    default = _MODEL_PROFILES.get(model_name) or {}183    return default.copy()184185186WellKnownTools = (187    "file_search",188    "web_search_preview",189    "web_search",190    "computer_use_preview",191    "code_interpreter",192    "mcp",193    "image_generation",194    "tool_search",195)196197198def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:199    """Convert a dictionary to a LangChain message.200201    Args:202        _dict: The dictionary.203204    Returns:205        The LangChain message.206    """207    role = _dict.get("role")208    name = _dict.get("name")209    id_ = _dict.get("id")210    if role == "user":211        return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)212    if role == "assistant":213        # Fix for azure214        # Also OpenAI returns None for tool invocations215        content = _dict.get("content", "") or ""216        additional_kwargs: dict = {}217        if function_call := _dict.get("function_call"):218            additional_kwargs["function_call"] = dict(function_call)219        tool_calls = []220        invalid_tool_calls = []221        if raw_tool_calls := _dict.get("tool_calls"):222            for raw_tool_call in raw_tool_calls:223                try:224                    tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))225                except Exception as e:226                    invalid_tool_calls.append(227                        make_invalid_tool_call(raw_tool_call, str(e))228                    )229        if audio := _dict.get("audio"):230            additional_kwargs["audio"] = audio231        return AIMessage(232            content=content,233            additional_kwargs=additional_kwargs,234            name=name,235            id=id_,236            tool_calls=tool_calls,237            invalid_tool_calls=invalid_tool_calls,238        )239    if role in ("system", "developer"):240        additional_kwargs = {"__openai_role__": role} if role == "developer" else {}241        return SystemMessage(242            content=_dict.get("content", ""),243            name=name,244            id=id_,245            additional_kwargs=additional_kwargs,246        )247    if role == "function":248        return FunctionMessage(249            content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_250        )251    if role == "tool":252        additional_kwargs = {}253        if "name" in _dict:254            additional_kwargs["name"] = _dict["name"]255        return ToolMessage(256            content=_dict.get("content", ""),257            tool_call_id=cast(str, _dict.get("tool_call_id")),258            additional_kwargs=additional_kwargs,259            name=name,260            id=id_,261        )262    return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]263264265def _sanitize_chat_completions_content(content: str | list[dict]) -> str | list[dict]:266    """Sanitize content for chat/completions API.267268    For list content, filters text blocks to only keep 'type' and 'text' keys.269    """270    if isinstance(content, list):271        sanitized = []272        for block in content:273            if (274                isinstance(block, dict)275                and block.get("type") == "text"276                and "text" in block277            ):278                sanitized.append({"type": "text", "text": block["text"]})279            else:280                sanitized.append(block)281        return sanitized282    return content283284285def _format_message_content(286    content: Any,287    api: Literal["chat/completions", "responses"] = "chat/completions",288    role: str | None = None,289) -> Any:290    """Format message content."""291    if content and isinstance(content, list):292        formatted_content = []293        for block in content:294            # Remove unexpected block types295            if (296                isinstance(block, dict)297                and "type" in block298                and (299                    block["type"] in ("tool_use", "thinking", "reasoning_content")300                    or (301                        block["type"] in ("function_call", "code_interpreter_call")302                        and api == "chat/completions"303                    )304                )305            ):306                continue307            if (308                isinstance(block, dict)309                and is_data_content_block(block)310                # Responses API messages handled separately in _compat (parsed into311                # image generation calls)312                and not (api == "responses" and str(role).lower().startswith("ai"))313            ):314                formatted_content.append(convert_to_openai_data_block(block, api=api))315            # Anthropic image blocks316            elif (317                isinstance(block, dict)318                and block.get("type") == "image"319                and (source := block.get("source"))320                and isinstance(source, dict)321            ):322                if source.get("type") == "base64" and (323                    (media_type := source.get("media_type"))324                    and (data := source.get("data"))325                ):326                    formatted_content.append(327                        {328                            "type": "image_url",329                            "image_url": {"url": f"data:{media_type};base64,{data}"},330                        }331                    )332                elif source.get("type") == "url" and (url := source.get("url")):333                    formatted_content.append(334                        {"type": "image_url", "image_url": {"url": url}}335                    )336                else:337                    continue338            else:339                formatted_content.append(block)340    else:341        formatted_content = content342343    return formatted_content344345346def _convert_message_to_dict(347    message: BaseMessage,348    api: Literal["chat/completions", "responses"] = "chat/completions",349) -> dict:350    """Convert a LangChain message to dictionary format expected by OpenAI."""351    message_dict: dict[str, Any] = {352        "content": _format_message_content(message.content, api=api, role=message.type)353    }354    if (name := message.name or message.additional_kwargs.get("name")) is not None:355        message_dict["name"] = name356357    # populate role and additional message data358    if isinstance(message, ChatMessage):359        message_dict["role"] = message.role360    elif isinstance(message, HumanMessage):361        message_dict["role"] = "user"362    elif isinstance(message, AIMessage):363        message_dict["role"] = "assistant"364        if message.tool_calls or message.invalid_tool_calls:365            message_dict["tool_calls"] = [366                _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls367            ] + [368                _lc_invalid_tool_call_to_openai_tool_call(tc)369                for tc in message.invalid_tool_calls370            ]371        elif "tool_calls" in message.additional_kwargs:372            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]373            tool_call_supported_props = {"id", "type", "function"}374            message_dict["tool_calls"] = [375                {k: v for k, v in tool_call.items() if k in tool_call_supported_props}376                for tool_call in message_dict["tool_calls"]377            ]378        elif "function_call" in message.additional_kwargs:379            # OpenAI raises 400 if both function_call and tool_calls are present in the380            # same message.381            message_dict["function_call"] = message.additional_kwargs["function_call"]382        else:383            pass384        # If tool calls present, content null value should be None not empty string.385        if "function_call" in message_dict or "tool_calls" in message_dict:386            message_dict["content"] = message_dict["content"] or None387388        audio: dict[str, Any] | None = None389        for block in message.content:390            if (391                isinstance(block, dict)392                and block.get("type") == "audio"393                and (id_ := block.get("id"))394                and api != "responses"395            ):396                # openai doesn't support passing the data back - only the id397                # https://platform.openai.com/docs/guides/audio/multi-turn-conversations398                audio = {"id": id_}399        if not audio and "audio" in message.additional_kwargs:400            raw_audio = message.additional_kwargs["audio"]401            audio = (402                {"id": message.additional_kwargs["audio"]["id"]}403                if "id" in raw_audio404                else raw_audio405            )406        if audio:407            message_dict["audio"] = audio408    elif isinstance(message, SystemMessage):409        message_dict["role"] = message.additional_kwargs.get(410            "__openai_role__", "system"411        )412    elif isinstance(message, FunctionMessage):413        message_dict["role"] = "function"414    elif isinstance(message, ToolMessage):415        message_dict["role"] = "tool"416        message_dict["tool_call_id"] = message.tool_call_id417        message_dict["content"] = _sanitize_chat_completions_content(418            message_dict["content"]419        )420        supported_props = {"content", "role", "tool_call_id"}421        message_dict = {k: v for k, v in message_dict.items() if k in supported_props}422    else:423        msg = f"Got unknown type {message}"424        raise TypeError(msg)425    return message_dict426427428def _convert_delta_to_message_chunk(429    _dict: Mapping[str, Any], default_class: type[BaseMessageChunk]430) -> BaseMessageChunk:431    """Convert to a LangChain message chunk."""432    id_ = _dict.get("id")433    role = cast(str, _dict.get("role"))434    content = cast(str, _dict.get("content") or "")435    additional_kwargs: dict = {}436    if _dict.get("function_call"):437        function_call = dict(_dict["function_call"])438        if "name" in function_call and function_call["name"] is None:439            function_call["name"] = ""440        additional_kwargs["function_call"] = function_call441    tool_call_chunks = []442    if raw_tool_calls := _dict.get("tool_calls"):443        try:444            tool_call_chunks = [445                tool_call_chunk(446                    name=rtc["function"].get("name"),447                    args=rtc["function"].get("arguments"),448                    id=rtc.get("id"),449                    index=rtc["index"],450                )451                for rtc in raw_tool_calls452            ]453        except KeyError:454            pass455456    if role == "user" or default_class == HumanMessageChunk:457        return HumanMessageChunk(content=content, id=id_)458    if role == "assistant" or default_class == AIMessageChunk:459        return AIMessageChunk(460            content=content,461            additional_kwargs=additional_kwargs,462            id=id_,463            tool_call_chunks=tool_call_chunks,  # type: ignore[arg-type]464        )465    if role in ("system", "developer") or default_class == SystemMessageChunk:466        if role == "developer":467            additional_kwargs = {"__openai_role__": "developer"}468        else:469            additional_kwargs = {}470        return SystemMessageChunk(471            content=content, id=id_, additional_kwargs=additional_kwargs472        )473    if role == "function" or default_class == FunctionMessageChunk:474        return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)475    if role == "tool" or default_class == ToolMessageChunk:476        return ToolMessageChunk(477            content=content, tool_call_id=_dict["tool_call_id"], id=id_478        )479    if role or default_class == ChatMessageChunk:480        return ChatMessageChunk(content=content, role=role, id=id_)481    return default_class(content=content, id=id_)  # type: ignore[call-arg]482483484def _update_token_usage(485    overall_token_usage: int | dict, new_usage: int | dict486) -> int | dict:487    # Token usage is either ints or dictionaries488    # `reasoning_tokens` is nested inside `completion_tokens_details`489    if isinstance(new_usage, int):490        if not isinstance(overall_token_usage, int):491            msg = (492                f"Got different types for token usage: "493                f"{type(new_usage)} and {type(overall_token_usage)}"494            )495            raise ValueError(msg)496        return new_usage + overall_token_usage497    if isinstance(new_usage, dict):498        if not isinstance(overall_token_usage, dict):499            msg = (500                f"Got different types for token usage: "501                f"{type(new_usage)} and {type(overall_token_usage)}"502            )503            raise ValueError(msg)504        return {505            k: _update_token_usage(overall_token_usage.get(k, 0), v)506            for k, v in new_usage.items()507        }508    warnings.warn(f"Unexpected type for token usage: {type(new_usage)}")509    return new_usage510511512class OpenAIContextOverflowError(openai.BadRequestError, ContextOverflowError):513    """BadRequestError raised when input exceeds OpenAI's context limit."""514515516class OpenAIAPIContextOverflowError(openai.APIError, ContextOverflowError):517    """APIError raised when input exceeds OpenAI's context limit."""518519520def _handle_openai_bad_request(e: openai.BadRequestError) -> None:521    if (522        "context_length_exceeded" in str(e)523        or "Input tokens exceed the configured limit" in e.message524    ):525        raise OpenAIContextOverflowError(526            message=e.message, response=e.response, body=e.body527        ) from e528    if (529        "'response_format' of type 'json_schema' is not supported with this model"530    ) in e.message:531        message = (532            "This model does not support OpenAI's structured output feature, which "533            "is the default method for `with_structured_output` as of "534            "langchain-openai==0.3. To use `with_structured_output` with this model, "535            'specify `method="function_calling"`.'536        )537        warnings.warn(message)538        raise e539    if "Invalid schema for response_format" in e.message:540        message = (541            "Invalid schema for OpenAI's structured output feature, which is the "542            "default method for `with_structured_output` as of langchain-openai==0.3. "543            'Specify `method="function_calling"` instead or update your schema. '544            "See supported schemas: "545            "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas"546        )547        warnings.warn(message)548        raise e549    raise550551552def _handle_openai_api_error(e: openai.APIError) -> None:553    error_message = str(e)554    if "exceeds the context window" in error_message:555        raise OpenAIAPIContextOverflowError(556            message=e.message, request=e.request, body=e.body557        ) from e558    raise559560561_RESPONSES_API_ONLY_PREFIXES = (562    "gpt-5-pro",563    "gpt-5.2-pro",564    "gpt-5.4-pro",565    "gpt-5.5-pro",566)567568569def _model_prefers_responses_api(model_name: str | None) -> bool:570    if not model_name:571        return False572    return model_name.startswith(_RESPONSES_API_ONLY_PREFIXES) or "codex" in model_name573574575_BM = TypeVar("_BM", bound=BaseModel)576_DictOrPydanticClass: TypeAlias = dict[str, Any] | type[_BM] | type577_DictOrPydantic: TypeAlias = dict | _BM578579580class BaseChatOpenAI(BaseChatModel):581    """Base wrapper around OpenAI large language models for chat.582583    This base class targets584    [official OpenAI API specifications](https://github.com/openai/openai-openapi)585    only. Non-standard response fields added by third-party providers (e.g.,586    `reasoning_content`) are not extracted. Use a provider-specific subclass for587    full provider support.588    """589590    client: Any = Field(default=None, exclude=True)591592    async_client: Any = Field(default=None, exclude=True)593594    root_client: Any = Field(default=None, exclude=True)595596    root_async_client: Any = Field(default=None, exclude=True)597598    model_name: str = Field(default="gpt-3.5-turbo", alias="model")599    """Model name to use."""600601    temperature: float | None = None602    """What sampling temperature to use."""603604    model_kwargs: dict[str, Any] = Field(default_factory=dict)605    """Holds any model parameters valid for `create` call not explicitly specified."""606607    openai_api_key: (608        SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]609    ) = Field(610        alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)611    )612    """API key to use.613614    Can be inferred from the `OPENAI_API_KEY` environment variable, or specified615    as a string, or sync or async callable that returns a string.616617    ??? example "Specify with environment variable"618619        ```bash620        export OPENAI_API_KEY=...621        ```622        ```python623        from langchain_openai import ChatOpenAI624625        model = ChatOpenAI(model="gpt-5-nano")626        ```627628    ??? example "Specify with a string"629630        ```python631        from langchain_openai import ChatOpenAI632633        model = ChatOpenAI(model="gpt-5-nano", api_key="...")634        ```635636    ??? example "Specify with a sync callable"637638        ```python639        from langchain_openai import ChatOpenAI640641        def get_api_key() -> str:642            # Custom logic to retrieve API key643            return "..."644645        model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)646        ```647648    ??? example "Specify with an async callable"649650        ```python651        from langchain_openai import ChatOpenAI652653        async def get_api_key() -> str:654            # Custom async logic to retrieve API key655            return "..."656657        model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)658        ```659    """660661    openai_api_base: str | None = Field(default=None, alias="base_url")662    """Base URL path for API requests, leave blank if not using a proxy or service emulator."""  # noqa: E501663664    openai_organization: str | None = Field(default=None, alias="organization")665    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""666667    # to support explicit proxy for OpenAI668    openai_proxy: str | None = Field(669        default_factory=from_env("OPENAI_PROXY", default=None)670    )671672    request_timeout: float | tuple[float, float] | Any | None = Field(673        default=None, alias="timeout"674    )675    """Timeout for requests to OpenAI completion API.676677    Can be float, `httpx.Timeout` or `None`.678    """679680    stream_usage: bool | None = None681    """Whether to include usage metadata in streaming output.682683    If enabled, an additional message chunk will be generated during the stream684    including usage metadata.685686    This parameter is enabled unless `openai_api_base` is set or the model is687    initialized with a custom client, as many chat completions APIs do not688    support streaming token usage.689690    !!! version-added "Added in `langchain-openai` 0.3.9"691692    !!! warning "Behavior changed in `langchain-openai` 0.3.35"693694        Enabled for default base URL and client.695    """696697    max_retries: int | None = None698    """Maximum number of retries to make when generating."""699700    presence_penalty: float | None = None701    """Penalizes repeated tokens."""702703    frequency_penalty: float | None = None704    """Penalizes repeated tokens according to frequency."""705706    seed: int | None = None707    """Seed for generation"""708709    logprobs: bool | None = None710    """Whether to return logprobs."""711712    top_logprobs: int | None = None713    """Number of most likely tokens to return at each token position, each with an714    associated log probability.715716    `logprobs` must be set to true if this parameter is used.717    """718719    logit_bias: dict[int, int] | None = None720    """Modify the likelihood of specified tokens appearing in the completion."""721722    streaming: bool = False723    """Whether to stream the results or not."""724725    n: int | None = None726    """Number of chat completions to generate for each prompt."""727728    top_p: float | None = None729    """Total probability mass of tokens to consider at each step."""730731    max_tokens: int | None = Field(default=None)732    """Maximum number of tokens to generate."""733734    reasoning_effort: str | None = None735    """Constrains effort on reasoning for reasoning models.736737    For use with the Chat Completions API. Reasoning models only.738739    Currently supported values are `'minimal'`, `'low'`, `'medium'`, and740    `'high'`. Reducing reasoning effort can result in faster responses and fewer741    tokens used on reasoning in a response.742    """743744    reasoning: dict[str, Any] | None = None745    """Reasoning parameters for reasoning models. None disables reasoning.746747    For use with the Responses API.748749    ```python750    reasoning={751        "effort": None,  # Default None; can be "low", "medium", or "high"752        "summary": "auto",  # Can be "auto", "concise", or "detailed"753    }754    ```755756    !!! version-added "Added in `langchain-openai` 0.3.24"757    """758759    verbosity: str | None = None760    """Controls the verbosity level of responses for reasoning models.761762    For use with the Responses API.763764    Currently supported values are `'low'`, `'medium'`, and `'high'`.765766    !!! version-added "Added in `langchain-openai` 0.3.28"767    """768769    tiktoken_model_name: str | None = None770    """The model name to pass to tiktoken when using this class.771772    Tiktoken is used to count the number of tokens in documents to constrain773    them to be under a certain limit.774775    By default, when set to `None`, this will be the same as the embedding model name.776    However, there are some cases where you may want to use this `Embedding` class with777    a model name not supported by tiktoken. This can include when using Azure embeddings778    or when using one of the many model providers that expose an OpenAI-like779    API but with different models. In those cases, in order to avoid erroring780    when tiktoken is called, you can specify a model name to use here.781    """782783    default_headers: Mapping[str, str] | None = None784785    default_query: Mapping[str, object] | None = None786787    # Configure a custom httpx client. See the788    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.789    http_client: Any | None = Field(default=None, exclude=True)790    """Optional `httpx.Client`.791792    Only used for sync invocations. Must specify `http_async_client` as well if793    you'd like a custom client for async invocations.794    """795796    http_async_client: Any | None = Field(default=None, exclude=True)797    """Optional `httpx.AsyncClient`.798799    Only used for async invocations. Must specify `http_client` as well if you'd800    like a custom client for sync invocations.801    """802803    http_socket_options: Sequence[tuple[int, int, int]] | None = Field(804        default=None, exclude=True805    )806    """TCP socket options applied to the httpx transports built by this instance.807808    Defaults to a conservative TCP-keepalive + `TCP_USER_TIMEOUT` profile that809    targets a ~2-minute bound on silent connection hangs (silent mid-stream peer810    loss, gVisor/NAT idle timeouts, silent TCP black holes) on platforms that811    support the full option set. On platforms that only support a subset812    (macOS without `TCP_USER_TIMEOUT`, Windows with only `SO_KEEPALIVE`,813    minimal kernels), unsupported options are silently dropped and the bound814    degrades to whatever the remaining options + OS defaults provide — still815    better than indefinite hang.816817    Accepted values:818819    - `None` (default): use env-driven defaults. Matches the "unset" convention820        used by `http_client` elsewhere on this class.821    - `()` (empty): disable socket-option injection entirely. Inherits the OS822        defaults and restores httpx's native env-proxy auto-detection.823    - A non-empty sequence of `(level, option, value)` tuples: explicit824        override; passed verbatim to the transport (not filtered). Unsupported825        options raise `OSError` at connect time rather than being silently826        dropped — the user chose them explicitly.827828    Environment variables (only consulted when this field is `None`):829    `LANGCHAIN_OPENAI_TCP_KEEPALIVE` (set to `0` to disable entirely — the830    kill-switch), `LANGCHAIN_OPENAI_TCP_KEEPIDLE`,831    `LANGCHAIN_OPENAI_TCP_KEEPINTVL`, `LANGCHAIN_OPENAI_TCP_KEEPCNT`,832    `LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS`.833834    Applied per side: if `http_client` is supplied, the sync path uses835    that user-owned client's socket options as-is; the async path still836    gets `http_socket_options` applied to its default builder (and837    vice-versa for `http_async_client`). Supply both to take full control.838839    !!! note "Interaction with env-proxy auto-detection"840841        When a custom `httpx` transport is active, `httpx` disables its842        native env-proxy auto-detection (`HTTP_PROXY` / `HTTPS_PROXY` /843        `ALL_PROXY` / `NO_PROXY` and macOS/Windows system proxy settings).844845        To keep the default shape safe, `ChatOpenAI` detects the846        "proxy-env-shadow" pattern and **skips the custom transport847        entirely** when **all** of the following hold:848849        - `http_socket_options` is left at its default (`None`)850        - No `http_client` or `http_async_client` supplied851        - No `openai_proxy` supplied852        - A proxy env var or system proxy is visible to httpx853854        On that specific shape, the instance falls back to pre-PR behavior855        and httpx's env-proxy auto-detection applies (a one-time `INFO` log856        records the bypass for observability).857858        If you explicitly set `http_socket_options=[...]` while a proxy859        env var is also set, no bypass — you opted into the transport, and860        a one-time `WARNING` records the shadowing. Set861        `http_socket_options=()` or `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` to862        disable transport injection explicitly, or pass a fully-configured863        `http_async_client` / `http_client` to take full control. The864        `openai_proxy` constructor kwarg is unaffected — socket options865        are applied cleanly through the proxied transport on that path.866    """867868    stream_chunk_timeout: float | None = Field(869        default_factory=lambda: _float_env(870            "LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0871        ),872        exclude=True,873    )874    """Per-chunk wall-clock timeout (seconds) on async streaming responses.875876    Applies to async invocations only (`astream`, `ainvoke` with streaming,877    etc.). Sync streaming (`stream`) is not affected.878879    Fires between content chunks yielded by the openai SDK's streaming iterator880    (i.e., each call to `__anext__` on the response). Crucially, this is881    **not** the same as httpx's `timeout.read`:882883    - httpx's read timeout is inter-byte and gets reset every time *any* bytes884        arrive on the socket — including OpenAI's SSE keepalive comments885        (`: keepalive`) that trickle down during long model generations. A886        stream that's silent on *content* but still producing keepalives looks887        alive forever to httpx.888    - `stream_chunk_timeout` measures the gap between *parsed chunks*. The889        openai SDK's SSE parser consumes keepalive comments internally and does890        not emit them as chunks, so keepalives do *not* reset this timer. It891        fires on genuine content silence.892893    When it fires, a `StreamChunkTimeoutError`894    (subclass of `asyncio.TimeoutError`) is raised with a self-describing895    message naming this knob, the env-var override, the model, and the896    number of chunks received before the stall. A WARNING log with897    `extra={"source": "stream_chunk_timeout", "timeout_s": <value>,898    "model_name": <value>, "chunks_received": <value>}` also fires so899    aggregate logging can distinguish app-layer timeouts from900    transport-layer failures.901902    Defaults to 120s. Set to `None` or `0` to disable. Overridable via the903    `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. Negative values904    (from either the env var or the constructor kwarg — e.g., hydrated905    from YAML/JSON configs) fall back to the default with a `WARNING` log906    rather than silently disabling the wrapper, so a misconfigured value907    still boots safely and the fallback is visible.908    """909910    stop: list[str] | str | None = Field(default=None, alias="stop_sequences")911    """Default stop sequences."""912913    extra_body: Mapping[str, Any] | None = None914    """Optional additional JSON properties to include in the request parameters915    when making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or916    other providers.917918    This is the recommended way to pass custom parameters that are specific to your919    OpenAI-compatible API provider but not part of the standard OpenAI API.920921    Examples:922    - [LM Studio](https://lmstudio.ai/) TTL parameter: `extra_body={"ttl": 300}`923    - [vLLM](https://github.com/vllm-project/vllm) custom parameters:924        `extra_body={"use_beam_search": True}`925    - Any other provider-specific parameters926927    !!! warning928929        Do not use `model_kwargs` for custom parameters that are not part of the930        standard OpenAI API, as this will cause errors when making API calls. Use931        `extra_body` instead.932    """933934    include_response_headers: bool = False935    """Whether to include response headers in the output message `response_metadata`."""936937    disabled_params: dict[str, Any] | None = Field(default=None)938    """Parameters of the OpenAI client or `chat.completions` endpoint that should be939    disabled for the given model.940941    Should be specified as `{"param": None | ['val1', 'val2']}` where the key is the942    parameter and the value is either None, meaning that parameter should never be943    used, or it's a list of disabled values for the parameter.944945    For example, older models may not support the `'parallel_tool_calls'` parameter at946    all, in which case `disabled_params={"parallel_tool_calls": None}` can be passed947    in.948949    If a parameter is disabled then it will not be used by default in any methods, e.g.950    in `with_structured_output`. However this does not prevent a user from directly951    passed in the parameter during invocation.952    """953954    context_management: list[dict[str, Any]] | None = None955    """Configuration for956    [context management](https://developers.openai.com/api/docs/guides/compaction).957    """958959    include: list[str] | None = None960    """Additional fields to include in generations from Responses API.961962    Supported values:963964    - `'file_search_call.results'`965    - `'message.input_image.image_url'`966    - `'computer_call_output.output.image_url'`967    - `'reasoning.encrypted_content'`968    - `'code_interpreter_call.outputs'`969970    !!! version-added "Added in `langchain-openai` 0.3.24"971    """972973    service_tier: str | None = None974    """Latency tier for request.975976    Options are `'auto'`, `'default'`, or `'flex'`.977978    Relevant for users of OpenAI's scale tier service.979    """980981    store: bool | None = None982    """If `True`, OpenAI may store response data for future use.983984    Defaults to `True` for the Responses API and `False` for the Chat Completions API.985986    !!! version-added "Added in `langchain-openai` 0.3.24"987    """988989    truncation: str | None = None990    """Truncation strategy (Responses API).991992    Can be `'auto'` or `'disabled'` (default).993994    If `'auto'`, model may drop input items from the middle of the message sequence to995    fit the context window.996997    !!! version-added "Added in `langchain-openai` 0.3.24"998    """9991000    use_previous_response_id: bool = False1001    """If `True`, always pass `previous_response_id` using the ID of the most recent1002    response. Responses API only.10031004    Input messages up to the most recent response will be dropped from request1005    payloads.10061007    For example, the following two are equivalent:10081009    ```python1010    model = ChatOpenAI(1011        model="...",1012        use_previous_response_id=True,1013    )1014    model.invoke(1015        [1016            HumanMessage("Hello"),1017            AIMessage("Hi there!", response_metadata={"id": "resp_123"}),1018            HumanMessage("How are you?"),1019        ]1020    )1021    ```10221023    ```python1024    model = ChatOpenAI(model="...", use_responses_api=True)1025    model.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")1026    ```10271028    !!! version-added "Added in `langchain-openai` 0.3.26"1029    """10301031    use_responses_api: bool | None = None1032    """Whether to use the Responses API instead of the Chat API.10331034    If not specified then will be inferred based on invocation params.10351036    !!! version-added "Added in `langchain-openai` 0.3.9"1037    """10381039    output_version: str | None = Field(1040        default_factory=from_env("LC_OUTPUT_VERSION", default=None)1041    )1042    """Version of `AIMessage` output format to use.10431044    This field is used to roll-out new output formats for chat model `AIMessage`1045    responses in a backwards-compatible way.10461047    Supported values:10481049    - `'v0'`: `AIMessage` format as of `langchain-openai 0.3.x`.1050    - `'responses/v1'`: Formats Responses API output items into AIMessage content blocks1051        (Responses API only)1052    - `'v1'`: v1 of LangChain cross-provider standard.10531054    !!! warning "Behavior changed in `langchain-openai` 1.0.0"10551056        Default updated to `"responses/v1"`.1057    """10581059    model_config = ConfigDict(populate_by_name=True)10601061    @property1062    def model(self) -> str:1063        """Same as model_name."""1064        return self.model_name10651066    @model_validator(mode="before")1067    @classmethod1068    def build_extra(cls, values: dict[str, Any]) -> Any:1069        """Build extra kwargs from additional params that were passed in."""1070        all_required_field_names = get_pydantic_field_names(cls)1071        return _build_model_kwargs(values, all_required_field_names)10721073    @field_validator("stream_chunk_timeout", mode="after")1074    @classmethod1075    def _validate_stream_chunk_timeout(cls, value: float | None) -> float | None:1076        """Reject negative constructor values; fall back to the env-driven default.10771078        Matches the env-var path in `_float_env`: a negative value is a typo,1079        not an opt-out (`None`/`0` are the documented off switches). Configs1080        hydrated from YAML/JSON would otherwise silently disable the wrapper1081        and reintroduce the indefinite-stream hang the feature prevents.1082        """1083        if value is not None and value < 0:1084            fallback = _float_env("LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0)1085            logger.warning(1086                "Invalid `stream_chunk_timeout=%r` (negative); "1087                "falling back to %s. Pass `None` or `0` to disable.",1088                value,1089                fallback,1090            )1091            return fallback1092        return value10931094    @model_validator(mode="before")1095    @classmethod1096    def validate_temperature(cls, values: dict[str, Any]) -> Any:1097        """Validate temperature parameter for different models.10981099        - gpt-5 models (excluding gpt-5-chat) only allow `temperature=1` or unset1100            (Defaults to 1)1101        """1102        model = values.get("model_name") or values.get("model") or ""1103        model_lower = model.lower()11041105        # For o1 models, set temperature=1 if not provided1106        if model_lower.startswith("o1") and "temperature" not in values:1107            values["temperature"] = 111081109        # For gpt-5 models, handle temperature restrictions. Temperature is supported1110        # by gpt-5-chat and gpt-5 models with reasoning_effort='none' or1111        # reasoning={'effort': 'none'}.1112        if (1113            model_lower.startswith("gpt-5")1114            and ("chat" not in model_lower)1115            and values.get("reasoning_effort") != "none"1116            and (values.get("reasoning") or {}).get("effort") != "none"1117        ):1118            temperature = values.get("temperature")1119            if temperature is not None and temperature != 1:1120                # For gpt-5 (non-chat), only temperature=1 is supported1121                # So we remove any non-defaults1122                values.pop("temperature", None)11231124        return values11251126    @model_validator(mode="after")1127    def validate_environment(self) -> Self:1128        """Validate that api key and python package exists in environment."""1129        if self.n is not None and self.n < 1:1130            msg = "n must be at least 1."1131            raise ValueError(msg)1132        if self.n is not None and self.n > 1 and self.streaming:1133            msg = "n must be 1 when streaming."1134            raise ValueError(msg)11351136        # Check OPENAI_ORGANIZATION for backwards compatibility.1137        self.openai_organization = (1138            self.openai_organization1139            or os.getenv("OPENAI_ORG_ID")1140            or os.getenv("OPENAI_ORGANIZATION")1141        )1142        self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")11431144        # Enable stream_usage by default if using default base URL and client1145        if (1146            all(1147                getattr(self, key, None) is None1148                for key in (1149                    "stream_usage",1150                    "openai_proxy",1151                    "openai_api_base",1152                    "base_url",1153                    "client",1154                    "root_client",1155                    "async_client",1156                    "root_async_client",1157                    "http_client",1158                    "http_async_client",1159                )1160            )1161            and "OPENAI_BASE_URL" not in os.environ1162        ):1163            self.stream_usage = True11641165        # Resolve API key from SecretStr or Callable1166        sync_api_key_value: str | Callable[[], str] | None = None1167        async_api_key_value: str | Callable[[], Awaitable[str]] | None = None11681169        if self.openai_api_key is not None:1170            # Because OpenAI and AsyncOpenAI clients support either sync or async1171            # callables for the API key, we need to resolve separate values here.1172            sync_api_key_value, async_api_key_value = _resolve_sync_and_async_api_keys(1173                self.openai_api_key1174            )11751176        client_params: dict = {1177            "organization": self.openai_organization,1178            "base_url": self.openai_api_base,1179            "timeout": self.request_timeout,1180            "default_headers": self.default_headers,1181            "default_query": self.default_query,1182        }1183        if self.max_retries is not None:1184            client_params["max_retries"] = self.max_retries11851186        if self.openai_proxy and (self.http_client or self.http_async_client):1187            openai_proxy = self.openai_proxy1188            http_client = self.http_client1189            http_async_client = self.http_async_client1190            msg = (1191                "Cannot specify 'openai_proxy' if one of "1192                "'http_client'/'http_async_client' is already specified. Received:\n"1193                f"{openai_proxy=}\n{http_client=}\n{http_async_client=}"1194            )1195            raise ValueError(msg)1196        if _should_bypass_socket_options_for_proxy_env(1197            http_socket_options=self.http_socket_options,1198            http_client=self.http_client,1199            http_async_client=self.http_async_client,1200            openai_proxy=self.openai_proxy,1201        ):1202            # Default-shape construction + proxy env var visible to httpx:1203            # skip the custom transport so httpx's env-proxy auto-detection1204            # still applies. Users who want kernel-level TCP tuning alongside1205            # an env proxy can opt in explicitly via `http_socket_options`.1206            resolved_socket_options: tuple[tuple[int, int, int], ...] = ()1207            _log_proxy_env_bypass_once()1208        else:1209            resolved_socket_options = _resolve_socket_options(self.http_socket_options)1210            _warn_if_proxy_env_shadowed(1211                resolved_socket_options, openai_proxy=self.openai_proxy1212            )1213        if not self.client:1214            if sync_api_key_value is None:1215                # No valid sync API key, leave client as None and raise informative1216                # error on invocation.1217                self.client = None1218                self.root_client = None1219            else:1220                if self.openai_proxy and not self.http_client:1221                    self.http_client = _build_proxied_sync_httpx_client(1222                        proxy=self.openai_proxy,1223                        verify=global_ssl_context,1224                        socket_options=resolved_socket_options,1225                    )1226                sync_specific = {1227                    "http_client": self.http_client1228                    or _get_default_httpx_client(1229                        self.openai_api_base,1230                        self.request_timeout,1231                        resolved_socket_options,1232                    ),1233                    "api_key": sync_api_key_value,1234                }1235                self.root_client = openai.OpenAI(**client_params, **sync_specific)  # type: ignore[arg-type]1236                self.client = self.root_client.chat.completions1237        if not self.async_client:1238            if self.openai_proxy and not self.http_async_client:1239                self.http_async_client = _build_proxied_async_httpx_client(1240                    proxy=self.openai_proxy,1241                    verify=global_ssl_context,1242                    socket_options=resolved_socket_options,1243                )1244            async_specific = {1245                "http_client": self.http_async_client1246                or _get_default_async_httpx_client(1247                    self.openai_api_base,1248                    self.request_timeout,1249                    resolved_socket_options,1250                ),1251                "api_key": async_api_key_value,1252            }1253            self.root_async_client = openai.AsyncOpenAI(1254                **client_params,1255                **async_specific,  # type: ignore[arg-type]1256            )1257            self.async_client = self.root_async_client.chat.completions1258        return self12591260    def _resolve_model_profile(self) -> ModelProfile | None:1261        return _get_default_model_profile(self.model_name) or None12621263    @property1264    def _default_params(self) -> dict[str, Any]:1265        """Get the default parameters for calling OpenAI API."""1266        exclude_if_none = {1267            "presence_penalty": self.presence_penalty,1268            "frequency_penalty": self.frequency_penalty,1269            "seed": self.seed,1270            "top_p": self.top_p,1271            "logprobs": self.logprobs,1272            "top_logprobs": self.top_logprobs,1273            "logit_bias": self.logit_bias,1274            "stop": self.stop or None,  # Also exclude empty list for this1275            "max_tokens": self.max_tokens,1276            "extra_body": self.extra_body,1277            "n": self.n,1278            "temperature": self.temperature,1279            "reasoning_effort": self.reasoning_effort,1280            "reasoning": self.reasoning,1281            "verbosity": self.verbosity,1282            "context_management": self.context_management,1283            "include": self.include,1284            "service_tier": self.service_tier,1285            "truncation": self.truncation,1286            "store": self.store,1287        }12881289        return {1290            "model": self.model_name,1291            "stream": self.streaming,1292            **{k: v for k, v in exclude_if_none.items() if v is not None},1293            **self.model_kwargs,1294        }12951296    def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:1297        overall_token_usage: dict = {}1298        system_fingerprint = None1299        for output in llm_outputs:1300            if output is None:1301                # Happens in streaming1302                continue1303            token_usage = output.get("token_usage")1304            if token_usage is not None:1305                for k, v in token_usage.items():1306                    if v is None:1307                        continue1308                    if k in overall_token_usage:1309                        overall_token_usage[k] = _update_token_usage(1310                            overall_token_usage[k], v1311                        )1312                    else:1313                        overall_token_usage[k] = v1314            if system_fingerprint is None:1315                system_fingerprint = output.get("system_fingerprint")1316        combined = {"token_usage": overall_token_usage, "model_name": self.model_name}1317        if system_fingerprint:1318            combined["system_fingerprint"] = system_fingerprint1319        return combined13201321    def _convert_chunk_to_generation_chunk(1322        self,1323        chunk: dict,1324        default_chunk_class: type,1325        base_generation_info: dict | None,1326    ) -> ChatGenerationChunk | None:1327        if chunk.get("type") == "content.delta":  # From beta.chat.completions.stream1328            return None1329        token_usage = chunk.get("usage")1330        choices = (1331            chunk.get("choices", [])1332            # From beta.chat.completions.stream1333            or chunk.get("chunk", {}).get("choices", [])1334        )13351336        usage_metadata: UsageMetadata | None = (1337            _create_usage_metadata(token_usage, chunk.get("service_tier"))1338            if token_usage1339            else None1340        )1341        if len(choices) == 0:1342            # logprobs is implicitly None1343            generation_chunk = ChatGenerationChunk(1344                message=default_chunk_class(content="", usage_metadata=usage_metadata),1345                generation_info=base_generation_info,1346            )1347            if self.output_version == "v1":1348                generation_chunk.message.content = []1349                generation_chunk.message.response_metadata["output_version"] = "v1"13501351            return generation_chunk13521353        choice = choices[0]1354        if choice["delta"] is None:1355            return None13561357        message_chunk = _convert_delta_to_message_chunk(1358            choice["delta"], default_chunk_class1359        )1360        generation_info = {**base_generation_info} if base_generation_info else {}13611362        if finish_reason := choice.get("finish_reason"):1363            generation_info["finish_reason"] = finish_reason1364            if model_name := chunk.get("model"):1365                generation_info["model_name"] = model_name1366            if system_fingerprint := chunk.get("system_fingerprint"):1367                generation_info["system_fingerprint"] = system_fingerprint1368            if service_tier := chunk.get("service_tier"):1369                generation_info["service_tier"] = service_tier13701371        logprobs = choice.get("logprobs")1372        if logprobs:1373            generation_info["logprobs"] = logprobs13741375        if usage_metadata and isinstance(message_chunk, AIMessageChunk):1376            message_chunk.usage_metadata = usage_metadata13771378        message_chunk.response_metadata["model_provider"] = "openai"1379        return ChatGenerationChunk(1380            message=message_chunk, generation_info=generation_info or None1381        )13821383    def _ensure_sync_client_available(self) -> None:1384        """Check that sync client is available, raise error if not."""1385        if self.client is None:1386            msg = (1387                "Sync client is not available. This happens when an async callable "1388                "was provided for the API key. Use async methods (ainvoke, astream) "1389                "instead, or provide a string or sync callable for the API key."1390            )1391            raise ValueError(msg)13921393    def _stream_responses(1394        self,1395        messages: list[BaseMessage],1396        stop: list[str] | None = None,1397        run_manager: CallbackManagerForLLMRun | None = None,1398        **kwargs: Any,1399    ) -> Iterator[ChatGenerationChunk]:1400        self._ensure_sync_client_available()1401        kwargs["stream"] = True1402        payload = self._get_request_payload(messages, stop=stop, **kwargs)1403        try:1404            if self.include_response_headers:1405                raw_context_manager = (1406                    self.root_client.with_raw_response.responses.create(**payload)1407                )1408                context_manager = raw_context_manager.parse()1409                headers = {"headers": dict(raw_context_manager.headers)}1410            else:1411                context_manager = self.root_client.responses.create(**payload)1412                headers = {}1413            original_schema_obj = kwargs.get("response_format")14141415            with context_manager as response:1416                is_first_chunk = True1417                current_index = -11418                current_output_index = -11419                current_sub_index = -11420                has_reasoning = False1421                for chunk in response:1422                    metadata = headers if is_first_chunk else {}1423                    (1424                        current_index,1425                        current_output_index,1426                        current_sub_index,1427                        generation_chunk,1428                    ) = _convert_responses_chunk_to_generation_chunk(1429                        chunk,1430                        current_index,1431                        current_output_index,1432                        current_sub_index,1433                        schema=original_schema_obj,1434                        metadata=metadata,1435                        has_reasoning=has_reasoning,1436                        output_version=self.output_version,1437                    )1438                    if generation_chunk:1439                        if run_manager:1440                            run_manager.on_llm_new_token(1441                                generation_chunk.text, chunk=generation_chunk1442                            )1443                        is_first_chunk = False1444                        if "reasoning" in generation_chunk.message.additional_kwargs:1445                            has_reasoning = True1446                        yield generation_chunk1447        except openai.BadRequestError as e:1448            _handle_openai_bad_request(e)1449        except openai.APIError as e:1450            _handle_openai_api_error(e)14511452    async def _astream_responses(1453        self,1454        messages: list[BaseMessage],1455        stop: list[str] | None = None,1456        run_manager: AsyncCallbackManagerForLLMRun | None = None,1457        **kwargs: Any,1458    ) -> AsyncIterator[ChatGenerationChunk]:1459        kwargs["stream"] = True1460        payload = self._get_request_payload(messages, stop=stop, **kwargs)1461        try:1462            if self.include_response_headers:1463                raw_context_manager = (1464                    await self.root_async_client.with_raw_response.responses.create(1465                        **payload1466                    )1467                )1468                context_manager = raw_context_manager.parse()1469                headers = {"headers": dict(raw_context_manager.headers)}1470            else:1471                context_manager = await self.root_async_client.responses.create(1472                    **payload1473                )1474                headers = {}1475            original_schema_obj = kwargs.get("response_format")14761477            async with context_manager as response:1478                is_first_chunk = True1479                current_index = -11480                current_output_index = -11481                current_sub_index = -11482                has_reasoning = False1483                async for chunk in _astream_with_chunk_timeout(1484                    response,1485                    self.stream_chunk_timeout,1486                    model_name=self.model_name,1487                ):1488                    metadata = headers if is_first_chunk else {}1489                    (1490                        current_index,1491                        current_output_index,1492                        current_sub_index,1493                        generation_chunk,1494                    ) = _convert_responses_chunk_to_generation_chunk(1495                        chunk,1496                        current_index,1497                        current_output_index,1498                        current_sub_index,1499                        schema=original_schema_obj,1500                        metadata=metadata,1501                        has_reasoning=has_reasoning,1502                        output_version=self.output_version,1503                    )1504                    if generation_chunk:1505                        if run_manager:1506                            await run_manager.on_llm_new_token(1507                                generation_chunk.text, chunk=generation_chunk1508                            )1509                        is_first_chunk = False1510                        if "reasoning" in generation_chunk.message.additional_kwargs:1511                            has_reasoning = True1512                        yield generation_chunk1513        except openai.BadRequestError as e:1514            _handle_openai_bad_request(e)1515        except openai.APIError as e:1516            _handle_openai_api_error(e)15171518    def _should_stream_usage(1519        self, stream_usage: bool | None = None, **kwargs: Any1520    ) -> bool:1521        """Determine whether to include usage metadata in streaming output.15221523        For backwards compatibility, we check for `stream_options` passed1524        explicitly to kwargs or in the `model_kwargs` and override `self.stream_usage`.1525        """1526        stream_usage_sources = [  # order of precedence1527            stream_usage,1528            kwargs.get("stream_options", {}).get("include_usage"),1529            self.model_kwargs.get("stream_options", {}).get("include_usage"),1530            self.stream_usage,1531        ]1532        for source in stream_usage_sources:1533            if isinstance(source, bool):1534                return source1535        return self.stream_usage or False15361537    def _stream(1538        self,1539        messages: list[BaseMessage],1540        stop: list[str] | None = None,1541        run_manager: CallbackManagerForLLMRun | None = None,1542        *,1543        stream_usage: bool | None = None,1544        **kwargs: Any,1545    ) -> Iterator[ChatGenerationChunk]:1546        self._ensure_sync_client_available()1547        kwargs["stream"] = True1548        stream_usage = self._should_stream_usage(stream_usage, **kwargs)1549        if stream_usage:1550            kwargs["stream_options"] = {"include_usage": stream_usage}1551        payload = self._get_request_payload(messages, stop=stop, **kwargs)1552        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1553        base_generation_info = {}15541555        try:1556            if "response_format" in payload:1557                if self.include_response_headers:1558                    warnings.warn(1559                        "Cannot currently include response headers when "1560                        "response_format is specified."1561                    )1562                payload.pop("stream")1563                response_stream = self.root_client.beta.chat.completions.stream(1564                    **payload1565                )1566                context_manager = response_stream1567            else:1568                if self.include_response_headers:1569                    raw_response = self.client.with_raw_response.create(**payload)1570                    response = raw_response.parse()1571                    base_generation_info = {"headers": dict(raw_response.headers)}1572                else:1573                    response = self.client.create(**payload)1574                context_manager = response1575            with context_manager as response:1576                is_first_chunk = True1577                for chunk in response:1578                    if not isinstance(chunk, dict):1579                        chunk = chunk.model_dump()1580                    generation_chunk = self._convert_chunk_to_generation_chunk(1581                        chunk,1582                        default_chunk_class,1583                        base_generation_info if is_first_chunk else {},1584                    )1585                    if generation_chunk is None:1586                        continue1587                    default_chunk_class = generation_chunk.message.__class__1588                    logprobs = (generation_chunk.generation_info or {}).get("logprobs")1589                    if run_manager:1590                        run_manager.on_llm_new_token(1591                            generation_chunk.text,1592                            chunk=generation_chunk,1593                            logprobs=logprobs,1594                        )1595                    is_first_chunk = False1596                    yield generation_chunk1597        except openai.BadRequestError as e:1598            _handle_openai_bad_request(e)1599        except openai.APIError as e:1600            _handle_openai_api_error(e)1601        if hasattr(response, "get_final_completion") and "response_format" in payload:1602            final_completion = response.get_final_completion()1603            generation_chunk = self._get_generation_chunk_from_completion(1604                final_completion1605            )1606            if run_manager:1607                run_manager.on_llm_new_token(1608                    generation_chunk.text, chunk=generation_chunk1609                )1610            yield generation_chunk16111612    def _generate(1613        self,1614        messages: list[BaseMessage],1615        stop: list[str] | None = None,1616        run_manager: CallbackManagerForLLMRun | None = None,1617        **kwargs: Any,1618    ) -> ChatResult:1619        self._ensure_sync_client_available()1620        payload = self._get_request_payload(messages, stop=stop, **kwargs)1621        generation_info = None1622        raw_response = None1623        try:1624            if "response_format" in payload:1625                payload.pop("stream")1626                raw_response = (1627                    self.root_client.chat.completions.with_raw_response.parse(**payload)1628                )1629                response = raw_response.parse()1630            elif self._use_responses_api(payload):1631                original_schema_obj = kwargs.get("response_format")1632                if original_schema_obj and _is_pydantic_class(original_schema_obj):1633                    raw_response = self.root_client.responses.with_raw_response.parse(1634                        **payload1635                    )1636                else:1637                    raw_response = self.root_client.responses.with_raw_response.create(1638                        **payload1639                    )1640                response = raw_response.parse()1641                if self.include_response_headers:1642                    generation_info = {"headers": dict(raw_response.headers)}1643                return _construct_lc_result_from_responses_api(1644                    response,1645                    schema=original_schema_obj,1646                    metadata=generation_info,1647                    output_version=self.output_version,1648                )1649            else:1650                raw_response = self.client.with_raw_response.create(**payload)1651                response = raw_response.parse()1652        except openai.BadRequestError as e:1653            _handle_openai_bad_request(e)1654        except openai.APIError as e:1655            _handle_openai_api_error(e)1656        except Exception as e:1657            if raw_response is not None and hasattr(raw_response, "http_response"):1658                e.response = raw_response.http_response  # type: ignore[attr-defined]1659            raise e1660        if (1661            self.include_response_headers1662            and raw_response is not None1663            and hasattr(raw_response, "headers")1664        ):1665            generation_info = {"headers": dict(raw_response.headers)}1666        return self._create_chat_result(response, generation_info)16671668    def _use_responses_api(self, payload: dict) -> bool:1669        if isinstance(self.use_responses_api, bool):1670            return self.use_responses_api1671        if (1672            self.output_version == "responses/v1"1673            or self.context_management is not None1674            or self.include is not None1675            or self.reasoning is not None1676            or self.truncation is not None1677            or self.use_previous_response_id1678            or _model_prefers_responses_api(self.model_name)1679        ):1680            return True1681        return _use_responses_api(payload)16821683    def _get_request_payload(1684        self,1685        input_: LanguageModelInput,1686        *,1687        stop: list[str] | None = None,1688        **kwargs: Any,1689    ) -> dict:1690        messages = self._convert_input(input_).to_messages()1691        if stop is not None:1692            kwargs["stop"] = stop16931694        payload = {**self._default_params, **kwargs}16951696        if self._use_responses_api(payload):1697            if self.use_previous_response_id:1698                last_messages, previous_response_id = _get_last_messages(messages)1699                payload_to_use = last_messages if previous_response_id else messages1700                if previous_response_id:1701                    payload["previous_response_id"] = previous_response_id1702                payload = _construct_responses_api_payload(payload_to_use, payload)1703            else:1704                payload = _construct_responses_api_payload(messages, payload)1705        else:1706            payload["messages"] = [1707                _convert_message_to_dict(_convert_from_v1_to_chat_completions(m))1708                if isinstance(m, AIMessage)1709                else _convert_message_to_dict(m)1710                for m in messages1711            ]1712        return payload17131714    def _create_chat_result(1715        self,1716        response: dict | openai.BaseModel,1717        generation_info: dict | None = None,1718    ) -> ChatResult:1719        generations = []17201721        response_dict = (1722            response1723            if isinstance(response, dict)1724            # `parsed` may hold arbitrary Pydantic models from structured output.1725            # Exclude it from this dump and copy it from the typed response below.1726            else response.model_dump(1727                exclude={"choices": {"__all__": {"message": {"parsed"}}}}1728            )1729        )1730        # Sometimes the AI Model calling will get error, we should raise it (this is1731        # typically followed by a null value for `choices`, which we raise for1732        # separately below).1733        if response_dict.get("error"):1734            raise ValueError(response_dict.get("error"))17351736        # Raise informative error messages for non-OpenAI chat completions APIs1737        # that return malformed responses.1738        try:1739            choices = response_dict["choices"]1740        except KeyError as e:1741            msg = f"Response missing 'choices' key: {response_dict.keys()}"1742            raise KeyError(msg) from e17431744        if choices is None:1745            # Some OpenAI-compatible APIs (e.g., vLLM) may return null choices1746            # when the response format differs or an error occurs without1747            # populating the error field. Provide a more helpful error message.1748            msg = (1749                "Received response with null value for 'choices'. "1750                "This can happen when using OpenAI-compatible APIs (e.g., vLLM) "1751                "that return a response in an unexpected format. "1752                f"Full response keys: {list(response_dict.keys())}"1753            )1754            raise TypeError(msg)17551756        token_usage = response_dict.get("usage")1757        service_tier = response_dict.get("service_tier")17581759        for res in choices:1760            message = _convert_dict_to_message(res["message"])1761            if token_usage and isinstance(message, AIMessage):1762                message.usage_metadata = _create_usage_metadata(1763                    token_usage, service_tier1764                )1765            generation_info = generation_info or {}1766            generation_info["finish_reason"] = (1767                res.get("finish_reason")1768                if res.get("finish_reason") is not None1769                else generation_info.get("finish_reason")1770            )1771            if "logprobs" in res:1772                generation_info["logprobs"] = res["logprobs"]1773            gen = ChatGeneration(message=message, generation_info=generation_info)1774            generations.append(gen)1775        llm_output = {1776            "token_usage": token_usage,1777            "model_provider": "openai",1778            "model_name": response_dict.get("model", self.model_name),1779            "system_fingerprint": response_dict.get("system_fingerprint", ""),1780        }1781        if "id" in response_dict:1782            llm_output["id"] = response_dict["id"]1783        if service_tier:1784            llm_output["service_tier"] = service_tier17851786        if isinstance(response, openai.BaseModel) and getattr(1787            response, "choices", None1788        ):1789            message = response.choices[0].message  # type: ignore[attr-defined]1790            if hasattr(message, "parsed"):1791                generations[0].message.additional_kwargs["parsed"] = message.parsed1792            if hasattr(message, "refusal"):1793                generations[0].message.additional_kwargs["refusal"] = message.refusal17941795        return ChatResult(generations=generations, llm_output=llm_output)17961797    async def _astream(1798        self,1799        messages: list[BaseMessage],1800        stop: list[str] | None = None,1801        run_manager: AsyncCallbackManagerForLLMRun | None = None,1802        *,1803        stream_usage: bool | None = None,1804        **kwargs: Any,1805    ) -> AsyncIterator[ChatGenerationChunk]:1806        kwargs["stream"] = True1807        stream_usage = self._should_stream_usage(stream_usage, **kwargs)1808        if stream_usage:1809            kwargs["stream_options"] = {"include_usage": stream_usage}1810        payload = self._get_request_payload(messages, stop=stop, **kwargs)1811        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1812        base_generation_info = {}18131814        try:1815            if "response_format" in payload:1816                if self.include_response_headers:1817                    warnings.warn(1818                        "Cannot currently include response headers when "1819                        "response_format is specified."1820                    )1821                payload.pop("stream")1822                response_stream = self.root_async_client.beta.chat.completions.stream(1823                    **payload1824                )1825                context_manager = response_stream1826            else:1827                if self.include_response_headers:1828                    raw_response = await self.async_client.with_raw_response.create(1829                        **payload1830                    )1831                    response = raw_response.parse()1832                    base_generation_info = {"headers": dict(raw_response.headers)}1833                else:1834                    response = await self.async_client.create(**payload)1835                context_manager = response1836            async with context_manager as response:1837                is_first_chunk = True1838                async for chunk in _astream_with_chunk_timeout(1839                    response,1840                    self.stream_chunk_timeout,1841                    model_name=self.model_name,1842                ):1843                    if not isinstance(chunk, dict):1844                        chunk = chunk.model_dump()1845                    generation_chunk = self._convert_chunk_to_generation_chunk(1846                        chunk,1847                        default_chunk_class,1848                        base_generation_info if is_first_chunk else {},1849                    )1850                    if generation_chunk is None:1851                        continue1852                    default_chunk_class = generation_chunk.message.__class__1853                    logprobs = (generation_chunk.generation_info or {}).get("logprobs")1854                    if run_manager:1855                        await run_manager.on_llm_new_token(1856                            generation_chunk.text,1857                            chunk=generation_chunk,1858                            logprobs=logprobs,1859                        )1860                    is_first_chunk = False1861                    yield generation_chunk1862        except openai.BadRequestError as e:1863            _handle_openai_bad_request(e)1864        except openai.APIError as e:1865            _handle_openai_api_error(e)1866        if hasattr(response, "get_final_completion") and "response_format" in payload:1867            final_completion = await response.get_final_completion()1868            generation_chunk = self._get_generation_chunk_from_completion(1869                final_completion1870            )1871            if run_manager:1872                await run_manager.on_llm_new_token(1873                    generation_chunk.text, chunk=generation_chunk1874                )1875            yield generation_chunk18761877    async def _agenerate(1878        self,1879        messages: list[BaseMessage],1880        stop: list[str] | None = None,1881        run_manager: AsyncCallbackManagerForLLMRun | None = None,1882        **kwargs: Any,1883    ) -> ChatResult:1884        payload = self._get_request_payload(messages, stop=stop, **kwargs)1885        generation_info = None1886        raw_response = None1887        try:1888            if "response_format" in payload:1889                payload.pop("stream")1890                raw_response = await self.root_async_client.chat.completions.with_raw_response.parse(  # noqa: E5011891                    **payload1892                )1893                response = raw_response.parse()1894            elif self._use_responses_api(payload):1895                original_schema_obj = kwargs.get("response_format")1896                if original_schema_obj and _is_pydantic_class(original_schema_obj):1897                    raw_response = (1898                        await self.root_async_client.responses.with_raw_response.parse(1899                            **payload1900                        )1901                    )1902                else:1903                    raw_response = (1904                        await self.root_async_client.responses.with_raw_response.create(1905                            **payload1906                        )1907                    )1908                response = raw_response.parse()1909                if self.include_response_headers:1910                    generation_info = {"headers": dict(raw_response.headers)}1911                return _construct_lc_result_from_responses_api(1912                    response,1913                    schema=original_schema_obj,1914                    metadata=generation_info,1915                    output_version=self.output_version,1916                )1917            else:1918                raw_response = await self.async_client.with_raw_response.create(1919                    **payload1920                )1921                response = raw_response.parse()1922        except openai.BadRequestError as e:1923            _handle_openai_bad_request(e)1924        except openai.APIError as e:1925            _handle_openai_api_error(e)1926        except Exception as e:1927            if raw_response is not None and hasattr(raw_response, "http_response"):1928                e.response = raw_response.http_response  # type: ignore[attr-defined]1929            raise e1930        if (1931            self.include_response_headers1932            and raw_response is not None1933            and hasattr(raw_response, "headers")1934        ):1935            generation_info = {"headers": dict(raw_response.headers)}1936        return await run_in_executor(1937            None, self._create_chat_result, response, generation_info1938        )19391940    @property1941    def _identifying_params(self) -> dict[str, Any]:1942        """Get the identifying parameters."""1943        return {"model_name": self.model_name, **self._default_params}19441945    def _get_invocation_params(1946        self, stop: list[str] | None = None, **kwargs: Any1947    ) -> dict[str, Any]:1948        """Get the parameters used to invoke the model."""1949        params = {1950            "model": self.model_name,1951            **super()._get_invocation_params(stop=stop),1952            **self._default_params,1953            **kwargs,1954        }1955        # Redact headers from built-in remote MCP tool invocations1956        if (tools := params.get("tools")) and isinstance(tools, list):1957            params["tools"] = [1958                ({**tool, "headers": "**REDACTED**"} if "headers" in tool else tool)1959                if isinstance(tool, dict) and tool.get("type") == "mcp"1960                else tool1961                for tool in tools1962            ]19631964        return params19651966    def _get_ls_params(1967        self, stop: list[str] | None = None, **kwargs: Any1968    ) -> LangSmithParams:1969        """Get standard params for tracing."""1970        params = self._get_invocation_params(stop=stop, **kwargs)1971        ls_params = LangSmithParams(1972            ls_provider="openai",1973            ls_model_name=params.get("model", self.model_name),1974            ls_model_type="chat",1975            ls_temperature=params.get("temperature", self.temperature),1976        )1977        if ls_max_tokens := params.get("max_tokens", self.max_tokens) or params.get(1978            "max_completion_tokens", self.max_tokens1979        ):1980            ls_params["ls_max_tokens"] = ls_max_tokens1981        if ls_stop := stop or params.get("stop", None):1982            ls_params["ls_stop"] = ls_stop1983        return ls_params19841985    @property1986    def _llm_type(self) -> str:1987        """Return type of chat model.19881989        Will always return `'openai-chat'` regardless of the specific model name.1990        """1991        return "openai-chat"19921993    def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:1994        if self.tiktoken_model_name is not None:1995            model = self.tiktoken_model_name1996        else:1997            model = self.model_name19981999        try:2000            encoding = tiktoken.encoding_for_model(model)
Findings

✓ No findings reported for this file.
Findings

Get this view in your editor