libs/partners/openai/langchain_openai/chat_models/base.py PYTHON 5,089 lines View on github.com → Search inside
File is large — showing lines 1–2,000 of 5,089.
1"""OpenAI chat wrapper.23!!! warning "API scope"45        `ChatOpenAI` targets6        [official OpenAI API specifications](https://github.com/openai/openai-openapi)7        only. Non-standard response fields added by third-party providers (e.g.,8        `reasoning_content`, `reasoning_details`) are **not** extracted or9        preserved. If you are pointing `base_url` at a provider such as10        OpenRouter, vLLM, or DeepSeek, use the corresponding provider-specific11        LangChain package instead (e.g., `ChatDeepSeek`, `ChatOpenRouter`).12"""1314from __future__ import annotations1516import base6417import json18import logging19import os20import re21import ssl22import sys23import warnings24from collections.abc import (25    AsyncIterator,26    Awaitable,27    Callable,28    Iterator,29    Mapping,30    Sequence,31)32from functools import partial33from io import BytesIO34from json import JSONDecodeError35from math import ceil36from operator import itemgetter37from typing import (38    TYPE_CHECKING,39    Any,40    Literal,41    TypeAlias,42    TypeVar,43    cast,44)45from urllib.parse import urlparse4647import certifi48import openai49import tiktoken50from langchain_core.callbacks import (51    AsyncCallbackManagerForLLMRun,52    CallbackManagerForLLMRun,53)54from langchain_core.exceptions import ContextOverflowError55from langchain_core.language_models import (56    LanguageModelInput,57    ModelProfileRegistry,58)59from langchain_core.language_models.chat_models import (60    BaseChatModel,61    LangSmithParams,62)63from langchain_core.messages import (64    AIMessage,65    AIMessageChunk,66    BaseMessage,67    BaseMessageChunk,68    ChatMessage,69    ChatMessageChunk,70    FunctionMessage,71    FunctionMessageChunk,72    HumanMessage,73    HumanMessageChunk,74    InvalidToolCall,75    SystemMessage,76    SystemMessageChunk,77    ToolCall,78    ToolMessage,79    ToolMessageChunk,80    is_data_content_block,81)82from langchain_core.messages import content as types83from langchain_core.messages.ai import (84    InputTokenDetails,85    OutputTokenDetails,86    UsageMetadata,87)88from langchain_core.messages.block_translators.openai import (89    _convert_from_v03_ai_message,90    convert_to_openai_data_block,91)92from langchain_core.messages.tool import tool_call_chunk93from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser94from langchain_core.output_parsers.openai_tools import (95    JsonOutputKeyToolsParser,96    PydanticToolsParser,97    make_invalid_tool_call,98    parse_tool_call,99)100from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult101from langchain_core.runnables import (102    Runnable,103    RunnableLambda,104    RunnableMap,105    RunnablePassthrough,106)107from langchain_core.runnables.config import run_in_executor108from langchain_core.tools import BaseTool109from langchain_core.tools.base import _stringify110from langchain_core.utils import get_pydantic_field_names111from langchain_core.utils.function_calling import (112    convert_to_openai_function,113    convert_to_openai_tool,114)115from langchain_core.utils.pydantic import (116    PydanticBaseModel,117    TypeBaseModel,118    is_basemodel_subclass,119)120from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env121from pydantic import (122    BaseModel,123    ConfigDict,124    Field,125    SecretStr,126    ValidationError,127    field_validator,128    model_validator,129)130from pydantic.v1 import BaseModel as BaseModelV1131from typing_extensions import Self132133from langchain_openai._version import __version__134from langchain_openai.chat_models._client_utils import (135    _astream_with_chunk_timeout,136    _build_proxied_async_httpx_client,137    _build_proxied_sync_httpx_client,138    _float_env,139    _get_default_async_httpx_client,140    _get_default_httpx_client,141    _log_proxy_env_bypass_once,142    _resolve_socket_options,143    _resolve_sync_and_async_api_keys,144    _should_bypass_socket_options_for_proxy_env,145    _warn_if_proxy_env_shadowed,146)147from langchain_openai.chat_models._compat import (148    _convert_from_v1_to_chat_completions,149    _convert_from_v1_to_responses,150    _convert_to_v03_ai_message,151)152from langchain_openai.data._profiles import _PROFILES153154if TYPE_CHECKING:155    import httpx156    from langchain_core.language_models import ModelProfile157    from openai.types.responses import Response158159logger = logging.getLogger(__name__)160161# This SSL context is equivalent to the default `verify=True`.162# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances163global_ssl_context = ssl.create_default_context(cafile=certifi.where())164165_ssrf_client: httpx.Client | None = None166167168def _get_ssrf_safe_client() -> httpx.Client:169    global _ssrf_client170    if _ssrf_client is None:171        from langchain_core._security._transport import ssrf_safe_client172173        _ssrf_client = ssrf_safe_client(174            verify=global_ssl_context, follow_redirects=False175        )176    return _ssrf_client177178179_MODEL_PROFILES = cast(ModelProfileRegistry, _PROFILES)180181182def _get_default_model_profile(model_name: str) -> ModelProfile:183    default = _MODEL_PROFILES.get(model_name) or {}184    return default.copy()185186187WellKnownTools = (188    "file_search",189    "web_search_preview",190    "web_search",191    "computer_use_preview",192    "code_interpreter",193    "mcp",194    "image_generation",195    "tool_search",196    "apply_patch",197)198199200def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:201    """Convert a dictionary to a LangChain message.202203    Args:204        _dict: The dictionary.205206    Returns:207        The LangChain message.208    """209    role = _dict.get("role")210    name = _dict.get("name")211    id_ = _dict.get("id")212    if role == "user":213        return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)214    if role == "assistant":215        # Fix for azure216        # Also OpenAI returns None for tool invocations217        content = _dict.get("content", "") or ""218        additional_kwargs: dict = {}219        if function_call := _dict.get("function_call"):220            additional_kwargs["function_call"] = dict(function_call)221        tool_calls = []222        invalid_tool_calls = []223        if raw_tool_calls := _dict.get("tool_calls"):224            for raw_tool_call in raw_tool_calls:225                try:226                    tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))227                except Exception as e:228                    invalid_tool_calls.append(229                        make_invalid_tool_call(raw_tool_call, str(e))230                    )231        if audio := _dict.get("audio"):232            additional_kwargs["audio"] = audio233        return AIMessage(234            content=content,235            additional_kwargs=additional_kwargs,236            name=name,237            id=id_,238            tool_calls=tool_calls,239            invalid_tool_calls=invalid_tool_calls,240        )241    if role in ("system", "developer"):242        additional_kwargs = {"__openai_role__": role} if role == "developer" else {}243        return SystemMessage(244            content=_dict.get("content", ""),245            name=name,246            id=id_,247            additional_kwargs=additional_kwargs,248        )249    if role == "function":250        return FunctionMessage(251            content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_252        )253    if role == "tool":254        additional_kwargs = {}255        if "name" in _dict:256            additional_kwargs["name"] = _dict["name"]257        return ToolMessage(258            content=_dict.get("content", ""),259            tool_call_id=cast(str, _dict.get("tool_call_id")),260            additional_kwargs=additional_kwargs,261            name=name,262            id=id_,263        )264    return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]265266267def _sanitize_chat_completions_content(content: str | list[dict]) -> str | list[dict]:268    """Sanitize content for chat/completions API.269270    For list content, filters text blocks to only keep 'type' and 'text' keys.271    """272    if isinstance(content, list):273        sanitized = []274        for block in content:275            if (276                isinstance(block, dict)277                and block.get("type") == "text"278                and "text" in block279            ):280                sanitized.append({"type": "text", "text": block["text"]})281            else:282                sanitized.append(block)283        return sanitized284    return content285286287def _format_message_content(288    content: Any,289    api: Literal["chat/completions", "responses"] = "chat/completions",290    role: str | None = None,291) -> Any:292    """Format message content."""293    if content and isinstance(content, list):294        formatted_content = []295        for block in content:296            # Remove unexpected block types297            if (298                isinstance(block, dict)299                and "type" in block300                and (301                    block["type"] in ("tool_use", "thinking", "reasoning_content")302                    or (303                        block["type"] in ("function_call", "code_interpreter_call")304                        and api == "chat/completions"305                    )306                )307            ):308                continue309            if (310                isinstance(block, dict)311                and is_data_content_block(block)312                # Responses API messages handled separately in _compat (parsed into313                # image generation calls)314                and not (api == "responses" and str(role).lower().startswith("ai"))315            ):316                formatted_content.append(convert_to_openai_data_block(block, api=api))317            # Anthropic image blocks318            elif (319                isinstance(block, dict)320                and block.get("type") == "image"321                and (source := block.get("source"))322                and isinstance(source, dict)323            ):324                if source.get("type") == "base64" and (325                    (media_type := source.get("media_type"))326                    and (data := source.get("data"))327                ):328                    formatted_content.append(329                        {330                            "type": "image_url",331                            "image_url": {"url": f"data:{media_type};base64,{data}"},332                        }333                    )334                elif source.get("type") == "url" and (url := source.get("url")):335                    formatted_content.append(336                        {"type": "image_url", "image_url": {"url": url}}337                    )338                else:339                    continue340            else:341                formatted_content.append(block)342    else:343        formatted_content = content344345    return formatted_content346347348def _convert_message_to_dict(349    message: BaseMessage,350    api: Literal["chat/completions", "responses"] = "chat/completions",351) -> dict:352    """Convert a LangChain message to dictionary format expected by OpenAI."""353    message_dict: dict[str, Any] = {354        "content": _format_message_content(message.content, api=api, role=message.type)355    }356    if (name := message.name or message.additional_kwargs.get("name")) is not None:357        message_dict["name"] = name358359    # populate role and additional message data360    if isinstance(message, ChatMessage):361        message_dict["role"] = message.role362    elif isinstance(message, HumanMessage):363        message_dict["role"] = "user"364    elif isinstance(message, AIMessage):365        message_dict["role"] = "assistant"366        if message.tool_calls or message.invalid_tool_calls:367            message_dict["tool_calls"] = [368                _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls369            ] + [370                _lc_invalid_tool_call_to_openai_tool_call(tc)371                for tc in message.invalid_tool_calls372            ]373        elif "tool_calls" in message.additional_kwargs:374            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]375            tool_call_supported_props = {"id", "type", "function"}376            message_dict["tool_calls"] = [377                {k: v for k, v in tool_call.items() if k in tool_call_supported_props}378                for tool_call in message_dict["tool_calls"]379            ]380        elif "function_call" in message.additional_kwargs:381            # OpenAI raises 400 if both function_call and tool_calls are present in the382            # same message.383            message_dict["function_call"] = message.additional_kwargs["function_call"]384        else:385            pass386        # If tool calls present, content null value should be None not empty string.387        if "function_call" in message_dict or "tool_calls" in message_dict:388            message_dict["content"] = message_dict["content"] or None389390        audio: dict[str, Any] | None = None391        for block in message.content:392            if (393                isinstance(block, dict)394                and block.get("type") == "audio"395                and (id_ := block.get("id"))396                and api != "responses"397            ):398                # openai doesn't support passing the data back - only the id399                # https://platform.openai.com/docs/guides/audio/multi-turn-conversations400                audio = {"id": id_}401        if not audio and "audio" in message.additional_kwargs:402            raw_audio = message.additional_kwargs["audio"]403            audio = (404                {"id": message.additional_kwargs["audio"]["id"]}405                if "id" in raw_audio406                else raw_audio407            )408        if audio:409            message_dict["audio"] = audio410    elif isinstance(message, SystemMessage):411        message_dict["role"] = message.additional_kwargs.get(412            "__openai_role__", "system"413        )414    elif isinstance(message, FunctionMessage):415        message_dict["role"] = "function"416    elif isinstance(message, ToolMessage):417        message_dict["role"] = "tool"418        message_dict["tool_call_id"] = message.tool_call_id419        message_dict["content"] = _sanitize_chat_completions_content(420            message_dict["content"]421        )422        supported_props = {"content", "role", "tool_call_id"}423        message_dict = {k: v for k, v in message_dict.items() if k in supported_props}424    else:425        msg = f"Got unknown type {message}"426        raise TypeError(msg)427    return message_dict428429430def _convert_delta_to_message_chunk(431    _dict: Mapping[str, Any], default_class: type[BaseMessageChunk]432) -> BaseMessageChunk:433    """Convert to a LangChain message chunk."""434    id_ = _dict.get("id")435    role = cast(str, _dict.get("role"))436    content = cast(str, _dict.get("content") or "")437    additional_kwargs: dict = {}438    if _dict.get("function_call"):439        function_call = dict(_dict["function_call"])440        if "name" in function_call and function_call["name"] is None:441            function_call["name"] = ""442        additional_kwargs["function_call"] = function_call443    tool_call_chunks = []444    if raw_tool_calls := _dict.get("tool_calls"):445        try:446            tool_call_chunks = [447                tool_call_chunk(448                    name=rtc["function"].get("name"),449                    args=rtc["function"].get("arguments"),450                    id=rtc.get("id"),451                    index=rtc["index"],452                )453                for rtc in raw_tool_calls454            ]455        except KeyError:456            pass457458    if role == "user" or default_class == HumanMessageChunk:459        return HumanMessageChunk(content=content, id=id_)460    if role == "assistant" or default_class == AIMessageChunk:461        return AIMessageChunk(462            content=content,463            additional_kwargs=additional_kwargs,464            id=id_,465            tool_call_chunks=tool_call_chunks,  # type: ignore[arg-type]466        )467    if role in ("system", "developer") or default_class == SystemMessageChunk:468        if role == "developer":469            additional_kwargs = {"__openai_role__": "developer"}470        else:471            additional_kwargs = {}472        return SystemMessageChunk(473            content=content, id=id_, additional_kwargs=additional_kwargs474        )475    if role == "function" or default_class == FunctionMessageChunk:476        return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)477    if role == "tool" or default_class == ToolMessageChunk:478        return ToolMessageChunk(479            content=content, tool_call_id=_dict["tool_call_id"], id=id_480        )481    if role or default_class == ChatMessageChunk:482        return ChatMessageChunk(content=content, role=role, id=id_)483    return default_class(content=content, id=id_)  # type: ignore[call-arg]484485486def _update_token_usage(487    overall_token_usage: int | dict, new_usage: int | dict488) -> int | dict:489    # Token usage is either ints or dictionaries490    # `reasoning_tokens` is nested inside `completion_tokens_details`491    if isinstance(new_usage, int):492        if not isinstance(overall_token_usage, int):493            msg = (494                f"Got different types for token usage: "495                f"{type(new_usage)} and {type(overall_token_usage)}"496            )497            raise ValueError(msg)498        return new_usage + overall_token_usage499    if isinstance(new_usage, dict):500        if not isinstance(overall_token_usage, dict):501            msg = (502                f"Got different types for token usage: "503                f"{type(new_usage)} and {type(overall_token_usage)}"504            )505            raise ValueError(msg)506        return {507            k: _update_token_usage(overall_token_usage.get(k, 0), v)508            for k, v in new_usage.items()509        }510    warnings.warn(f"Unexpected type for token usage: {type(new_usage)}")511    return new_usage512513514class OpenAIContextOverflowError(openai.BadRequestError, ContextOverflowError):515    """BadRequestError raised when input exceeds OpenAI's context limit."""516517518class OpenAIAPIContextOverflowError(openai.APIError, ContextOverflowError):519    """APIError raised when input exceeds OpenAI's context limit."""520521522def _handle_openai_bad_request(e: openai.BadRequestError) -> None:523    if (524        "context_length_exceeded" in str(e)525        or "Input tokens exceed the configured limit" in e.message526        or "prompt is too long" in e.message527    ):528        raise OpenAIContextOverflowError(529            message=e.message, response=e.response, body=e.body530        ) from e531    if (532        "'response_format' of type 'json_schema' is not supported with this model"533    ) in e.message:534        message = (535            "This model does not support OpenAI's structured output feature, which "536            "is the default method for `with_structured_output` as of "537            "langchain-openai==0.3. To use `with_structured_output` with this model, "538            'specify `method="function_calling"`.'539        )540        warnings.warn(message)541        raise e542    if "Invalid schema for response_format" in e.message:543        message = (544            "Invalid schema for OpenAI's structured output feature, which is the "545            "default method for `with_structured_output` as of langchain-openai==0.3. "546            'Specify `method="function_calling"` instead or update your schema. '547            "See supported schemas: "548            "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas"549        )550        warnings.warn(message)551        raise e552    raise553554555def _handle_openai_api_error(e: openai.APIError) -> None:556    error_message = str(e)557    if "exceeds the context window" in error_message:558        raise OpenAIAPIContextOverflowError(559            message=e.message, request=e.request, body=e.body560        ) from e561    raise562563564_RESPONSES_API_ONLY_PREFIXES = (565    "gpt-5-pro",566    "gpt-5.2-pro",567    "gpt-5.4-pro",568    "gpt-5.5-pro",569)570571572def _model_prefers_responses_api(model_name: str | None) -> bool:573    if not model_name:574        return False575    return model_name.startswith(_RESPONSES_API_ONLY_PREFIXES) or "codex" in model_name576577578_BM = TypeVar("_BM", bound=BaseModel)579_DictOrPydanticClass: TypeAlias = dict[str, Any] | type[_BM] | type580_DictOrPydantic: TypeAlias = dict | _BM581582583class BaseChatOpenAI(BaseChatModel):584    """Base wrapper around OpenAI large language models for chat.585586    This base class targets587    [official OpenAI API specifications](https://github.com/openai/openai-openapi)588    only. Non-standard response fields added by third-party providers (e.g.,589    `reasoning_content`) are not extracted. Use a provider-specific subclass for590    full provider support.591    """592593    client: Any = Field(default=None, exclude=True)594595    async_client: Any = Field(default=None, exclude=True)596597    root_client: Any = Field(default=None, exclude=True)598599    root_async_client: Any = Field(default=None, exclude=True)600601    model_name: str = Field(default="gpt-3.5-turbo", alias="model")602    """Model name to use."""603604    temperature: float | None = None605    """What sampling temperature to use."""606607    model_kwargs: dict[str, Any] = Field(default_factory=dict)608    """Holds any model parameters valid for `create` call not explicitly specified."""609610    openai_api_key: (611        SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]612    ) = Field(613        alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)614    )615    """API key to use.616617    Can be inferred from the `OPENAI_API_KEY` environment variable, or specified618    as a string, or sync or async callable that returns a string.619620    ??? example "Specify with environment variable"621622        ```bash623        export OPENAI_API_KEY=...624        ```625        ```python626        from langchain_openai import ChatOpenAI627628        model = ChatOpenAI(model="gpt-5-nano")629        ```630631    ??? example "Specify with a string"632633        ```python634        from langchain_openai import ChatOpenAI635636        model = ChatOpenAI(model="gpt-5-nano", api_key="...")637        ```638639    ??? example "Specify with a sync callable"640641        ```python642        from langchain_openai import ChatOpenAI643644        def get_api_key() -> str:645            # Custom logic to retrieve API key646            return "..."647648        model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)649        ```650651    ??? example "Specify with an async callable"652653        ```python654        from langchain_openai import ChatOpenAI655656        async def get_api_key() -> str:657            # Custom async logic to retrieve API key658            return "..."659660        model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)661        ```662    """663664    openai_api_base: str | None = Field(default=None, alias="base_url")665    """Base URL path for API requests, leave blank if not using a proxy or service emulator.666667    Resolution order (first match wins):668669    1. Explicit `base_url` (or `openai_api_base`) kwarg.670    2. Env var `OPENAI_API_BASE` (read by LangChain at init).671    3. Env var `OPENAI_BASE_URL` (read by the underlying `openai` SDK client).672673    `OPENAI_BASE_URL` is also inspected by LangChain only to decide whether to674    default-enable `stream_usage`  when set, the default is left off because many675    non-OpenAI endpoints do not support streaming token usage.676    """  # noqa: E501677678    openai_organization: str | None = Field(default=None, alias="organization")679    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""680681    # to support explicit proxy for OpenAI682    openai_proxy: str | None = Field(683        default_factory=from_env("OPENAI_PROXY", default=None)684    )685686    request_timeout: float | tuple[float, float] | Any | None = Field(687        default=None, alias="timeout"688    )689    """Timeout for requests to OpenAI completion API.690691    Can be float, `httpx.Timeout` or `None`.692    """693694    stream_usage: bool | None = None695    """Whether to include usage metadata in streaming output.696697    If enabled, an additional message chunk will be generated during the stream698    including usage metadata.699700    This parameter is enabled unless `openai_api_base` is set or the model is701    initialized with a custom client, as many chat completions APIs do not702    support streaming token usage.703704    !!! version-added "Added in `langchain-openai` 0.3.9"705706    !!! warning "Behavior changed in `langchain-openai` 0.3.35"707708        Enabled for default base URL and client.709    """710711    max_retries: int | None = None712    """Maximum number of retries to make when generating."""713714    presence_penalty: float | None = None715    """Penalizes repeated tokens."""716717    frequency_penalty: float | None = None718    """Penalizes repeated tokens according to frequency."""719720    seed: int | None = None721    """Seed for generation"""722723    logprobs: bool | None = None724    """Whether to return logprobs."""725726    top_logprobs: int | None = None727    """Number of most likely tokens to return at each token position, each with an728    associated log probability.729730    `logprobs` must be set to true if this parameter is used.731    """732733    logit_bias: dict[int, int] | None = None734    """Modify the likelihood of specified tokens appearing in the completion."""735736    streaming: bool = False737    """Whether to stream the results or not."""738739    n: int | None = None740    """Number of chat completions to generate for each prompt."""741742    top_p: float | None = None743    """Total probability mass of tokens to consider at each step."""744745    max_tokens: int | None = Field(default=None)746    """Maximum number of tokens to generate."""747748    reasoning_effort: str | None = None749    """Constrains effort on reasoning for reasoning models.750751    For use with the Chat Completions API. Reasoning models only.752753    Currently supported values are `'minimal'`, `'low'`, `'medium'`, and754    `'high'`. Reducing reasoning effort can result in faster responses and fewer755    tokens used on reasoning in a response.756    """757758    reasoning: dict[str, Any] | None = None759    """Reasoning parameters for reasoning models. None disables reasoning.760761    For use with the Responses API.762763    ```python764    reasoning={765        "effort": None,  # Default None; can be "low", "medium", or "high"766        "summary": "auto",  # Can be "auto", "concise", or "detailed"767    }768    ```769770    !!! version-added "Added in `langchain-openai` 0.3.24"771    """772773    verbosity: str | None = None774    """Controls the verbosity level of responses for reasoning models.775776    For use with the Responses API.777778    Currently supported values are `'low'`, `'medium'`, and `'high'`.779780    !!! version-added "Added in `langchain-openai` 0.3.28"781    """782783    tiktoken_model_name: str | None = None784    """The model name to pass to tiktoken when using this class.785786    Tiktoken is used to count the number of tokens in documents to constrain787    them to be under a certain limit.788789    By default, when set to `None`, this will be the same as the embedding model name.790    However, there are some cases where you may want to use this `Embedding` class with791    a model name not supported by tiktoken. This can include when using Azure embeddings792    or when using one of the many model providers that expose an OpenAI-like793    API but with different models. In those cases, in order to avoid erroring794    when tiktoken is called, you can specify a model name to use here.795    """796797    default_headers: Mapping[str, str] | None = None798799    default_query: Mapping[str, object] | None = None800801    # Configure a custom httpx client. See the802    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.803    http_client: Any | None = Field(default=None, exclude=True)804    """Optional `httpx.Client`.805806    Only used for sync invocations. Must specify `http_async_client` as well if807    you'd like a custom client for async invocations.808    """809810    http_async_client: Any | None = Field(default=None, exclude=True)811    """Optional `httpx.AsyncClient`.812813    Only used for async invocations. Must specify `http_client` as well if you'd814    like a custom client for sync invocations.815    """816817    http_socket_options: Sequence[tuple[int, int, int]] | None = Field(818        default=None, exclude=True819    )820    """TCP socket options applied to the httpx transports built by this instance.821822    Defaults to a conservative TCP-keepalive + `TCP_USER_TIMEOUT` profile that823    targets a ~2-minute bound on silent connection hangs (silent mid-stream peer824    loss, gVisor/NAT idle timeouts, silent TCP black holes) on platforms that825    support the full option set. On platforms that only support a subset826    (macOS without `TCP_USER_TIMEOUT`, Windows with only `SO_KEEPALIVE`,827    minimal kernels), unsupported options are silently dropped and the bound828    degrades to whatever the remaining options + OS defaults provide  still829    better than indefinite hang.830831    Accepted values:832833    - `None` (default): use env-driven defaults. Matches the "unset" convention834        used by `http_client` elsewhere on this class.835    - `()` (empty): disable socket-option injection entirely. Inherits the OS836        defaults and restores httpx's native env-proxy auto-detection.837    - A non-empty sequence of `(level, option, value)` tuples: explicit838        override; passed verbatim to the transport (not filtered). Unsupported839        options raise `OSError` at connect time rather than being silently840        dropped  the user chose them explicitly.841842    Environment variables (only consulted when this field is `None`):843    `LANGCHAIN_OPENAI_TCP_KEEPALIVE` (set to `0` to disable entirely  the844    kill-switch), `LANGCHAIN_OPENAI_TCP_KEEPIDLE`,845    `LANGCHAIN_OPENAI_TCP_KEEPINTVL`, `LANGCHAIN_OPENAI_TCP_KEEPCNT`,846    `LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS`.847848    Applied per side: if `http_client` is supplied, the sync path uses849    that user-owned client's socket options as-is; the async path still850    gets `http_socket_options` applied to its default builder (and851    vice-versa for `http_async_client`). Supply both to take full control.852853    !!! note "Interaction with env-proxy auto-detection"854855        When a custom `httpx` transport is active, `httpx` disables its856        native env-proxy auto-detection (`HTTP_PROXY` / `HTTPS_PROXY` /857        `ALL_PROXY` / `NO_PROXY` and macOS/Windows system proxy settings).858859        To keep the default shape safe, `ChatOpenAI` detects the860        "proxy-env-shadow" pattern and **skips the custom transport861        entirely** when **all** of the following hold:862863        - `http_socket_options` is left at its default (`None`)864        - No `http_client` or `http_async_client` supplied865        - No `openai_proxy` supplied866        - A proxy env var or system proxy is visible to httpx867868        On that specific shape, the instance falls back to pre-PR behavior869        and httpx's env-proxy auto-detection applies (a one-time `INFO` log870        records the bypass for observability).871872        If you explicitly set `http_socket_options=[...]` while a proxy873        env var is also set, no bypass  you opted into the transport, and874        a one-time `WARNING` records the shadowing. Set875        `http_socket_options=()` or `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` to876        disable transport injection explicitly, or pass a fully-configured877        `http_async_client` / `http_client` to take full control. The878        `openai_proxy` constructor kwarg is unaffected  socket options879        are applied cleanly through the proxied transport on that path.880    """881882    stream_chunk_timeout: float | None = Field(883        default_factory=lambda: _float_env(884            "LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0885        ),886        exclude=True,887    )888    """Per-chunk wall-clock timeout (seconds) on async streaming responses.889890    Applies to async invocations only (`astream`, `ainvoke` with streaming,891    etc.). Sync streaming (`stream`) is not affected.892893    Fires between content chunks yielded by the openai SDK's streaming iterator894    (i.e., each call to `__anext__` on the response). Crucially, this is895    **not** the same as httpx's `timeout.read`:896897    - httpx's read timeout is inter-byte and gets reset every time *any* bytes898        arrive on the socket  including OpenAI's SSE keepalive comments899        (`: keepalive`) that trickle down during long model generations. A900        stream that's silent on *content* but still producing keepalives looks901        alive forever to httpx.902    - `stream_chunk_timeout` measures the gap between *parsed chunks*. The903        openai SDK's SSE parser consumes keepalive comments internally and does904        not emit them as chunks, so keepalives do *not* reset this timer. It905        fires on genuine content silence.906907    When it fires, a `StreamChunkTimeoutError`908    (subclass of `asyncio.TimeoutError`) is raised with a self-describing909    message naming this knob, the env-var override, the model, and the910    number of chunks received before the stall. A WARNING log with911    `extra={"source": "stream_chunk_timeout", "timeout_s": <value>,912    "model_name": <value>, "chunks_received": <value>}` also fires so913    aggregate logging can distinguish app-layer timeouts from914    transport-layer failures.915916    Defaults to 120s. Set to `None` or `0` to disable. Overridable via the917    `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. Negative values918    (from either the env var or the constructor kwarg  e.g., hydrated919    from YAML/JSON configs) fall back to the default with a `WARNING` log920    rather than silently disabling the wrapper, so a misconfigured value921    still boots safely and the fallback is visible.922    """923924    stop: list[str] | str | None = Field(default=None, alias="stop_sequences")925    """Default stop sequences."""926927    extra_body: Mapping[str, Any] | None = None928    """Optional additional JSON properties to include in the request parameters929    when making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or930    other providers.931932    This is the recommended way to pass custom parameters that are specific to your933    OpenAI-compatible API provider but not part of the standard OpenAI API.934935    Examples:936    - [LM Studio](https://lmstudio.ai/) TTL parameter: `extra_body={"ttl": 300}`937    - [vLLM](https://github.com/vllm-project/vllm) custom parameters:938        `extra_body={"use_beam_search": True}`939    - Any other provider-specific parameters940941    !!! warning942943        Do not use `model_kwargs` for custom parameters that are not part of the944        standard OpenAI API, as this will cause errors when making API calls. Use945        `extra_body` instead.946    """947948    include_response_headers: bool = False949    """Whether to include response headers in the output message `response_metadata`."""950951    disabled_params: dict[str, Any] | None = Field(default=None)952    """Parameters of the OpenAI client or `chat.completions` endpoint that should be953    disabled for the given model.954955    Should be specified as `{"param": None | ['val1', 'val2']}` where the key is the956    parameter and the value is either None, meaning that parameter should never be957    used, or it's a list of disabled values for the parameter.958959    For example, older models may not support the `'parallel_tool_calls'` parameter at960    all, in which case `disabled_params={"parallel_tool_calls": None}` can be passed961    in.962963    If a parameter is disabled then it will not be used by default in any methods, e.g.964    in `with_structured_output`. However this does not prevent a user from directly965    passed in the parameter during invocation.966    """967968    context_management: list[dict[str, Any]] | None = None969    """Configuration for970    [context management](https://developers.openai.com/api/docs/guides/compaction).971    """972973    include: list[str] | None = None974    """Additional fields to include in generations from Responses API.975976    Supported values:977978    - `'file_search_call.results'`979    - `'message.input_image.image_url'`980    - `'computer_call_output.output.image_url'`981    - `'reasoning.encrypted_content'`982    - `'code_interpreter_call.outputs'`983984    !!! version-added "Added in `langchain-openai` 0.3.24"985    """986987    service_tier: str | None = None988    """Latency tier for request.989990    Options are `'auto'`, `'default'`, or `'flex'`.991992    Relevant for users of OpenAI's scale tier service.993    """994995    store: bool | None = None996    """If `True`, OpenAI may store response data for future use.997998    Defaults to `True` for the Responses API and `False` for the Chat Completions API.9991000    !!! version-added "Added in `langchain-openai` 0.3.24"1001    """10021003    truncation: str | None = None1004    """Truncation strategy (Responses API).10051006    Can be `'auto'` or `'disabled'` (default).10071008    If `'auto'`, model may drop input items from the middle of the message sequence to1009    fit the context window.10101011    !!! version-added "Added in `langchain-openai` 0.3.24"1012    """10131014    use_previous_response_id: bool = False1015    """If `True`, always pass `previous_response_id` using the ID of the most recent1016    response. Responses API only.10171018    Input messages up to the most recent response will be dropped from request1019    payloads.10201021    For example, the following two are equivalent:10221023    ```python1024    model = ChatOpenAI(1025        model="...",1026        use_previous_response_id=True,1027    )1028    model.invoke(1029        [1030            HumanMessage("Hello"),1031            AIMessage("Hi there!", response_metadata={"id": "resp_123"}),1032            HumanMessage("How are you?"),1033        ]1034    )1035    ```10361037    ```python1038    model = ChatOpenAI(model="...", use_responses_api=True)1039    model.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")1040    ```10411042    !!! version-added "Added in `langchain-openai` 0.3.26"1043    """10441045    use_responses_api: bool | None = None1046    """Whether to use the Responses API instead of the Chat API.10471048    If not specified then will be inferred based on invocation params.10491050    !!! version-added "Added in `langchain-openai` 0.3.9"1051    """10521053    output_version: str | None = Field(1054        default_factory=from_env("LC_OUTPUT_VERSION", default=None)1055    )1056    """Version of `AIMessage` output format to use.10571058    This field is used to roll-out new output formats for chat model `AIMessage`1059    responses in a backwards-compatible way.10601061    Supported values:10621063    - `'v0'`: `AIMessage` format as of `langchain-openai 0.3.x`.1064    - `'responses/v1'`: Formats Responses API output items into AIMessage content blocks1065        (Responses API only)1066    - `'v1'`: v1 of LangChain cross-provider standard.10671068    !!! warning "Behavior changed in `langchain-openai` 1.0.0"10691070        Default updated to `"responses/v1"`.1071    """10721073    model_config = ConfigDict(populate_by_name=True)10741075    @property1076    def model(self) -> str:1077        """Same as model_name."""1078        return self.model_name10791080    @model_validator(mode="before")1081    @classmethod1082    def build_extra(cls, values: dict[str, Any]) -> Any:1083        """Build extra kwargs from additional params that were passed in."""1084        all_required_field_names = get_pydantic_field_names(cls)1085        return _build_model_kwargs(values, all_required_field_names)10861087    @field_validator("stream_chunk_timeout", mode="after")1088    @classmethod1089    def _validate_stream_chunk_timeout(cls, value: float | None) -> float | None:1090        """Reject negative constructor values; fall back to the env-driven default.10911092        Matches the env-var path in `_float_env`: a negative value is a typo,1093        not an opt-out (`None`/`0` are the documented off switches). Configs1094        hydrated from YAML/JSON would otherwise silently disable the wrapper1095        and reintroduce the indefinite-stream hang the feature prevents.1096        """1097        if value is not None and value < 0:1098            fallback = _float_env("LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0)1099            logger.warning(1100                "Invalid `stream_chunk_timeout=%r` (negative); "1101                "falling back to %s. Pass `None` or `0` to disable.",1102                value,1103                fallback,1104            )1105            return fallback1106        return value11071108    @model_validator(mode="before")1109    @classmethod1110    def validate_temperature(cls, values: dict[str, Any]) -> Any:1111        """Validate temperature parameter for different models.11121113        - gpt-5 models (excluding gpt-5-chat) only allow `temperature=1` or unset1114            (Defaults to 1)1115        """1116        model = values.get("model_name") or values.get("model") or ""1117        model_lower = model.lower()11181119        # For o1 models, set temperature=1 if not provided1120        if model_lower.startswith("o1") and "temperature" not in values:1121            values["temperature"] = 111221123        # For gpt-5 models, handle temperature restrictions. Temperature is supported1124        # by gpt-5-chat and gpt-5 models with reasoning_effort='none' or1125        # reasoning={'effort': 'none'}.1126        if (1127            model_lower.startswith("gpt-5")1128            and ("chat" not in model_lower)1129            and values.get("reasoning_effort") != "none"1130            and (values.get("reasoning") or {}).get("effort") != "none"1131        ):1132            temperature = values.get("temperature")1133            if temperature is not None and temperature != 1:1134                # For gpt-5 (non-chat), only temperature=1 is supported1135                # So we remove any non-defaults1136                values.pop("temperature", None)11371138        return values11391140    @model_validator(mode="after")1141    def _set_openai_chat_version(self) -> Self:1142        """Set package version in metadata.11431144        Note: Subclasses that inherit from `BaseChatOpenAI` (e.g.1145        `ChatDeepSeek`, `ChatXAI`) must use a **unique** validator name1146        (e.g. `_set_deepseek_version`) instead of overriding this one. Pydantic1147        replaces same-named `model_validator` methods rather than chaining them,1148        so reusing `_set_openai_chat_version` would silently drop the parent's1149        `langchain-openai` version entry.1150        """1151        self._add_version("langchain-openai", __version__)1152        return self11531154    @model_validator(mode="after")1155    def validate_environment(self) -> Self:1156        """Validate that api key and python package exists in environment."""1157        if self.n is not None and self.n < 1:1158            msg = "n must be at least 1."1159            raise ValueError(msg)1160        if self.n is not None and self.n > 1 and self.streaming:1161            msg = "n must be 1 when streaming."1162            raise ValueError(msg)11631164        # Check OPENAI_ORGANIZATION for backwards compatibility.1165        self.openai_organization = (1166            self.openai_organization1167            or os.getenv("OPENAI_ORG_ID")1168            or os.getenv("OPENAI_ORGANIZATION")1169        )1170        self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")11711172        # Enable stream_usage by default if using default base URL and client1173        if (1174            all(1175                getattr(self, key, None) is None1176                for key in (1177                    "stream_usage",1178                    "openai_proxy",1179                    "openai_api_base",1180                    "base_url",1181                    "client",1182                    "root_client",1183                    "async_client",1184                    "root_async_client",1185                    "http_client",1186                    "http_async_client",1187                )1188            )1189            and "OPENAI_BASE_URL" not in os.environ1190        ):1191            self.stream_usage = True11921193        # Resolve API key from SecretStr or Callable1194        sync_api_key_value: str | Callable[[], str] | None = None1195        async_api_key_value: str | Callable[[], Awaitable[str]] | None = None11961197        if self.openai_api_key is not None:1198            # Because OpenAI and AsyncOpenAI clients support either sync or async1199            # callables for the API key, we need to resolve separate values here.1200            sync_api_key_value, async_api_key_value = _resolve_sync_and_async_api_keys(1201                self.openai_api_key1202            )12031204        client_params: dict = {1205            "organization": self.openai_organization,1206            "base_url": self.openai_api_base,1207            "timeout": self.request_timeout,1208            "default_headers": self.default_headers,1209            "default_query": self.default_query,1210        }1211        if self.max_retries is not None:1212            client_params["max_retries"] = self.max_retries12131214        if self.openai_proxy and (self.http_client or self.http_async_client):1215            openai_proxy = self.openai_proxy1216            http_client = self.http_client1217            http_async_client = self.http_async_client1218            msg = (1219                "Cannot specify 'openai_proxy' if one of "1220                "'http_client'/'http_async_client' is already specified. Received:\n"1221                f"{openai_proxy=}\n{http_client=}\n{http_async_client=}"1222            )1223            raise ValueError(msg)1224        if _should_bypass_socket_options_for_proxy_env(1225            http_socket_options=self.http_socket_options,1226            http_client=self.http_client,1227            http_async_client=self.http_async_client,1228            openai_proxy=self.openai_proxy,1229        ):1230            # Default-shape construction + proxy env var visible to httpx:1231            # skip the custom transport so httpx's env-proxy auto-detection1232            # still applies. Users who want kernel-level TCP tuning alongside1233            # an env proxy can opt in explicitly via `http_socket_options`.1234            resolved_socket_options: tuple[tuple[int, int, int], ...] = ()1235            _log_proxy_env_bypass_once()1236        else:1237            resolved_socket_options = _resolve_socket_options(self.http_socket_options)1238            _warn_if_proxy_env_shadowed(1239                resolved_socket_options, openai_proxy=self.openai_proxy1240            )1241        if not self.client:1242            if sync_api_key_value is None:1243                # No valid sync API key, leave client as None and raise informative1244                # error on invocation.1245                self.client = None1246                self.root_client = None1247            else:1248                if self.openai_proxy and not self.http_client:1249                    self.http_client = _build_proxied_sync_httpx_client(1250                        proxy=self.openai_proxy,1251                        verify=global_ssl_context,1252                        socket_options=resolved_socket_options,1253                    )1254                sync_specific = {1255                    "http_client": self.http_client1256                    or _get_default_httpx_client(1257                        self.openai_api_base,1258                        self.request_timeout,1259                        resolved_socket_options,1260                    ),1261                    "api_key": sync_api_key_value,1262                }1263                self.root_client = openai.OpenAI(**client_params, **sync_specific)  # type: ignore[arg-type]1264                self.client = self.root_client.chat.completions1265        if not self.async_client:1266            if self.openai_proxy and not self.http_async_client:1267                self.http_async_client = _build_proxied_async_httpx_client(1268                    proxy=self.openai_proxy,1269                    verify=global_ssl_context,1270                    socket_options=resolved_socket_options,1271                )1272            async_specific = {1273                "http_client": self.http_async_client1274                or _get_default_async_httpx_client(1275                    self.openai_api_base,1276                    self.request_timeout,1277                    resolved_socket_options,1278                ),1279                "api_key": async_api_key_value,1280            }1281            self.root_async_client = openai.AsyncOpenAI(1282                **client_params,1283                **async_specific,  # type: ignore[arg-type]1284            )1285            self.async_client = self.root_async_client.chat.completions1286        return self12871288    def _resolve_model_profile(self) -> ModelProfile | None:1289        return _get_default_model_profile(self.model_name) or None12901291    @property1292    def _default_params(self) -> dict[str, Any]:1293        """Get the default parameters for calling OpenAI API."""1294        exclude_if_none = {1295            "presence_penalty": self.presence_penalty,1296            "frequency_penalty": self.frequency_penalty,1297            "seed": self.seed,1298            "top_p": self.top_p,1299            "logprobs": self.logprobs,1300            "top_logprobs": self.top_logprobs,1301            "logit_bias": self.logit_bias,1302            "stop": self.stop or None,  # Also exclude empty list for this1303            "max_tokens": self.max_tokens,1304            "extra_body": self.extra_body,1305            "n": self.n,1306            "temperature": self.temperature,1307            "reasoning_effort": self.reasoning_effort,1308            "reasoning": self.reasoning,1309            "verbosity": self.verbosity,1310            "context_management": self.context_management,1311            "include": self.include,1312            "service_tier": self.service_tier,1313            "truncation": self.truncation,1314            "store": self.store,1315        }13161317        return {1318            "model": self.model_name,1319            "stream": self.streaming,1320            **{k: v for k, v in exclude_if_none.items() if v is not None},1321            **self.model_kwargs,1322        }13231324    def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:1325        overall_token_usage: dict = {}1326        system_fingerprint = None1327        for output in llm_outputs:1328            if output is None:1329                # Happens in streaming1330                continue1331            token_usage = output.get("token_usage")1332            if token_usage is not None:1333                for k, v in token_usage.items():1334                    if v is None:1335                        continue1336                    if k in overall_token_usage:1337                        overall_token_usage[k] = _update_token_usage(1338                            overall_token_usage[k], v1339                        )1340                    else:1341                        overall_token_usage[k] = v1342            if system_fingerprint is None:1343                system_fingerprint = output.get("system_fingerprint")1344        combined = {"token_usage": overall_token_usage, "model_name": self.model_name}1345        if system_fingerprint:1346            combined["system_fingerprint"] = system_fingerprint1347        return combined13481349    def _convert_chunk_to_generation_chunk(1350        self,1351        chunk: dict,1352        default_chunk_class: type,1353        base_generation_info: dict | None,1354    ) -> ChatGenerationChunk | None:1355        if chunk.get("type") == "content.delta":  # From beta.chat.completions.stream1356            return None1357        token_usage = chunk.get("usage")1358        choices = (1359            chunk.get("choices", [])1360            # From beta.chat.completions.stream1361            or chunk.get("chunk", {}).get("choices", [])1362        )13631364        usage_metadata: UsageMetadata | None = (1365            _create_usage_metadata(token_usage, chunk.get("service_tier"))1366            if token_usage1367            else None1368        )1369        if len(choices) == 0:1370            # logprobs is implicitly None1371            generation_chunk = ChatGenerationChunk(1372                message=default_chunk_class(content="", usage_metadata=usage_metadata),1373                generation_info=base_generation_info,1374            )1375            # Keep content as "" (the default) rather than converting to [].1376            # Chat Completions content deltas are normalized to strings in1377            # _convert_delta_to_message_chunk. Starting with [] causes1378            # merge_content to silently drop string content (empty list is1379            # falsy, so no merge branch applies). The empty list also triggers1380            # the content_blocks isinstance(list) short-circuit, which would1381            # return [] and miss tool_call_chunks.1382            if self.output_version == "v1":1383                generation_chunk.message.response_metadata["output_version"] = "v1"13841385            return generation_chunk13861387        choice = choices[0]1388        if choice["delta"] is None:1389            return None13901391        message_chunk = _convert_delta_to_message_chunk(1392            choice["delta"], default_chunk_class1393        )1394        generation_info = {**base_generation_info} if base_generation_info else {}13951396        if finish_reason := choice.get("finish_reason"):1397            generation_info["finish_reason"] = finish_reason1398            if model_name := chunk.get("model"):1399                generation_info["model_name"] = model_name1400            if system_fingerprint := chunk.get("system_fingerprint"):1401                generation_info["system_fingerprint"] = system_fingerprint1402            if service_tier := chunk.get("service_tier"):1403                generation_info["service_tier"] = service_tier14041405        logprobs = choice.get("logprobs")1406        if logprobs:1407            generation_info["logprobs"] = logprobs14081409        if usage_metadata and isinstance(message_chunk, AIMessageChunk):1410            message_chunk.usage_metadata = usage_metadata14111412        message_chunk.response_metadata["model_provider"] = "openai"1413        # Propagate output_version so content_blocks can detect v1 mode.1414        if self.output_version == "v1":1415            message_chunk.response_metadata["output_version"] = "v1"1416        return ChatGenerationChunk(1417            message=message_chunk, generation_info=generation_info or None1418        )14191420    def _ensure_sync_client_available(self) -> None:1421        """Check that sync client is available, raise error if not."""1422        if self.client is None:1423            msg = (1424                "Sync client is not available. This happens when an async callable "1425                "was provided for the API key. Use async methods (ainvoke, astream) "1426                "instead, or provide a string or sync callable for the API key."1427            )1428            raise ValueError(msg)14291430    def _stream_responses(1431        self,1432        messages: list[BaseMessage],1433        stop: list[str] | None = None,1434        run_manager: CallbackManagerForLLMRun | None = None,1435        **kwargs: Any,1436    ) -> Iterator[ChatGenerationChunk]:1437        self._ensure_sync_client_available()1438        kwargs["stream"] = True1439        payload = self._get_request_payload(messages, stop=stop, **kwargs)1440        try:1441            if self.include_response_headers:1442                raw_context_manager = (1443                    self.root_client.with_raw_response.responses.create(**payload)1444                )1445                context_manager = raw_context_manager.parse()1446                headers = {"headers": dict(raw_context_manager.headers)}1447            else:1448                context_manager = self.root_client.responses.create(**payload)1449                headers = {}1450            original_schema_obj = kwargs.get("response_format")14511452            with context_manager as response:1453                is_first_chunk = True1454                current_index = -11455                current_output_index = -11456                current_sub_index = -11457                has_reasoning = False1458                for chunk in response:1459                    metadata = headers if is_first_chunk else {}1460                    (1461                        current_index,1462                        current_output_index,1463                        current_sub_index,1464                        generation_chunk,1465                    ) = _convert_responses_chunk_to_generation_chunk(1466                        chunk,1467                        current_index,1468                        current_output_index,1469                        current_sub_index,1470                        schema=original_schema_obj,1471                        metadata=metadata,1472                        has_reasoning=has_reasoning,1473                        output_version=self.output_version,1474                    )1475                    if generation_chunk:1476                        if run_manager:1477                            run_manager.on_llm_new_token(1478                                generation_chunk.text, chunk=generation_chunk1479                            )1480                        is_first_chunk = False1481                        if "reasoning" in generation_chunk.message.additional_kwargs:1482                            has_reasoning = True1483                        yield generation_chunk1484        except openai.BadRequestError as e:1485            _handle_openai_bad_request(e)1486        except openai.APIError as e:1487            _handle_openai_api_error(e)14881489    async def _astream_responses(1490        self,1491        messages: list[BaseMessage],1492        stop: list[str] | None = None,1493        run_manager: AsyncCallbackManagerForLLMRun | None = None,1494        **kwargs: Any,1495    ) -> AsyncIterator[ChatGenerationChunk]:1496        kwargs["stream"] = True1497        payload = self._get_request_payload(messages, stop=stop, **kwargs)1498        try:1499            if self.include_response_headers:1500                raw_context_manager = (1501                    await self.root_async_client.with_raw_response.responses.create(1502                        **payload1503                    )1504                )1505                context_manager = raw_context_manager.parse()1506                headers = {"headers": dict(raw_context_manager.headers)}1507            else:1508                context_manager = await self.root_async_client.responses.create(1509                    **payload1510                )1511                headers = {}1512            original_schema_obj = kwargs.get("response_format")15131514            async with context_manager as response:1515                is_first_chunk = True1516                current_index = -11517                current_output_index = -11518                current_sub_index = -11519                has_reasoning = False1520                async for chunk in _astream_with_chunk_timeout(1521                    response,1522                    self.stream_chunk_timeout,1523                    model_name=self.model_name,1524                ):1525                    metadata = headers if is_first_chunk else {}1526                    (1527                        current_index,1528                        current_output_index,1529                        current_sub_index,1530                        generation_chunk,1531                    ) = _convert_responses_chunk_to_generation_chunk(1532                        chunk,1533                        current_index,1534                        current_output_index,1535                        current_sub_index,1536                        schema=original_schema_obj,1537                        metadata=metadata,1538                        has_reasoning=has_reasoning,1539                        output_version=self.output_version,1540                    )1541                    if generation_chunk:1542                        if run_manager:1543                            await run_manager.on_llm_new_token(1544                                generation_chunk.text, chunk=generation_chunk1545                            )1546                        is_first_chunk = False1547                        if "reasoning" in generation_chunk.message.additional_kwargs:1548                            has_reasoning = True1549                        yield generation_chunk1550        except openai.BadRequestError as e:1551            _handle_openai_bad_request(e)1552        except openai.APIError as e:1553            _handle_openai_api_error(e)15541555    def _should_stream_usage(1556        self, stream_usage: bool | None = None, **kwargs: Any1557    ) -> bool:1558        """Determine whether to include usage metadata in streaming output.15591560        For backwards compatibility, we check for `stream_options` passed1561        explicitly to kwargs or in the `model_kwargs` and override `self.stream_usage`.1562        """1563        stream_usage_sources = [  # order of precedence1564            stream_usage,1565            kwargs.get("stream_options", {}).get("include_usage"),1566            self.model_kwargs.get("stream_options", {}).get("include_usage"),1567            self.stream_usage,1568        ]1569        for source in stream_usage_sources:1570            if isinstance(source, bool):1571                return source1572        return self.stream_usage or False15731574    def _stream(1575        self,1576        messages: list[BaseMessage],1577        stop: list[str] | None = None,1578        run_manager: CallbackManagerForLLMRun | None = None,1579        *,1580        stream_usage: bool | None = None,1581        **kwargs: Any,1582    ) -> Iterator[ChatGenerationChunk]:1583        self._ensure_sync_client_available()1584        kwargs["stream"] = True1585        stream_usage = self._should_stream_usage(stream_usage, **kwargs)1586        if stream_usage:1587            kwargs["stream_options"] = {"include_usage": stream_usage}1588        payload = self._get_request_payload(messages, stop=stop, **kwargs)1589        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1590        base_generation_info = {}15911592        try:1593            if "response_format" in payload:1594                if self.include_response_headers:1595                    warnings.warn(1596                        "Cannot currently include response headers when "1597                        "response_format is specified."1598                    )1599                payload.pop("stream")1600                response_stream = self.root_client.beta.chat.completions.stream(1601                    **payload1602                )1603                context_manager = response_stream1604            else:1605                if self.include_response_headers:1606                    raw_response = self.client.with_raw_response.create(**payload)1607                    response = raw_response.parse()1608                    base_generation_info = {"headers": dict(raw_response.headers)}1609                else:1610                    response = self.client.create(**payload)1611                context_manager = response1612            with context_manager as response:1613                is_first_chunk = True1614                for chunk in response:1615                    if not isinstance(chunk, dict):1616                        chunk = chunk.model_dump()1617                    generation_chunk = self._convert_chunk_to_generation_chunk(1618                        chunk,1619                        default_chunk_class,1620                        base_generation_info if is_first_chunk else {},1621                    )1622                    if generation_chunk is None:1623                        continue1624                    default_chunk_class = generation_chunk.message.__class__1625                    logprobs = (generation_chunk.generation_info or {}).get("logprobs")1626                    if run_manager:1627                        run_manager.on_llm_new_token(1628                            generation_chunk.text,1629                            chunk=generation_chunk,1630                            logprobs=logprobs,1631                        )1632                    is_first_chunk = False1633                    yield generation_chunk1634        except openai.BadRequestError as e:1635            _handle_openai_bad_request(e)1636        except openai.APIError as e:1637            _handle_openai_api_error(e)1638        if hasattr(response, "get_final_completion") and "response_format" in payload:1639            final_completion = response.get_final_completion()1640            generation_chunk = self._get_generation_chunk_from_completion(1641                final_completion1642            )1643            if run_manager:1644                run_manager.on_llm_new_token(1645                    generation_chunk.text, chunk=generation_chunk1646                )1647            yield generation_chunk16481649    def _generate(1650        self,1651        messages: list[BaseMessage],1652        stop: list[str] | None = None,1653        run_manager: CallbackManagerForLLMRun | None = None,1654        **kwargs: Any,1655    ) -> ChatResult:1656        self._ensure_sync_client_available()1657        payload = self._get_request_payload(messages, stop=stop, **kwargs)1658        generation_info = None1659        raw_response = None1660        try:1661            if "response_format" in payload:1662                payload.pop("stream")1663                raw_response = (1664                    self.root_client.chat.completions.with_raw_response.parse(**payload)1665                )1666                response = raw_response.parse()1667            elif self._use_responses_api(payload):1668                original_schema_obj = kwargs.get("response_format")1669                if original_schema_obj and _is_pydantic_class(original_schema_obj):1670                    raw_response = self.root_client.responses.with_raw_response.parse(1671                        **payload1672                    )1673                else:1674                    raw_response = self.root_client.responses.with_raw_response.create(1675                        **payload1676                    )1677                response = raw_response.parse()1678                if self.include_response_headers:1679                    generation_info = {"headers": dict(raw_response.headers)}1680                return _construct_lc_result_from_responses_api(1681                    response,1682                    schema=original_schema_obj,1683                    metadata=generation_info,1684                    output_version=self.output_version,1685                )1686            else:1687                raw_response = self.client.with_raw_response.create(**payload)1688                response = raw_response.parse()1689        except openai.BadRequestError as e:1690            _handle_openai_bad_request(e)1691        except openai.APIError as e:1692            _handle_openai_api_error(e)1693        except Exception as e:1694            if raw_response is not None and hasattr(raw_response, "http_response"):1695                e.response = raw_response.http_response  # type: ignore[attr-defined]1696            raise e1697        if (1698            self.include_response_headers1699            and raw_response is not None1700            and hasattr(raw_response, "headers")1701        ):1702            generation_info = {"headers": dict(raw_response.headers)}1703        return self._create_chat_result(response, generation_info)17041705    def _use_responses_api(self, payload: dict) -> bool:1706        if isinstance(self.use_responses_api, bool):1707            return self.use_responses_api1708        if (1709            self.output_version == "responses/v1"1710            or self.context_management is not None1711            or self.include is not None1712            or self.reasoning is not None1713            or self.truncation is not None1714            or self.use_previous_response_id1715            or _model_prefers_responses_api(self.model_name)1716        ):1717            return True1718        return _use_responses_api(payload)17191720    def _get_request_payload(1721        self,1722        input_: LanguageModelInput,1723        *,1724        stop: list[str] | None = None,1725        **kwargs: Any,1726    ) -> dict:1727        messages = self._convert_input(input_).to_messages()1728        if stop is not None:1729            kwargs["stop"] = stop17301731        payload = {**self._default_params, **kwargs}17321733        if self._use_responses_api(payload):1734            if self.use_previous_response_id:1735                last_messages, previous_response_id = _get_last_messages(messages)1736                payload_to_use = last_messages if previous_response_id else messages1737                if previous_response_id:1738                    payload["previous_response_id"] = previous_response_id1739                payload = _construct_responses_api_payload(payload_to_use, payload)1740            else:1741                payload = _construct_responses_api_payload(messages, payload)1742        else:1743            payload["messages"] = [1744                _convert_message_to_dict(_convert_from_v1_to_chat_completions(m))1745                if isinstance(m, AIMessage)1746                else _convert_message_to_dict(m)1747                for m in messages1748            ]1749        return payload17501751    def _create_chat_result(1752        self,1753        response: dict | openai.BaseModel,1754        generation_info: dict | None = None,1755    ) -> ChatResult:1756        generations = []17571758        response_dict = (1759            response1760            if isinstance(response, dict)1761            # `parsed` may hold arbitrary Pydantic models from structured output.1762            # Exclude it from this dump and copy it from the typed response below.1763            else response.model_dump(1764                exclude={"choices": {"__all__": {"message": {"parsed"}}}}1765            )1766        )1767        # Sometimes the AI Model calling will get error, we should raise it (this is1768        # typically followed by a null value for `choices`, which we raise for1769        # separately below).1770        if response_dict.get("error"):1771            raise ValueError(response_dict.get("error"))17721773        # Raise informative error messages for non-OpenAI chat completions APIs1774        # that return malformed responses.1775        try:1776            choices = response_dict["choices"]1777        except KeyError as e:1778            msg = f"Response missing 'choices' key: {response_dict.keys()}"1779            raise KeyError(msg) from e17801781        if choices is None:1782            # Some OpenAI-compatible APIs (e.g., vLLM) may return null choices1783            # when the response format differs or an error occurs without1784            # populating the error field. Provide a more helpful error message.1785            msg = (1786                "Received response with null value for 'choices'. "1787                "This can happen when using OpenAI-compatible APIs (e.g., vLLM) "1788                "that return a response in an unexpected format. "1789                f"Full response keys: {list(response_dict.keys())}"1790            )1791            raise TypeError(msg)17921793        token_usage = response_dict.get("usage")1794        service_tier = response_dict.get("service_tier")17951796        for res in choices:1797            message = _convert_dict_to_message(res["message"])1798            if token_usage and isinstance(message, AIMessage):1799                message.usage_metadata = _create_usage_metadata(1800                    token_usage, service_tier1801                )1802            generation_info = generation_info or {}1803            generation_info["finish_reason"] = (1804                res.get("finish_reason")1805                if res.get("finish_reason") is not None1806                else generation_info.get("finish_reason")1807            )1808            if "logprobs" in res:1809                generation_info["logprobs"] = res["logprobs"]1810            gen = ChatGeneration(message=message, generation_info=generation_info)1811            generations.append(gen)1812        llm_output = {1813            "token_usage": token_usage,1814            "model_provider": "openai",1815            "model_name": response_dict.get("model", self.model_name),1816            "system_fingerprint": response_dict.get("system_fingerprint", ""),1817        }1818        if "id" in response_dict:1819            llm_output["id"] = response_dict["id"]1820        if service_tier:1821            llm_output["service_tier"] = service_tier18221823        if isinstance(response, openai.BaseModel) and getattr(1824            response, "choices", None1825        ):1826            message = response.choices[0].message  # type: ignore[attr-defined]1827            if hasattr(message, "parsed"):1828                generations[0].message.additional_kwargs["parsed"] = message.parsed1829            if hasattr(message, "refusal"):1830                generations[0].message.additional_kwargs["refusal"] = message.refusal18311832        return ChatResult(generations=generations, llm_output=llm_output)18331834    async def _astream(1835        self,1836        messages: list[BaseMessage],1837        stop: list[str] | None = None,1838        run_manager: AsyncCallbackManagerForLLMRun | None = None,1839        *,1840        stream_usage: bool | None = None,1841        **kwargs: Any,1842    ) -> AsyncIterator[ChatGenerationChunk]:1843        kwargs["stream"] = True1844        stream_usage = self._should_stream_usage(stream_usage, **kwargs)1845        if stream_usage:1846            kwargs["stream_options"] = {"include_usage": stream_usage}1847        payload = self._get_request_payload(messages, stop=stop, **kwargs)1848        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1849        base_generation_info = {}18501851        try:1852            if "response_format" in payload:1853                if self.include_response_headers:1854                    warnings.warn(1855                        "Cannot currently include response headers when "1856                        "response_format is specified."1857                    )1858                payload.pop("stream")1859                response_stream = self.root_async_client.beta.chat.completions.stream(1860                    **payload1861                )1862                context_manager = response_stream1863            else:1864                if self.include_response_headers:1865                    raw_response = await self.async_client.with_raw_response.create(1866                        **payload1867                    )1868                    response = raw_response.parse()1869                    base_generation_info = {"headers": dict(raw_response.headers)}1870                else:1871                    response = await self.async_client.create(**payload)1872                context_manager = response1873            async with context_manager as response:1874                is_first_chunk = True1875                async for chunk in _astream_with_chunk_timeout(1876                    response,1877                    self.stream_chunk_timeout,1878                    model_name=self.model_name,1879                ):1880                    if not isinstance(chunk, dict):1881                        chunk = chunk.model_dump()1882                    generation_chunk = self._convert_chunk_to_generation_chunk(1883                        chunk,1884                        default_chunk_class,1885                        base_generation_info if is_first_chunk else {},1886                    )1887                    if generation_chunk is None:1888                        continue1889                    default_chunk_class = generation_chunk.message.__class__1890                    logprobs = (generation_chunk.generation_info or {}).get("logprobs")1891                    if run_manager:1892                        await run_manager.on_llm_new_token(1893                            generation_chunk.text,1894                            chunk=generation_chunk,1895                            logprobs=logprobs,1896                        )1897                    is_first_chunk = False1898                    yield generation_chunk1899        except openai.BadRequestError as e:1900            _handle_openai_bad_request(e)1901        except openai.APIError as e:1902            _handle_openai_api_error(e)1903        if hasattr(response, "get_final_completion") and "response_format" in payload:1904            final_completion = await response.get_final_completion()1905            generation_chunk = self._get_generation_chunk_from_completion(1906                final_completion1907            )1908            if run_manager:1909                await run_manager.on_llm_new_token(1910                    generation_chunk.text, chunk=generation_chunk1911                )1912            yield generation_chunk19131914    async def _agenerate(1915        self,1916        messages: list[BaseMessage],1917        stop: list[str] | None = None,1918        run_manager: AsyncCallbackManagerForLLMRun | None = None,1919        **kwargs: Any,1920    ) -> ChatResult:1921        payload = self._get_request_payload(messages, stop=stop, **kwargs)1922        generation_info = None1923        raw_response = None1924        try:1925            if "response_format" in payload:1926                payload.pop("stream")1927                raw_response = await self.root_async_client.chat.completions.with_raw_response.parse(  # noqa: E5011928                    **payload1929                )1930                response = raw_response.parse()1931            elif self._use_responses_api(payload):1932                original_schema_obj = kwargs.get("response_format")1933                if original_schema_obj and _is_pydantic_class(original_schema_obj):1934                    raw_response = (1935                        await self.root_async_client.responses.with_raw_response.parse(1936                            **payload1937                        )1938                    )1939                else:1940                    raw_response = (1941                        await self.root_async_client.responses.with_raw_response.create(1942                            **payload1943                        )1944                    )1945                response = raw_response.parse()1946                if self.include_response_headers:1947                    generation_info = {"headers": dict(raw_response.headers)}1948                return _construct_lc_result_from_responses_api(1949                    response,1950                    schema=original_schema_obj,1951                    metadata=generation_info,1952                    output_version=self.output_version,1953                )1954            else:1955                raw_response = await self.async_client.with_raw_response.create(1956                    **payload1957                )1958                response = raw_response.parse()1959        except openai.BadRequestError as e:1960            _handle_openai_bad_request(e)1961        except openai.APIError as e:1962            _handle_openai_api_error(e)1963        except Exception as e:1964            if raw_response is not None and hasattr(raw_response, "http_response"):1965                e.response = raw_response.http_response  # type: ignore[attr-defined]1966            raise e1967        if (1968            self.include_response_headers1969            and raw_response is not None1970            and hasattr(raw_response, "headers")1971        ):1972            generation_info = {"headers": dict(raw_response.headers)}1973        return await run_in_executor(1974            None, self._create_chat_result, response, generation_info1975        )19761977    @property1978    def _identifying_params(self) -> dict[str, Any]:1979        """Get the identifying parameters."""1980        return {"model_name": self.model_name, **self._default_params}19811982    def _get_invocation_params(1983        self, stop: list[str] | None = None, **kwargs: Any1984    ) -> dict[str, Any]:1985        """Get the parameters used to invoke the model."""1986        params = {1987            "model": self.model_name,1988            **super()._get_invocation_params(stop=stop),1989            **self._default_params,1990            **kwargs,1991        }1992        # Redact headers from built-in remote MCP tool invocations1993        if (tools := params.get("tools")) and isinstance(tools, list):1994            params["tools"] = [1995                ({**tool, "headers": "**REDACTED**"} if "headers" in tool else tool)1996                if isinstance(tool, dict) and tool.get("type") == "mcp"1997                else tool1998                for tool in tools1999            ]

Findings

✓ No findings reported for this file.

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.