1"""OpenAI chat wrapper.23!!! warning "API scope"45 `ChatOpenAI` targets6 [official OpenAI API specifications](https://github.com/openai/openai-openapi)7 only. Non-standard response fields added by third-party providers (e.g.,8 `reasoning_content`, `reasoning_details`) are **not** extracted or9 preserved. If you are pointing `base_url` at a provider such as10 OpenRouter, vLLM, or DeepSeek, use the corresponding provider-specific11 LangChain package instead (e.g., `ChatDeepSeek`, `ChatOpenRouter`).12"""1314from __future__ import annotations1516import base6417import json18import logging19import os20import re21import ssl22import sys23import warnings24from collections.abc import (25 AsyncIterator,26 Awaitable,27 Callable,28 Iterator,29 Mapping,30 Sequence,31)32from functools import partial33from io import BytesIO34from json import JSONDecodeError35from math import ceil36from operator import itemgetter37from typing import (38 TYPE_CHECKING,39 Any,40 Literal,41 TypeAlias,42 TypeVar,43 cast,44)45from urllib.parse import urlparse4647import certifi48import openai49import tiktoken50from langchain_core.callbacks import (51 AsyncCallbackManagerForLLMRun,52 CallbackManagerForLLMRun,53)54from langchain_core.exceptions import ContextOverflowError55from langchain_core.language_models import (56 LanguageModelInput,57 ModelProfileRegistry,58)59from langchain_core.language_models.chat_models import (60 BaseChatModel,61 LangSmithParams,62)63from langchain_core.messages import (64 AIMessage,65 AIMessageChunk,66 BaseMessage,67 BaseMessageChunk,68 ChatMessage,69 ChatMessageChunk,70 FunctionMessage,71 FunctionMessageChunk,72 HumanMessage,73 HumanMessageChunk,74 InvalidToolCall,75 SystemMessage,76 SystemMessageChunk,77 ToolCall,78 ToolMessage,79 ToolMessageChunk,80 is_data_content_block,81)82from langchain_core.messages import content as types83from langchain_core.messages.ai import (84 InputTokenDetails,85 OutputTokenDetails,86 UsageMetadata,87)88from 
langchain_core.messages.block_translators.openai import (89 _convert_from_v03_ai_message,90 convert_to_openai_data_block,91)92from langchain_core.messages.tool import tool_call_chunk93from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser94from langchain_core.output_parsers.openai_tools import (95 JsonOutputKeyToolsParser,96 PydanticToolsParser,97 make_invalid_tool_call,98 parse_tool_call,99)100from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult101from langchain_core.runnables import (102 Runnable,103 RunnableLambda,104 RunnableMap,105 RunnablePassthrough,106)107from langchain_core.runnables.config import run_in_executor108from langchain_core.tools import BaseTool109from langchain_core.tools.base import _stringify110from langchain_core.utils import get_pydantic_field_names111from langchain_core.utils.function_calling import (112 convert_to_openai_function,113 convert_to_openai_tool,114)115from langchain_core.utils.pydantic import (116 PydanticBaseModel,117 TypeBaseModel,118 is_basemodel_subclass,119)120from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env121from pydantic import (122 BaseModel,123 ConfigDict,124 Field,125 SecretStr,126 ValidationError,127 field_validator,128 model_validator,129)130from pydantic.v1 import BaseModel as BaseModelV1131from typing_extensions import Self132133from langchain_openai._version import __version__134from langchain_openai.chat_models._client_utils import (135 _astream_with_chunk_timeout,136 _build_proxied_async_httpx_client,137 _build_proxied_sync_httpx_client,138 _float_env,139 _get_default_async_httpx_client,140 _get_default_httpx_client,141 _log_proxy_env_bypass_once,142 _resolve_socket_options,143 _resolve_sync_and_async_api_keys,144 _should_bypass_socket_options_for_proxy_env,145 _warn_if_proxy_env_shadowed,146)147from langchain_openai.chat_models._compat import (148 _convert_from_v1_to_chat_completions,149 
def _get_ssrf_safe_client() -> httpx.Client:
    """Return the module-level cached client, building it on first use.

    The client is created through `ssrf_safe_client` with the module's
    `global_ssl_context` for verification and with redirects disabled, then
    stored in `_ssrf_client` so later calls reuse the same instance.

    Returns:
        The cached `httpx.Client`.
    """
    global _ssrf_client
    if _ssrf_client is not None:
        return _ssrf_client

    # Imported lazily so the dependency is only loaded when a client is needed.
    from langchain_core._security._transport import ssrf_safe_client

    _ssrf_client = ssrf_safe_client(verify=global_ssl_context, follow_redirects=False)
    return _ssrf_client
invalid_tool_calls = []223 if raw_tool_calls := _dict.get("tool_calls"):224 for raw_tool_call in raw_tool_calls:225 try:226 tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))227 except Exception as e:228 invalid_tool_calls.append(229 make_invalid_tool_call(raw_tool_call, str(e))230 )231 if audio := _dict.get("audio"):232 additional_kwargs["audio"] = audio233 return AIMessage(234 content=content,235 additional_kwargs=additional_kwargs,236 name=name,237 id=id_,238 tool_calls=tool_calls,239 invalid_tool_calls=invalid_tool_calls,240 )241 if role in ("system", "developer"):242 additional_kwargs = {"__openai_role__": role} if role == "developer" else {}243 return SystemMessage(244 content=_dict.get("content", ""),245 name=name,246 id=id_,247 additional_kwargs=additional_kwargs,248 )249 if role == "function":250 return FunctionMessage(251 content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_252 )253 if role == "tool":254 additional_kwargs = {}255 if "name" in _dict:256 additional_kwargs["name"] = _dict["name"]257 return ToolMessage(258 content=_dict.get("content", ""),259 tool_call_id=cast(str, _dict.get("tool_call_id")),260 additional_kwargs=additional_kwargs,261 name=name,262 id=id_,263 )264 return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]265266267def _sanitize_chat_completions_content(content: str | list[dict]) -> str | list[dict]:268 """Sanitize content for chat/completions API.269270 For list content, filters text blocks to only keep 'type' and 'text' keys.271 """272 if isinstance(content, list):273 sanitized = []274 for block in content:275 if (276 isinstance(block, dict)277 and block.get("type") == "text"278 and "text" in block279 ):280 sanitized.append({"type": "text", "text": block["text"]})281 else:282 sanitized.append(block)283 return sanitized284 return content285286287def _format_message_content(288 content: Any,289 api: Literal["chat/completions", "responses"] = 
def _convert_message_to_dict(
    message: BaseMessage,
    api: Literal["chat/completions", "responses"] = "chat/completions",
) -> dict:
    """Serialize a LangChain message into the dict payload sent to OpenAI.

    Args:
        message: The LangChain message to serialize.
        api: Target API flavor. Forwarded to content formatting and used to
            decide whether audio block IDs are passed back.

    Returns:
        A dict with `content` and `role`, plus `name`, `tool_calls`,
        `function_call`, `audio` or `tool_call_id` where the message type
        provides them.

    Raises:
        TypeError: If `message` is not one of the handled message classes.
    """
    payload: dict[str, Any] = {
        "content": _format_message_content(message.content, api=api, role=message.type)
    }
    sender_name = message.name or message.additional_kwargs.get("name")
    if sender_name is not None:
        payload["name"] = sender_name

    # The isinstance checks below run in a fixed order; each branch fills in
    # the role plus any role-specific fields and returns.
    if isinstance(message, ChatMessage):
        payload["role"] = message.role
        return payload

    if isinstance(message, HumanMessage):
        payload["role"] = "user"
        return payload

    if isinstance(message, AIMessage):
        payload["role"] = "assistant"
        extras = message.additional_kwargs

        if message.tool_calls or message.invalid_tool_calls:
            parsed_calls = [
                _lc_tool_call_to_openai_tool_call(call) for call in message.tool_calls
            ]
            unparsed_calls = [
                _lc_invalid_tool_call_to_openai_tool_call(call)
                for call in message.invalid_tool_calls
            ]
            payload["tool_calls"] = parsed_calls + unparsed_calls
        elif "tool_calls" in extras:
            # Raw tool calls are forwarded with only the supported properties.
            allowed_keys = {"id", "type", "function"}
            payload["tool_calls"] = [
                {key: value for key, value in raw_call.items() if key in allowed_keys}
                for raw_call in extras["tool_calls"]
            ]
        elif "function_call" in extras:
            # OpenAI raises 400 if both function_call and tool_calls are present in the
            # same message.
            payload["function_call"] = extras["function_call"]

        # If tool calls present, content null value should be None not empty string.
        if "function_call" in payload or "tool_calls" in payload:
            payload["content"] = payload["content"] or None

        audio_ref: dict[str, Any] | None = None
        for part in message.content:
            if not isinstance(part, dict) or part.get("type") != "audio":
                continue
            part_id = part.get("id")
            if part_id and api != "responses":
                # openai doesn't support passing the data back - only the id
                # https://platform.openai.com/docs/guides/audio/multi-turn-conversations
                audio_ref = {"id": part_id}

        if not audio_ref and "audio" in extras:
            stored_audio = extras["audio"]
            if "id" in stored_audio:
                audio_ref = {"id": stored_audio["id"]}
            else:
                audio_ref = stored_audio

        if audio_ref:
            payload["audio"] = audio_ref
        return payload

    if isinstance(message, SystemMessage):
        # A stored "__openai_role__" (e.g. "developer") overrides the default.
        payload["role"] = message.additional_kwargs.get("__openai_role__", "system")
        return payload

    if isinstance(message, FunctionMessage):
        payload["role"] = "function"
        return payload

    if isinstance(message, ToolMessage):
        payload["role"] = "tool"
        payload["tool_call_id"] = message.tool_call_id
        payload["content"] = _sanitize_chat_completions_content(payload["content"])
        # Tool messages keep only these keys; anything else (e.g. `name`) is dropped.
        kept_keys = {"content", "role", "tool_call_id"}
        return {key: value for key, value in payload.items() if key in kept_keys}

    msg = f"Got unknown type {message}"
    raise TypeError(msg)
"developer":469 additional_kwargs = {"__openai_role__": "developer"}470 else:471 additional_kwargs = {}472 return SystemMessageChunk(473 content=content, id=id_, additional_kwargs=additional_kwargs474 )475 if role == "function" or default_class == FunctionMessageChunk:476 return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)477 if role == "tool" or default_class == ToolMessageChunk:478 return ToolMessageChunk(479 content=content, tool_call_id=_dict["tool_call_id"], id=id_480 )481 if role or default_class == ChatMessageChunk:482 return ChatMessageChunk(content=content, role=role, id=id_)483 return default_class(content=content, id=id_) # type: ignore[call-arg]484485486def _update_token_usage(487 overall_token_usage: int | dict, new_usage: int | dict488) -> int | dict:489 # Token usage is either ints or dictionaries490 # `reasoning_tokens` is nested inside `completion_tokens_details`491 if isinstance(new_usage, int):492 if not isinstance(overall_token_usage, int):493 msg = (494 f"Got different types for token usage: "495 f"{type(new_usage)} and {type(overall_token_usage)}"496 )497 raise ValueError(msg)498 return new_usage + overall_token_usage499 if isinstance(new_usage, dict):500 if not isinstance(overall_token_usage, dict):501 msg = (502 f"Got different types for token usage: "503 f"{type(new_usage)} and {type(overall_token_usage)}"504 )505 raise ValueError(msg)506 return {507 k: _update_token_usage(overall_token_usage.get(k, 0), v)508 for k, v in new_usage.items()509 }510 warnings.warn(f"Unexpected type for token usage: {type(new_usage)}")511 return new_usage512513514class OpenAIContextOverflowError(openai.BadRequestError, ContextOverflowError):515 """BadRequestError raised when input exceeds OpenAI's context limit."""516517518class OpenAIAPIContextOverflowError(openai.APIError, ContextOverflowError):519 """APIError raised when input exceeds OpenAI's context limit."""520521522def _handle_openai_bad_request(e: openai.BadRequestError) -> None:523 
if (524 "context_length_exceeded" in str(e)525 or "Input tokens exceed the configured limit" in e.message526 or "prompt is too long" in e.message527 ):528 raise OpenAIContextOverflowError(529 message=e.message, response=e.response, body=e.body530 ) from e531 if (532 "'response_format' of type 'json_schema' is not supported with this model"533 ) in e.message:534 message = (535 "This model does not support OpenAI's structured output feature, which "536 "is the default method for `with_structured_output` as of "537 "langchain-openai==0.3. To use `with_structured_output` with this model, "538 'specify `method="function_calling"`.'539 )540 warnings.warn(message)541 raise e542 if "Invalid schema for response_format" in e.message:543 message = (544 "Invalid schema for OpenAI's structured output feature, which is the "545 "default method for `with_structured_output` as of langchain-openai==0.3. "546 'Specify `method="function_calling"` instead or update your schema. '547 "See supported schemas: "548 "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas"549 )550 warnings.warn(message)551 raise e552 raise553554555def _handle_openai_api_error(e: openai.APIError) -> None:556 error_message = str(e)557 if "exceeds the context window" in error_message:558 raise OpenAIAPIContextOverflowError(559 message=e.message, request=e.request, body=e.body560 ) from e561 raise562563564_RESPONSES_API_ONLY_PREFIXES = (565 "gpt-5-pro",566 "gpt-5.2-pro",567 "gpt-5.4-pro",568 "gpt-5.5-pro",569)570571572def _model_prefers_responses_api(model_name: str | None) -> bool:573 if not model_name:574 return False575 return model_name.startswith(_RESPONSES_API_ONLY_PREFIXES) or "codex" in model_name576577578_BM = TypeVar("_BM", bound=BaseModel)579_DictOrPydanticClass: TypeAlias = dict[str, Any] | type[_BM] | type580_DictOrPydantic: TypeAlias = dict | _BM581582583class BaseChatOpenAI(BaseChatModel):584 """Base wrapper around OpenAI large language models for chat.585586 This base class 
targets587 [official OpenAI API specifications](https://github.com/openai/openai-openapi)588 only. Non-standard response fields added by third-party providers (e.g.,589 `reasoning_content`) are not extracted. Use a provider-specific subclass for590 full provider support.591 """592593 client: Any = Field(default=None, exclude=True)594595 async_client: Any = Field(default=None, exclude=True)596597 root_client: Any = Field(default=None, exclude=True)598599 root_async_client: Any = Field(default=None, exclude=True)600601 model_name: str = Field(default="gpt-3.5-turbo", alias="model")602 """Model name to use."""603604 temperature: float | None = None605 """What sampling temperature to use."""606607 model_kwargs: dict[str, Any] = Field(default_factory=dict)608 """Holds any model parameters valid for `create` call not explicitly specified."""609610 openai_api_key: (611 SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]612 ) = Field(613 alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)614 )615 """API key to use.616617 Can be inferred from the `OPENAI_API_KEY` environment variable, or specified618 as a string, or sync or async callable that returns a string.619620 ??? example "Specify with environment variable"621622 ```bash623 export OPENAI_API_KEY=...624 ```625 ```python626 from langchain_openai import ChatOpenAI627628 model = ChatOpenAI(model="gpt-5-nano")629 ```630631 ??? example "Specify with a string"632633 ```python634 from langchain_openai import ChatOpenAI635636 model = ChatOpenAI(model="gpt-5-nano", api_key="...")637 ```638639 ??? example "Specify with a sync callable"640641 ```python642 from langchain_openai import ChatOpenAI643644 def get_api_key() -> str:645 # Custom logic to retrieve API key646 return "..."647648 model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)649 ```650651 ??? 
example "Specify with an async callable"652653 ```python654 from langchain_openai import ChatOpenAI655656 async def get_api_key() -> str:657 # Custom async logic to retrieve API key658 return "..."659660 model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)661 ```662 """663664 openai_api_base: str | None = Field(default=None, alias="base_url")665 """Base URL path for API requests, leave blank if not using a proxy or service emulator.666667 Resolution order (first match wins):668669 1. Explicit `base_url` (or `openai_api_base`) kwarg.670 2. Env var `OPENAI_API_BASE` (read by LangChain at init).671 3. Env var `OPENAI_BASE_URL` (read by the underlying `openai` SDK client).672673 `OPENAI_BASE_URL` is also inspected by LangChain only to decide whether to674 default-enable `stream_usage` — when set, the default is left off because many675 non-OpenAI endpoints do not support streaming token usage.676 """ # noqa: E501677678 openai_organization: str | None = Field(default=None, alias="organization")679 """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""680681 # to support explicit proxy for OpenAI682 openai_proxy: str | None = Field(683 default_factory=from_env("OPENAI_PROXY", default=None)684 )685686 request_timeout: float | tuple[float, float] | Any | None = Field(687 default=None, alias="timeout"688 )689 """Timeout for requests to OpenAI completion API.690691 Can be float, `httpx.Timeout` or `None`.692 """693694 stream_usage: bool | None = None695 """Whether to include usage metadata in streaming output.696697 If enabled, an additional message chunk will be generated during the stream698 including usage metadata.699700 This parameter is enabled unless `openai_api_base` is set or the model is701 initialized with a custom client, as many chat completions APIs do not702 support streaming token usage.703704 !!! version-added "Added in `langchain-openai` 0.3.9"705706 !!! 
warning "Behavior changed in `langchain-openai` 0.3.35"707708 Enabled for default base URL and client.709 """710711 max_retries: int | None = None712 """Maximum number of retries to make when generating."""713714 presence_penalty: float | None = None715 """Penalizes repeated tokens."""716717 frequency_penalty: float | None = None718 """Penalizes repeated tokens according to frequency."""719720 seed: int | None = None721 """Seed for generation"""722723 logprobs: bool | None = None724 """Whether to return logprobs."""725726 top_logprobs: int | None = None727 """Number of most likely tokens to return at each token position, each with an728 associated log probability.729730 `logprobs` must be set to true if this parameter is used.731 """732733 logit_bias: dict[int, int] | None = None734 """Modify the likelihood of specified tokens appearing in the completion."""735736 streaming: bool = False737 """Whether to stream the results or not."""738739 n: int | None = None740 """Number of chat completions to generate for each prompt."""741742 top_p: float | None = None743 """Total probability mass of tokens to consider at each step."""744745 max_tokens: int | None = Field(default=None)746 """Maximum number of tokens to generate."""747748 reasoning_effort: str | None = None749 """Constrains effort on reasoning for reasoning models.750751 For use with the Chat Completions API. Reasoning models only.752753 Currently supported values are `'minimal'`, `'low'`, `'medium'`, and754 `'high'`. Reducing reasoning effort can result in faster responses and fewer755 tokens used on reasoning in a response.756 """757758 reasoning: dict[str, Any] | None = None759 """Reasoning parameters for reasoning models. None disables reasoning.760761 For use with the Responses API.762763 ```python764 reasoning={765 "effort": None, # Default None; can be "low", "medium", or "high"766 "summary": "auto", # Can be "auto", "concise", or "detailed"767 }768 ```769770 !!! 
    """The model name to pass to tiktoken when using this class.

    Tiktoken is used to count the number of tokens in documents to constrain
    them to be under a certain limit.

    By default, when set to `None`, this will be the same as the chat model name
    (`model_name`). However, there are some cases where you may want to use this
    chat model class with a model name not supported by tiktoken. This can include
    when using Azure deployments or when using one of the many model providers that
    expose an OpenAI-like API but with different models. In those cases, in order
    to avoid erroring when tiktoken is called, you can specify a model name to use
    here.
    """
Must specify `http_client` as well if you'd814 like a custom client for sync invocations.815 """816817 http_socket_options: Sequence[tuple[int, int, int]] | None = Field(818 default=None, exclude=True819 )820 """TCP socket options applied to the httpx transports built by this instance.821822 Defaults to a conservative TCP-keepalive + `TCP_USER_TIMEOUT` profile that823 targets a ~2-minute bound on silent connection hangs (silent mid-stream peer824 loss, gVisor/NAT idle timeouts, silent TCP black holes) on platforms that825 support the full option set. On platforms that only support a subset826 (macOS without `TCP_USER_TIMEOUT`, Windows with only `SO_KEEPALIVE`,827 minimal kernels), unsupported options are silently dropped and the bound828 degrades to whatever the remaining options + OS defaults provide — still829 better than indefinite hang.830831 Accepted values:832833 - `None` (default): use env-driven defaults. Matches the "unset" convention834 used by `http_client` elsewhere on this class.835 - `()` (empty): disable socket-option injection entirely. Inherits the OS836 defaults and restores httpx's native env-proxy auto-detection.837 - A non-empty sequence of `(level, option, value)` tuples: explicit838 override; passed verbatim to the transport (not filtered). Unsupported839 options raise `OSError` at connect time rather than being silently840 dropped — the user chose them explicitly.841842 Environment variables (only consulted when this field is `None`):843 `LANGCHAIN_OPENAI_TCP_KEEPALIVE` (set to `0` to disable entirely — the844 kill-switch), `LANGCHAIN_OPENAI_TCP_KEEPIDLE`,845 `LANGCHAIN_OPENAI_TCP_KEEPINTVL`, `LANGCHAIN_OPENAI_TCP_KEEPCNT`,846 `LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS`.847848 Applied per side: if `http_client` is supplied, the sync path uses849 that user-owned client's socket options as-is; the async path still850 gets `http_socket_options` applied to its default builder (and851 vice-versa for `http_async_client`). 
Supply both to take full control.852853 !!! note "Interaction with env-proxy auto-detection"854855 When a custom `httpx` transport is active, `httpx` disables its856 native env-proxy auto-detection (`HTTP_PROXY` / `HTTPS_PROXY` /857 `ALL_PROXY` / `NO_PROXY` and macOS/Windows system proxy settings).858859 To keep the default shape safe, `ChatOpenAI` detects the860 "proxy-env-shadow" pattern and **skips the custom transport861 entirely** when **all** of the following hold:862863 - `http_socket_options` is left at its default (`None`)864 - No `http_client` or `http_async_client` supplied865 - No `openai_proxy` supplied866 - A proxy env var or system proxy is visible to httpx867868 On that specific shape, the instance falls back to pre-PR behavior869 and httpx's env-proxy auto-detection applies (a one-time `INFO` log870 records the bypass for observability).871872 If you explicitly set `http_socket_options=[...]` while a proxy873 env var is also set, no bypass — you opted into the transport, and874 a one-time `WARNING` records the shadowing. Set875 `http_socket_options=()` or `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` to876 disable transport injection explicitly, or pass a fully-configured877 `http_async_client` / `http_client` to take full control. The878 `openai_proxy` constructor kwarg is unaffected — socket options879 are applied cleanly through the proxied transport on that path.880 """881882 stream_chunk_timeout: float | None = Field(883 default_factory=lambda: _float_env(884 "LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0885 ),886 exclude=True,887 )888 """Per-chunk wall-clock timeout (seconds) on async streaming responses.889890 Applies to async invocations only (`astream`, `ainvoke` with streaming,891 etc.). Sync streaming (`stream`) is not affected.892893 Fires between content chunks yielded by the openai SDK's streaming iterator894 (i.e., each call to `__anext__` on the response). 
Crucially, this is895 **not** the same as httpx's `timeout.read`:896897 - httpx's read timeout is inter-byte and gets reset every time *any* bytes898 arrive on the socket — including OpenAI's SSE keepalive comments899 (`: keepalive`) that trickle down during long model generations. A900 stream that's silent on *content* but still producing keepalives looks901 alive forever to httpx.902 - `stream_chunk_timeout` measures the gap between *parsed chunks*. The903 openai SDK's SSE parser consumes keepalive comments internally and does904 not emit them as chunks, so keepalives do *not* reset this timer. It905 fires on genuine content silence.906907 When it fires, a `StreamChunkTimeoutError`908 (subclass of `asyncio.TimeoutError`) is raised with a self-describing909 message naming this knob, the env-var override, the model, and the910 number of chunks received before the stall. A WARNING log with911 `extra={"source": "stream_chunk_timeout", "timeout_s": <value>,912 "model_name": <value>, "chunks_received": <value>}` also fires so913 aggregate logging can distinguish app-layer timeouts from914 transport-layer failures.915916 Defaults to 120s. Set to `None` or `0` to disable. Overridable via the917 `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. 
Negative values918 (from either the env var or the constructor kwarg — e.g., hydrated919 from YAML/JSON configs) fall back to the default with a `WARNING` log920 rather than silently disabling the wrapper, so a misconfigured value921 still boots safely and the fallback is visible.922 """923924 stop: list[str] | str | None = Field(default=None, alias="stop_sequences")925 """Default stop sequences."""926927 extra_body: Mapping[str, Any] | None = None928 """Optional additional JSON properties to include in the request parameters929 when making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or930 other providers.931932 This is the recommended way to pass custom parameters that are specific to your933 OpenAI-compatible API provider but not part of the standard OpenAI API.934935 Examples:936 - [LM Studio](https://lmstudio.ai/) TTL parameter: `extra_body={"ttl": 300}`937 - [vLLM](https://github.com/vllm-project/vllm) custom parameters:938 `extra_body={"use_beam_search": True}`939 - Any other provider-specific parameters940941 !!! warning942943 Do not use `model_kwargs` for custom parameters that are not part of the944 standard OpenAI API, as this will cause errors when making API calls. 
    If a parameter is disabled then it will not be used by default in any methods, e.g.
    in `with_structured_output`. However, this does not prevent a user from directly
    passing in the parameter during invocation.
version-added "Added in `langchain-openai` 0.3.24"1001 """10021003 truncation: str | None = None1004 """Truncation strategy (Responses API).10051006 Can be `'auto'` or `'disabled'` (default).10071008 If `'auto'`, model may drop input items from the middle of the message sequence to1009 fit the context window.10101011 !!! version-added "Added in `langchain-openai` 0.3.24"1012 """10131014 use_previous_response_id: bool = False1015 """If `True`, always pass `previous_response_id` using the ID of the most recent1016 response. Responses API only.10171018 Input messages up to the most recent response will be dropped from request1019 payloads.10201021 For example, the following two are equivalent:10221023 ```python1024 model = ChatOpenAI(1025 model="...",1026 use_previous_response_id=True,1027 )1028 model.invoke(1029 [1030 HumanMessage("Hello"),1031 AIMessage("Hi there!", response_metadata={"id": "resp_123"}),1032 HumanMessage("How are you?"),1033 ]1034 )1035 ```10361037 ```python1038 model = ChatOpenAI(model="...", use_responses_api=True)1039 model.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")1040 ```10411042 !!! version-added "Added in `langchain-openai` 0.3.26"1043 """10441045 use_responses_api: bool | None = None1046 """Whether to use the Responses API instead of the Chat API.10471048 If not specified then will be inferred based on invocation params.10491050 !!! 
version-added "Added in `langchain-openai` 0.3.9"1051 """10521053 output_version: str | None = Field(1054 default_factory=from_env("LC_OUTPUT_VERSION", default=None)1055 )1056 """Version of `AIMessage` output format to use.10571058 This field is used to roll-out new output formats for chat model `AIMessage`1059 responses in a backwards-compatible way.10601061 Supported values:10621063 - `'v0'`: `AIMessage` format as of `langchain-openai 0.3.x`.1064 - `'responses/v1'`: Formats Responses API output items into AIMessage content blocks1065 (Responses API only)1066 - `'v1'`: v1 of LangChain cross-provider standard.10671068 !!! warning "Behavior changed in `langchain-openai` 1.0.0"10691070 Default updated to `"responses/v1"`.1071 """10721073 model_config = ConfigDict(populate_by_name=True)10741075 @property1076 def model(self) -> str:1077 """Same as model_name."""1078 return self.model_name10791080 @model_validator(mode="before")1081 @classmethod1082 def build_extra(cls, values: dict[str, Any]) -> Any:1083 """Build extra kwargs from additional params that were passed in."""1084 all_required_field_names = get_pydantic_field_names(cls)1085 return _build_model_kwargs(values, all_required_field_names)10861087 @field_validator("stream_chunk_timeout", mode="after")1088 @classmethod1089 def _validate_stream_chunk_timeout(cls, value: float | None) -> float | None:1090 """Reject negative constructor values; fall back to the env-driven default.10911092 Matches the env-var path in `_float_env`: a negative value is a typo,1093 not an opt-out (`None`/`0` are the documented off switches). Configs1094 hydrated from YAML/JSON would otherwise silently disable the wrapper1095 and reintroduce the indefinite-stream hang the feature prevents.1096 """1097 if value is not None and value < 0:1098 fallback = _float_env("LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0)1099 logger.warning(1100 "Invalid `stream_chunk_timeout=%r` (negative); "1101 "falling back to %s. 
Pass `None` or `0` to disable.",1102 value,1103 fallback,1104 )1105 return fallback1106 return value11071108 @model_validator(mode="before")1109 @classmethod1110 def validate_temperature(cls, values: dict[str, Any]) -> Any:1111 """Validate temperature parameter for different models.11121113 - gpt-5 models (excluding gpt-5-chat) only allow `temperature=1` or unset1114 (Defaults to 1)1115 """1116 model = values.get("model_name") or values.get("model") or ""1117 model_lower = model.lower()11181119 # For o1 models, set temperature=1 if not provided1120 if model_lower.startswith("o1") and "temperature" not in values:1121 values["temperature"] = 111221123 # For gpt-5 models, handle temperature restrictions. Temperature is supported1124 # by gpt-5-chat and gpt-5 models with reasoning_effort='none' or1125 # reasoning={'effort': 'none'}.1126 if (1127 model_lower.startswith("gpt-5")1128 and ("chat" not in model_lower)1129 and values.get("reasoning_effort") != "none"1130 and (values.get("reasoning") or {}).get("effort") != "none"1131 ):1132 temperature = values.get("temperature")1133 if temperature is not None and temperature != 1:1134 # For gpt-5 (non-chat), only temperature=1 is supported1135 # So we remove any non-defaults1136 values.pop("temperature", None)11371138 return values11391140 @model_validator(mode="after")1141 def _set_openai_chat_version(self) -> Self:1142 """Set package version in metadata.11431144 Note: Subclasses that inherit from `BaseChatOpenAI` (e.g.1145 `ChatDeepSeek`, `ChatXAI`) must use a **unique** validator name1146 (e.g. `_set_deepseek_version`) instead of overriding this one. 
    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Validate settings and build the OpenAI sync/async clients.

        Steps, in order:

        1. Reject `n < 1`, and `n > 1` combined with `streaming`.
        2. Fill `openai_organization` and `openai_api_base` from environment
           variables when they are unset.
        3. Default `stream_usage` to `True` when no custom base URL, proxy or
           client has been configured.
        4. Resolve the API key into sync and async variants and assemble the
           keyword arguments shared by both clients.
        5. Resolve the socket options used for the default/proxied HTTP clients.
        6. Create `root_client`/`client` and `root_async_client`/`async_client`
           unless they were already supplied.

        Returns:
            The validated instance (`self`).

        Raises:
            ValueError: If `n` is below 1, if `n > 1` while streaming, or if
                `openai_proxy` is combined with a user-supplied
                `http_client`/`http_async_client`.
        """
        if self.n is not None and self.n < 1:
            msg = "n must be at least 1."
            raise ValueError(msg)
        if self.n is not None and self.n > 1 and self.streaming:
            msg = "n must be 1 when streaming."
            raise ValueError(msg)

        # Check OPENAI_ORGANIZATION for backwards compatibility.
        self.openai_organization = (
            self.openai_organization
            or os.getenv("OPENAI_ORG_ID")
            or os.getenv("OPENAI_ORGANIZATION")
        )
        self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")

        # Enable stream_usage by default if using default base URL and client
        # (every attribute listed below must still be None, and OPENAI_BASE_URL
        # must be absent from the environment).
        if (
            all(
                getattr(self, key, None) is None
                for key in (
                    "stream_usage",
                    "openai_proxy",
                    "openai_api_base",
                    "base_url",
                    "client",
                    "root_client",
                    "async_client",
                    "root_async_client",
                    "http_client",
                    "http_async_client",
                )
            )
            and "OPENAI_BASE_URL" not in os.environ
        ):
            self.stream_usage = True

        # Resolve API key from SecretStr or Callable
        sync_api_key_value: str | Callable[[], str] | None = None
        async_api_key_value: str | Callable[[], Awaitable[str]] | None = None

        if self.openai_api_key is not None:
            # Because OpenAI and AsyncOpenAI clients support either sync or async
            # callables for the API key, we need to resolve separate values here.
            sync_api_key_value, async_api_key_value = _resolve_sync_and_async_api_keys(
                self.openai_api_key
            )

        # Keyword arguments passed to both `openai.OpenAI` and `openai.AsyncOpenAI`.
        client_params: dict = {
            "organization": self.openai_organization,
            "base_url": self.openai_api_base,
            "timeout": self.request_timeout,
            "default_headers": self.default_headers,
            "default_query": self.default_query,
        }
        # `max_retries` is only forwarded when explicitly set.
        if self.max_retries is not None:
            client_params["max_retries"] = self.max_retries

        # A proxy and a caller-provided HTTP client are mutually exclusive.
        if self.openai_proxy and (self.http_client or self.http_async_client):
            openai_proxy = self.openai_proxy
            http_client = self.http_client
            http_async_client = self.http_async_client
            msg = (
                "Cannot specify 'openai_proxy' if one of "
                "'http_client'/'http_async_client' is already specified. Received:\n"
                f"{openai_proxy=}\n{http_client=}\n{http_async_client=}"
            )
            raise ValueError(msg)
        if _should_bypass_socket_options_for_proxy_env(
            http_socket_options=self.http_socket_options,
            http_client=self.http_client,
            http_async_client=self.http_async_client,
            openai_proxy=self.openai_proxy,
        ):
            # Default-shape construction + proxy env var visible to httpx:
            # skip the custom transport so httpx's env-proxy auto-detection
            # still applies. Users who want kernel-level TCP tuning alongside
            # an env proxy can opt in explicitly via `http_socket_options`.
            resolved_socket_options: tuple[tuple[int, int, int], ...] = ()
            _log_proxy_env_bypass_once()
        else:
            resolved_socket_options = _resolve_socket_options(self.http_socket_options)
            _warn_if_proxy_env_shadowed(
                resolved_socket_options, openai_proxy=self.openai_proxy
            )
        # Sync client: built only when none was supplied and a sync key exists.
        if not self.client:
            if sync_api_key_value is None:
                # No valid sync API key, leave client as None and raise informative
                # error on invocation.
                self.client = None
                self.root_client = None
            else:
                if self.openai_proxy and not self.http_client:
                    self.http_client = _build_proxied_sync_httpx_client(
                        proxy=self.openai_proxy,
                        verify=global_ssl_context,
                        socket_options=resolved_socket_options,
                    )
                sync_specific = {
                    "http_client": self.http_client
                    or _get_default_httpx_client(
                        self.openai_api_base,
                        self.request_timeout,
                        resolved_socket_options,
                    ),
                    "api_key": sync_api_key_value,
                }
                self.root_client = openai.OpenAI(**client_params, **sync_specific)  # type: ignore[arg-type]
                self.client = self.root_client.chat.completions
        # Async client: unlike the sync branch there is no check on the resolved
        # key, so `async_api_key_value` is passed through even when it is None.
        if not self.async_client:
            if self.openai_proxy and not self.http_async_client:
                self.http_async_client = _build_proxied_async_httpx_client(
                    proxy=self.openai_proxy,
                    verify=global_ssl_context,
                    socket_options=resolved_socket_options,
                )
            async_specific = {
                "http_client": self.http_async_client
                or _get_default_async_httpx_client(
                    self.openai_api_base,
                    self.request_timeout,
                    resolved_socket_options,
                ),
                "api_key": async_api_key_value,
            }
            self.root_async_client = openai.AsyncOpenAI(
                **client_params,
                **async_specific,  # type: ignore[arg-type]
            )
            self.async_client = self.root_async_client.chat.completions
        return self
exclude_if_none = {1295 "presence_penalty": self.presence_penalty,1296 "frequency_penalty": self.frequency_penalty,1297 "seed": self.seed,1298 "top_p": self.top_p,1299 "logprobs": self.logprobs,1300 "top_logprobs": self.top_logprobs,1301 "logit_bias": self.logit_bias,1302 "stop": self.stop or None, # Also exclude empty list for this1303 "max_tokens": self.max_tokens,1304 "extra_body": self.extra_body,1305 "n": self.n,1306 "temperature": self.temperature,1307 "reasoning_effort": self.reasoning_effort,1308 "reasoning": self.reasoning,1309 "verbosity": self.verbosity,1310 "context_management": self.context_management,1311 "include": self.include,1312 "service_tier": self.service_tier,1313 "truncation": self.truncation,1314 "store": self.store,1315 }13161317 return {1318 "model": self.model_name,1319 "stream": self.streaming,1320 **{k: v for k, v in exclude_if_none.items() if v is not None},1321 **self.model_kwargs,1322 }13231324 def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:1325 overall_token_usage: dict = {}1326 system_fingerprint = None1327 for output in llm_outputs:1328 if output is None:1329 # Happens in streaming1330 continue1331 token_usage = output.get("token_usage")1332 if token_usage is not None:1333 for k, v in token_usage.items():1334 if v is None:1335 continue1336 if k in overall_token_usage:1337 overall_token_usage[k] = _update_token_usage(1338 overall_token_usage[k], v1339 )1340 else:1341 overall_token_usage[k] = v1342 if system_fingerprint is None:1343 system_fingerprint = output.get("system_fingerprint")1344 combined = {"token_usage": overall_token_usage, "model_name": self.model_name}1345 if system_fingerprint:1346 combined["system_fingerprint"] = system_fingerprint1347 return combined13481349 def _convert_chunk_to_generation_chunk(1350 self,1351 chunk: dict,1352 default_chunk_class: type,1353 base_generation_info: dict | None,1354 ) -> ChatGenerationChunk | None:1355 if chunk.get("type") == "content.delta": # From 
beta.chat.completions.stream1356 return None1357 token_usage = chunk.get("usage")1358 choices = (1359 chunk.get("choices", [])1360 # From beta.chat.completions.stream1361 or chunk.get("chunk", {}).get("choices", [])1362 )13631364 usage_metadata: UsageMetadata | None = (1365 _create_usage_metadata(token_usage, chunk.get("service_tier"))1366 if token_usage1367 else None1368 )1369 if len(choices) == 0:1370 # logprobs is implicitly None1371 generation_chunk = ChatGenerationChunk(1372 message=default_chunk_class(content="", usage_metadata=usage_metadata),1373 generation_info=base_generation_info,1374 )1375 # Keep content as "" (the default) rather than converting to [].1376 # Chat Completions content deltas are normalized to strings in1377 # _convert_delta_to_message_chunk. Starting with [] causes1378 # merge_content to silently drop string content (empty list is1379 # falsy, so no merge branch applies). The empty list also triggers1380 # the content_blocks isinstance(list) short-circuit, which would1381 # return [] and miss tool_call_chunks.1382 if self.output_version == "v1":1383 generation_chunk.message.response_metadata["output_version"] = "v1"13841385 return generation_chunk13861387 choice = choices[0]1388 if choice["delta"] is None:1389 return None13901391 message_chunk = _convert_delta_to_message_chunk(1392 choice["delta"], default_chunk_class1393 )1394 generation_info = {**base_generation_info} if base_generation_info else {}13951396 if finish_reason := choice.get("finish_reason"):1397 generation_info["finish_reason"] = finish_reason1398 if model_name := chunk.get("model"):1399 generation_info["model_name"] = model_name1400 if system_fingerprint := chunk.get("system_fingerprint"):1401 generation_info["system_fingerprint"] = system_fingerprint1402 if service_tier := chunk.get("service_tier"):1403 generation_info["service_tier"] = service_tier14041405 logprobs = choice.get("logprobs")1406 if logprobs:1407 generation_info["logprobs"] = logprobs14081409 if 
usage_metadata and isinstance(message_chunk, AIMessageChunk):1410 message_chunk.usage_metadata = usage_metadata14111412 message_chunk.response_metadata["model_provider"] = "openai"1413 # Propagate output_version so content_blocks can detect v1 mode.1414 if self.output_version == "v1":1415 message_chunk.response_metadata["output_version"] = "v1"1416 return ChatGenerationChunk(1417 message=message_chunk, generation_info=generation_info or None1418 )14191420 def _ensure_sync_client_available(self) -> None:1421 """Check that sync client is available, raise error if not."""1422 if self.client is None:1423 msg = (1424 "Sync client is not available. This happens when an async callable "1425 "was provided for the API key. Use async methods (ainvoke, astream) "1426 "instead, or provide a string or sync callable for the API key."1427 )1428 raise ValueError(msg)14291430 def _stream_responses(1431 self,1432 messages: list[BaseMessage],1433 stop: list[str] | None = None,1434 run_manager: CallbackManagerForLLMRun | None = None,1435 **kwargs: Any,1436 ) -> Iterator[ChatGenerationChunk]:1437 self._ensure_sync_client_available()1438 kwargs["stream"] = True1439 payload = self._get_request_payload(messages, stop=stop, **kwargs)1440 try:1441 if self.include_response_headers:1442 raw_context_manager = (1443 self.root_client.with_raw_response.responses.create(**payload)1444 )1445 context_manager = raw_context_manager.parse()1446 headers = {"headers": dict(raw_context_manager.headers)}1447 else:1448 context_manager = self.root_client.responses.create(**payload)1449 headers = {}1450 original_schema_obj = kwargs.get("response_format")14511452 with context_manager as response:1453 is_first_chunk = True1454 current_index = -11455 current_output_index = -11456 current_sub_index = -11457 has_reasoning = False1458 for chunk in response:1459 metadata = headers if is_first_chunk else {}1460 (1461 current_index,1462 current_output_index,1463 current_sub_index,1464 generation_chunk,1465 ) = 
_convert_responses_chunk_to_generation_chunk(1466 chunk,1467 current_index,1468 current_output_index,1469 current_sub_index,1470 schema=original_schema_obj,1471 metadata=metadata,1472 has_reasoning=has_reasoning,1473 output_version=self.output_version,1474 )1475 if generation_chunk:1476 if run_manager:1477 run_manager.on_llm_new_token(1478 generation_chunk.text, chunk=generation_chunk1479 )1480 is_first_chunk = False1481 if "reasoning" in generation_chunk.message.additional_kwargs:1482 has_reasoning = True1483 yield generation_chunk1484 except openai.BadRequestError as e:1485 _handle_openai_bad_request(e)1486 except openai.APIError as e:1487 _handle_openai_api_error(e)14881489 async def _astream_responses(1490 self,1491 messages: list[BaseMessage],1492 stop: list[str] | None = None,1493 run_manager: AsyncCallbackManagerForLLMRun | None = None,1494 **kwargs: Any,1495 ) -> AsyncIterator[ChatGenerationChunk]:1496 kwargs["stream"] = True1497 payload = self._get_request_payload(messages, stop=stop, **kwargs)1498 try:1499 if self.include_response_headers:1500 raw_context_manager = (1501 await self.root_async_client.with_raw_response.responses.create(1502 **payload1503 )1504 )1505 context_manager = raw_context_manager.parse()1506 headers = {"headers": dict(raw_context_manager.headers)}1507 else:1508 context_manager = await self.root_async_client.responses.create(1509 **payload1510 )1511 headers = {}1512 original_schema_obj = kwargs.get("response_format")15131514 async with context_manager as response:1515 is_first_chunk = True1516 current_index = -11517 current_output_index = -11518 current_sub_index = -11519 has_reasoning = False1520 async for chunk in _astream_with_chunk_timeout(1521 response,1522 self.stream_chunk_timeout,1523 model_name=self.model_name,1524 ):1525 metadata = headers if is_first_chunk else {}1526 (1527 current_index,1528 current_output_index,1529 current_sub_index,1530 generation_chunk,1531 ) = _convert_responses_chunk_to_generation_chunk(1532 
chunk,1533 current_index,1534 current_output_index,1535 current_sub_index,1536 schema=original_schema_obj,1537 metadata=metadata,1538 has_reasoning=has_reasoning,1539 output_version=self.output_version,1540 )1541 if generation_chunk:1542 if run_manager:1543 await run_manager.on_llm_new_token(1544 generation_chunk.text, chunk=generation_chunk1545 )1546 is_first_chunk = False1547 if "reasoning" in generation_chunk.message.additional_kwargs:1548 has_reasoning = True1549 yield generation_chunk1550 except openai.BadRequestError as e:1551 _handle_openai_bad_request(e)1552 except openai.APIError as e:1553 _handle_openai_api_error(e)15541555 def _should_stream_usage(1556 self, stream_usage: bool | None = None, **kwargs: Any1557 ) -> bool:1558 """Determine whether to include usage metadata in streaming output.15591560 For backwards compatibility, we check for `stream_options` passed1561 explicitly to kwargs or in the `model_kwargs` and override `self.stream_usage`.1562 """1563 stream_usage_sources = [ # order of precedence1564 stream_usage,1565 kwargs.get("stream_options", {}).get("include_usage"),1566 self.model_kwargs.get("stream_options", {}).get("include_usage"),1567 self.stream_usage,1568 ]1569 for source in stream_usage_sources:1570 if isinstance(source, bool):1571 return source1572 return self.stream_usage or False15731574 def _stream(1575 self,1576 messages: list[BaseMessage],1577 stop: list[str] | None = None,1578 run_manager: CallbackManagerForLLMRun | None = None,1579 *,1580 stream_usage: bool | None = None,1581 **kwargs: Any,1582 ) -> Iterator[ChatGenerationChunk]:1583 self._ensure_sync_client_available()1584 kwargs["stream"] = True1585 stream_usage = self._should_stream_usage(stream_usage, **kwargs)1586 if stream_usage:1587 kwargs["stream_options"] = {"include_usage": stream_usage}1588 payload = self._get_request_payload(messages, stop=stop, **kwargs)1589 default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1590 base_generation_info = {}15911592 try:1593 
    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run one non-streaming request and convert it to a `ChatResult`.

        One of three request paths is taken, checked in this order:

        1. The built payload contains `response_format`: the Chat Completions
           `parse` endpoint is called (after removing `stream` from the payload).
        2. `_use_responses_api(payload)` is true: the Responses API is called
           (`parse` when the caller's `response_format` kwarg is a Pydantic
           class, `create` otherwise) and the result is returned directly.
        3. Otherwise: the Chat Completions `create` endpoint is called.

        Every call goes through `with_raw_response`, so response headers are
        available when `include_response_headers` is set.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences, forwarded to the payload builder.
            run_manager: Not used in this method's body.
            **kwargs: Extra invocation parameters merged into the payload.

        Returns:
            The `ChatResult` built from the API response.
        """
        self._ensure_sync_client_available()
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        generation_info = None
        # Kept outside the `try` so the generic handler below can inspect it.
        raw_response = None
        try:
            if "response_format" in payload:
                payload.pop("stream")
                raw_response = (
                    self.root_client.chat.completions.with_raw_response.parse(**payload)
                )
                response = raw_response.parse()
            elif self._use_responses_api(payload):
                # The schema is read from the caller's kwargs, not from `payload`.
                original_schema_obj = kwargs.get("response_format")
                if original_schema_obj and _is_pydantic_class(original_schema_obj):
                    raw_response = self.root_client.responses.with_raw_response.parse(
                        **payload
                    )
                else:
                    raw_response = self.root_client.responses.with_raw_response.create(
                        **payload
                    )
                response = raw_response.parse()
                if self.include_response_headers:
                    generation_info = {"headers": dict(raw_response.headers)}
                return _construct_lc_result_from_responses_api(
                    response,
                    schema=original_schema_obj,
                    metadata=generation_info,
                    output_version=self.output_version,
                )
            else:
                raw_response = self.client.with_raw_response.create(**payload)
                response = raw_response.parse()
        # NOTE(review): the two handlers below presumably always raise; if either
        # returned normally, `response` would be unbound further down — confirm.
        except openai.BadRequestError as e:
            _handle_openai_bad_request(e)
        except openai.APIError as e:
            _handle_openai_api_error(e)
        except Exception as e:
            # Attach the HTTP response (when one was received) to the exception
            # before re-raising it.
            if raw_response is not None and hasattr(raw_response, "http_response"):
                e.response = raw_response.http_response  # type: ignore[attr-defined]
            raise e
        if (
            self.include_response_headers
            and raw_response is not None
            and hasattr(raw_response, "headers")
        ):
            generation_info = {"headers": dict(raw_response.headers)}
        return self._create_chat_result(response, generation_info)
not None1711 or self.include is not None1712 or self.reasoning is not None1713 or self.truncation is not None1714 or self.use_previous_response_id1715 or _model_prefers_responses_api(self.model_name)1716 ):1717 return True1718 return _use_responses_api(payload)17191720 def _get_request_payload(1721 self,1722 input_: LanguageModelInput,1723 *,1724 stop: list[str] | None = None,1725 **kwargs: Any,1726 ) -> dict:1727 messages = self._convert_input(input_).to_messages()1728 if stop is not None:1729 kwargs["stop"] = stop17301731 payload = {**self._default_params, **kwargs}17321733 if self._use_responses_api(payload):1734 if self.use_previous_response_id:1735 last_messages, previous_response_id = _get_last_messages(messages)1736 payload_to_use = last_messages if previous_response_id else messages1737 if previous_response_id:1738 payload["previous_response_id"] = previous_response_id1739 payload = _construct_responses_api_payload(payload_to_use, payload)1740 else:1741 payload = _construct_responses_api_payload(messages, payload)1742 else:1743 payload["messages"] = [1744 _convert_message_to_dict(_convert_from_v1_to_chat_completions(m))1745 if isinstance(m, AIMessage)1746 else _convert_message_to_dict(m)1747 for m in messages1748 ]1749 return payload17501751 def _create_chat_result(1752 self,1753 response: dict | openai.BaseModel,1754 generation_info: dict | None = None,1755 ) -> ChatResult:1756 generations = []17571758 response_dict = (1759 response1760 if isinstance(response, dict)1761 # `parsed` may hold arbitrary Pydantic models from structured output.1762 # Exclude it from this dump and copy it from the typed response below.1763 else response.model_dump(1764 exclude={"choices": {"__all__": {"message": {"parsed"}}}}1765 )1766 )1767 # Sometimes the AI Model calling will get error, we should raise it (this is1768 # typically followed by a null value for `choices`, which we raise for1769 # separately below).1770 if response_dict.get("error"):1771 raise 
ValueError(response_dict.get("error"))17721773 # Raise informative error messages for non-OpenAI chat completions APIs1774 # that return malformed responses.1775 try:1776 choices = response_dict["choices"]1777 except KeyError as e:1778 msg = f"Response missing 'choices' key: {response_dict.keys()}"1779 raise KeyError(msg) from e17801781 if choices is None:1782 # Some OpenAI-compatible APIs (e.g., vLLM) may return null choices1783 # when the response format differs or an error occurs without1784 # populating the error field. Provide a more helpful error message.1785 msg = (1786 "Received response with null value for 'choices'. "1787 "This can happen when using OpenAI-compatible APIs (e.g., vLLM) "1788 "that return a response in an unexpected format. "1789 f"Full response keys: {list(response_dict.keys())}"1790 )1791 raise TypeError(msg)17921793 token_usage = response_dict.get("usage")1794 service_tier = response_dict.get("service_tier")17951796 for res in choices:1797 message = _convert_dict_to_message(res["message"])1798 if token_usage and isinstance(message, AIMessage):1799 message.usage_metadata = _create_usage_metadata(1800 token_usage, service_tier1801 )1802 generation_info = generation_info or {}1803 generation_info["finish_reason"] = (1804 res.get("finish_reason")1805 if res.get("finish_reason") is not None1806 else generation_info.get("finish_reason")1807 )1808 if "logprobs" in res:1809 generation_info["logprobs"] = res["logprobs"]1810 gen = ChatGeneration(message=message, generation_info=generation_info)1811 generations.append(gen)1812 llm_output = {1813 "token_usage": token_usage,1814 "model_provider": "openai",1815 "model_name": response_dict.get("model", self.model_name),1816 "system_fingerprint": response_dict.get("system_fingerprint", ""),1817 }1818 if "id" in response_dict:1819 llm_output["id"] = response_dict["id"]1820 if service_tier:1821 llm_output["service_tier"] = service_tier18221823 if isinstance(response, openai.BaseModel) and getattr(1824 
    async def _astream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        *,
        stream_usage: bool | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        """Asynchronously stream Chat Completions output as generation chunks.

        When the built payload contains `response_format`, the
        `beta.chat.completions.stream` helper is used and, after the stream
        ends, one extra chunk built from the final completion is yielded.
        Otherwise the regular `create` endpoint is called with `stream=True`.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences, forwarded to the payload builder.
            run_manager: If given, `on_llm_new_token` is awaited for each chunk.
            stream_usage: Per-call override passed to `_should_stream_usage`.
            **kwargs: Extra invocation parameters merged into the payload.

        Yields:
            One `ChatGenerationChunk` per converted stream event; events that
            convert to `None` are skipped.
        """
        kwargs["stream"] = True
        stream_usage = self._should_stream_usage(stream_usage, **kwargs)
        if stream_usage:
            kwargs["stream_options"] = {"include_usage": stream_usage}
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
        # Only populated (with response headers) on the raw-response path below.
        base_generation_info = {}

        try:
            if "response_format" in payload:
                if self.include_response_headers:
                    warnings.warn(
                        "Cannot currently include response headers when "
                        "response_format is specified."
                    )
                # `stream` is removed before calling the streaming helper.
                payload.pop("stream")
                response_stream = self.root_async_client.beta.chat.completions.stream(
                    **payload
                )
                context_manager = response_stream
            else:
                if self.include_response_headers:
                    raw_response = await self.async_client.with_raw_response.create(
                        **payload
                    )
                    response = raw_response.parse()
                    base_generation_info = {"headers": dict(raw_response.headers)}
                else:
                    response = await self.async_client.create(**payload)
                context_manager = response
            async with context_manager as response:
                is_first_chunk = True
                # Iteration is wrapped with `stream_chunk_timeout`.
                async for chunk in _astream_with_chunk_timeout(
                    response,
                    self.stream_chunk_timeout,
                    model_name=self.model_name,
                ):
                    if not isinstance(chunk, dict):
                        chunk = chunk.model_dump()
                    # Headers (if any) are attached to the first emitted chunk only.
                    generation_chunk = self._convert_chunk_to_generation_chunk(
                        chunk,
                        default_chunk_class,
                        base_generation_info if is_first_chunk else {},
                    )
                    if generation_chunk is None:
                        continue
                    # Later chunks reuse the message class of the previous chunk.
                    default_chunk_class = generation_chunk.message.__class__
                    logprobs = (generation_chunk.generation_info or {}).get("logprobs")
                    if run_manager:
                        await run_manager.on_llm_new_token(
                            generation_chunk.text,
                            chunk=generation_chunk,
                            logprobs=logprobs,
                        )
                    is_first_chunk = False
                    yield generation_chunk
        # NOTE(review): the handlers below presumably always raise; if either
        # returned normally, `response` could be unbound in the check that
        # follows — confirm.
        except openai.BadRequestError as e:
            _handle_openai_bad_request(e)
        except openai.APIError as e:
            _handle_openai_api_error(e)
        # Structured-output path: emit one final chunk built from the completed
        # response once the stream has been fully consumed.
        if hasattr(response, "get_final_completion") and "response_format" in payload:
            final_completion = await response.get_final_completion()
            generation_chunk = self._get_generation_chunk_from_completion(
                final_completion
            )
            if run_manager:
                await run_manager.on_llm_new_token(
                    generation_chunk.text, chunk=generation_chunk
                )
            yield generation_chunk
self.root_async_client.responses.with_raw_response.create(1942 **payload1943 )1944 )1945 response = raw_response.parse()1946 if self.include_response_headers:1947 generation_info = {"headers": dict(raw_response.headers)}1948 return _construct_lc_result_from_responses_api(1949 response,1950 schema=original_schema_obj,1951 metadata=generation_info,1952 output_version=self.output_version,1953 )1954 else:1955 raw_response = await self.async_client.with_raw_response.create(1956 **payload1957 )1958 response = raw_response.parse()1959 except openai.BadRequestError as e:1960 _handle_openai_bad_request(e)1961 except openai.APIError as e:1962 _handle_openai_api_error(e)1963 except Exception as e:1964 if raw_response is not None and hasattr(raw_response, "http_response"):1965 e.response = raw_response.http_response # type: ignore[attr-defined]1966 raise e1967 if (1968 self.include_response_headers1969 and raw_response is not None1970 and hasattr(raw_response, "headers")1971 ):1972 generation_info = {"headers": dict(raw_response.headers)}1973 return await run_in_executor(1974 None, self._create_chat_result, response, generation_info1975 )19761977 @property1978 def _identifying_params(self) -> dict[str, Any]:1979 """Get the identifying parameters."""1980 return {"model_name": self.model_name, **self._default_params}19811982 def _get_invocation_params(1983 self, stop: list[str] | None = None, **kwargs: Any1984 ) -> dict[str, Any]:1985 """Get the parameters used to invoke the model."""1986 params = {1987 "model": self.model_name,1988 **super()._get_invocation_params(stop=stop),1989 **self._default_params,1990 **kwargs,1991 }1992 # Redact headers from built-in remote MCP tool invocations1993 if (tools := params.get("tools")) and isinstance(tools, list):1994 params["tools"] = [1995 ({**tool, "headers": "**REDACTED**"} if "headers" in tool else tool)1996 if isinstance(tool, dict) and tool.get("type") == "mcp"1997 else tool1998 for tool in tools1999 ]
Findings
✓ No findings reported for this file.