1"""OpenAI chat wrapper.23!!! warning "API scope"45 `ChatOpenAI` targets6 [official OpenAI API specifications](https://github.com/openai/openai-openapi)7 only. Non-standard response fields added by third-party providers (e.g.,8 `reasoning_content`, `reasoning_details`) are **not** extracted or9 preserved. If you are pointing `base_url` at a provider such as10 OpenRouter, vLLM, or DeepSeek, use the corresponding provider-specific11 LangChain package instead (e.g., `ChatDeepSeek`, `ChatOpenRouter`).12"""1314from __future__ import annotations1516import base6417import json18import logging19import os20import re21import ssl22import sys23import warnings24from collections.abc import (25 AsyncIterator,26 Awaitable,27 Callable,28 Iterator,29 Mapping,30 Sequence,31)32from functools import partial33from io import BytesIO34from json import JSONDecodeError35from math import ceil36from operator import itemgetter37from typing import (38 TYPE_CHECKING,39 Any,40 Literal,41 TypeAlias,42 TypeVar,43 cast,44)45from urllib.parse import urlparse4647import certifi48import openai49import tiktoken50from langchain_core.callbacks import (51 AsyncCallbackManagerForLLMRun,52 CallbackManagerForLLMRun,53)54from langchain_core.exceptions import ContextOverflowError55from langchain_core.language_models import (56 LanguageModelInput,57 ModelProfileRegistry,58)59from langchain_core.language_models.chat_models import (60 BaseChatModel,61 LangSmithParams,62)63from langchain_core.messages import (64 AIMessage,65 AIMessageChunk,66 BaseMessage,67 BaseMessageChunk,68 ChatMessage,69 ChatMessageChunk,70 FunctionMessage,71 FunctionMessageChunk,72 HumanMessage,73 HumanMessageChunk,74 InvalidToolCall,75 SystemMessage,76 SystemMessageChunk,77 ToolCall,78 ToolMessage,79 ToolMessageChunk,80 is_data_content_block,81)82from langchain_core.messages import content as types83from langchain_core.messages.ai import (84 InputTokenDetails,85 OutputTokenDetails,86 UsageMetadata,87)88from langchain_core.messages.block_translators.openai import (89 _convert_from_v03_ai_message,90 convert_to_openai_data_block,91)92from langchain_core.messages.tool import tool_call_chunk93from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser94from langchain_core.output_parsers.openai_tools import (95 JsonOutputKeyToolsParser,96 PydanticToolsParser,97 make_invalid_tool_call,98 parse_tool_call,99)100from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult101from langchain_core.runnables import (102 Runnable,103 RunnableLambda,104 RunnableMap,105 RunnablePassthrough,106)107from langchain_core.runnables.config import run_in_executor108from langchain_core.tools import BaseTool109from langchain_core.tools.base import _stringify110from langchain_core.utils import get_pydantic_field_names111from langchain_core.utils.function_calling import (112 convert_to_openai_function,113 convert_to_openai_tool,114)115from langchain_core.utils.pydantic import (116 PydanticBaseModel,117 TypeBaseModel,118 is_basemodel_subclass,119)120from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env121from pydantic import (122 BaseModel,123 ConfigDict,124 Field,125 SecretStr,126 ValidationError,127 field_validator,128 model_validator,129)130from pydantic.v1 import BaseModel as BaseModelV1131from typing_extensions import Self132133from langchain_openai.chat_models._client_utils import (134 _astream_with_chunk_timeout,135 _build_proxied_async_httpx_client,136 _build_proxied_sync_httpx_client,137 _float_env,138 _get_default_async_httpx_client,139 _get_default_httpx_client,140 _log_proxy_env_bypass_once,141 _resolve_socket_options,142 _resolve_sync_and_async_api_keys,143 _should_bypass_socket_options_for_proxy_env,144 _warn_if_proxy_env_shadowed,145)146from langchain_openai.chat_models._compat import (147 _convert_from_v1_to_chat_completions,148 _convert_from_v1_to_responses,149 _convert_to_v03_ai_message,150)151from langchain_openai.data._profiles import _PROFILES152153if TYPE_CHECKING:154 import httpx155 from langchain_core.language_models import ModelProfile156 from openai.types.responses import Response157158logger = logging.getLogger(__name__)159160# This SSL context is equivalent to the default `verify=True`.161# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances162global_ssl_context = ssl.create_default_context(cafile=certifi.where())163164_ssrf_client: httpx.Client | None = None165166167def _get_ssrf_safe_client() -> httpx.Client:168 global _ssrf_client169 if _ssrf_client is None:170 from langchain_core._security._transport import ssrf_safe_client171172 _ssrf_client = ssrf_safe_client(173 verify=global_ssl_context, follow_redirects=False174 )175 return _ssrf_client176177178_MODEL_PROFILES = cast(ModelProfileRegistry, _PROFILES)179180181def _get_default_model_profile(model_name: str) -> ModelProfile:182 default = _MODEL_PROFILES.get(model_name) or {}183 return default.copy()184185186WellKnownTools = (187 "file_search",188 "web_search_preview",189 "web_search",190 "computer_use_preview",191 "code_interpreter",192 "mcp",193 "image_generation",194 "tool_search",195)196197198def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:199 """Convert a dictionary to a LangChain message.200201 Args:202 _dict: The dictionary.203204 Returns:205 The LangChain message.206 """207 role = _dict.get("role")208 name = _dict.get("name")209 id_ = _dict.get("id")210 if role == "user":211 return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)212 if role == "assistant":213 # Fix for azure214 # Also OpenAI returns None for tool invocations215 content = _dict.get("content", "") or ""216 additional_kwargs: dict = {}217 if function_call := _dict.get("function_call"):218 additional_kwargs["function_call"] = dict(function_call)219 tool_calls = []220 invalid_tool_calls = []221 if raw_tool_calls := _dict.get("tool_calls"):222 for raw_tool_call in raw_tool_calls:223 try:224 tool_calls.append(parse_tool_call(raw_tool_call, return_id=True))225 except Exception as e:226 invalid_tool_calls.append(227 make_invalid_tool_call(raw_tool_call, str(e))228 )229 if audio := _dict.get("audio"):230 additional_kwargs["audio"] = audio231 return AIMessage(232 content=content,233 additional_kwargs=additional_kwargs,234 name=name,235 id=id_,236 tool_calls=tool_calls,237 invalid_tool_calls=invalid_tool_calls,238 )239 if role in ("system", "developer"):240 additional_kwargs = {"__openai_role__": role} if role == "developer" else {}241 return SystemMessage(242 content=_dict.get("content", ""),243 name=name,244 id=id_,245 additional_kwargs=additional_kwargs,246 )247 if role == "function":248 return FunctionMessage(249 content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_250 )251 if role == "tool":252 additional_kwargs = {}253 if "name" in _dict:254 additional_kwargs["name"] = _dict["name"]255 return ToolMessage(256 content=_dict.get("content", ""),257 tool_call_id=cast(str, _dict.get("tool_call_id")),258 additional_kwargs=additional_kwargs,259 name=name,260 id=id_,261 )262 return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]263264265def _sanitize_chat_completions_content(content: str | list[dict]) -> str | list[dict]:266 """Sanitize content for chat/completions API.267268 For list content, filters text blocks to only keep 'type' and 'text' keys.269 """270 if isinstance(content, list):271 sanitized = []272 for block in content:273 if (274 isinstance(block, dict)275 and block.get("type") == "text"276 and "text" in block277 ):278 sanitized.append({"type": "text", "text": block["text"]})279 else:280 sanitized.append(block)281 return sanitized282 return content283284285def _format_message_content(286 content: Any,287 api: Literal["chat/completions", "responses"] = "chat/completions",288 role: str | None = None,289) -> Any:290 """Format message content."""291 if content and isinstance(content, list):292 formatted_content = []293 for block in content:294 # Remove unexpected block types295 if (296 isinstance(block, dict)297 and "type" in block298 and (299 block["type"] in ("tool_use", "thinking", "reasoning_content")300 or (301 block["type"] in ("function_call", "code_interpreter_call")302 and api == "chat/completions"303 )304 )305 ):306 continue307 if (308 isinstance(block, dict)309 and is_data_content_block(block)310 # Responses API messages handled separately in _compat (parsed into311 # image generation calls)312 and not (api == "responses" and str(role).lower().startswith("ai"))313 ):314 formatted_content.append(convert_to_openai_data_block(block, api=api))315 # Anthropic image blocks316 elif (317 isinstance(block, dict)318 and block.get("type") == "image"319 and (source := block.get("source"))320 and isinstance(source, dict)321 ):322 if source.get("type") == "base64" and (323 (media_type := source.get("media_type"))324 and (data := source.get("data"))325 ):326 formatted_content.append(327 {328 "type": "image_url",329 "image_url": {"url": f"data:{media_type};base64,{data}"},330 }331 )332 elif source.get("type") == "url" and (url := source.get("url")):333 formatted_content.append(334 {"type": "image_url", "image_url": {"url": url}}335 )336 else:337 continue338 else:339 formatted_content.append(block)340 else:341 formatted_content = content342343 return formatted_content344345346def _convert_message_to_dict(347 message: BaseMessage,348 api: Literal["chat/completions", "responses"] = "chat/completions",349) -> dict:350 """Convert a LangChain message to dictionary format expected by OpenAI."""351 message_dict: dict[str, Any] = {352 "content": _format_message_content(message.content, api=api, role=message.type)353 }354 if (name := message.name or message.additional_kwargs.get("name")) is not None:355 message_dict["name"] = name356357 # populate role and additional message data358 if isinstance(message, ChatMessage):359 message_dict["role"] = message.role360 elif isinstance(message, HumanMessage):361 message_dict["role"] = "user"362 elif isinstance(message, AIMessage):363 message_dict["role"] = "assistant"364 if message.tool_calls or message.invalid_tool_calls:365 message_dict["tool_calls"] = [366 _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls367 ] + [368 _lc_invalid_tool_call_to_openai_tool_call(tc)369 for tc in message.invalid_tool_calls370 ]371 elif "tool_calls" in message.additional_kwargs:372 message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]373 tool_call_supported_props = {"id", "type", "function"}374 message_dict["tool_calls"] = [375 {k: v for k, v in tool_call.items() if k in tool_call_supported_props}376 for tool_call in message_dict["tool_calls"]377 ]378 elif "function_call" in message.additional_kwargs:379 # OpenAI raises 400 if both function_call and tool_calls are present in the380 # same message.381 message_dict["function_call"] = message.additional_kwargs["function_call"]382 else:383 pass384 # If tool calls present, content null value should be None not empty string.385 if "function_call" in message_dict or "tool_calls" in message_dict:386 message_dict["content"] = message_dict["content"] or None387388 audio: dict[str, Any] | None = None389 for block in message.content:390 if (391 isinstance(block, dict)392 and block.get("type") == "audio"393 and (id_ := block.get("id"))394 and api != "responses"395 ):396 # openai doesn't support passing the data back - only the id397 # https://platform.openai.com/docs/guides/audio/multi-turn-conversations398 audio = {"id": id_}399 if not audio and "audio" in message.additional_kwargs:400 raw_audio = message.additional_kwargs["audio"]401 audio = (402 {"id": message.additional_kwargs["audio"]["id"]}403 if "id" in raw_audio404 else raw_audio405 )406 if audio:407 message_dict["audio"] = audio408 elif isinstance(message, SystemMessage):409 message_dict["role"] = message.additional_kwargs.get(410 "__openai_role__", "system"411 )412 elif isinstance(message, FunctionMessage):413 message_dict["role"] = "function"414 elif isinstance(message, ToolMessage):415 message_dict["role"] = "tool"416 message_dict["tool_call_id"] = message.tool_call_id417 message_dict["content"] = _sanitize_chat_completions_content(418 message_dict["content"]419 )420 supported_props = {"content", "role", "tool_call_id"}421 message_dict = {k: v for k, v in message_dict.items() if k in supported_props}422 else:423 msg = f"Got unknown type {message}"424 raise TypeError(msg)425 return message_dict426427428def _convert_delta_to_message_chunk(429 _dict: Mapping[str, Any], default_class: type[BaseMessageChunk]430) -> BaseMessageChunk:431 """Convert to a LangChain message chunk."""432 id_ = _dict.get("id")433 role = cast(str, _dict.get("role"))434 content = cast(str, _dict.get("content") or "")435 additional_kwargs: dict = {}436 if _dict.get("function_call"):437 function_call = dict(_dict["function_call"])438 if "name" in function_call and function_call["name"] is None:439 function_call["name"] = ""440 additional_kwargs["function_call"] = function_call441 tool_call_chunks = []442 if raw_tool_calls := _dict.get("tool_calls"):443 try:444 tool_call_chunks = [445 tool_call_chunk(446 name=rtc["function"].get("name"),447 args=rtc["function"].get("arguments"),448 id=rtc.get("id"),449 index=rtc["index"],450 )451 for rtc in raw_tool_calls452 ]453 except KeyError:454 pass455456 if role == "user" or default_class == HumanMessageChunk:457 return HumanMessageChunk(content=content, id=id_)458 if role == "assistant" or default_class == AIMessageChunk:459 return AIMessageChunk(460 content=content,461 additional_kwargs=additional_kwargs,462 id=id_,463 tool_call_chunks=tool_call_chunks, # type: ignore[arg-type]464 )465 if role in ("system", "developer") or default_class == SystemMessageChunk:466 if role == "developer":467 additional_kwargs = {"__openai_role__": "developer"}468 else:469 additional_kwargs = {}470 return SystemMessageChunk(471 content=content, id=id_, additional_kwargs=additional_kwargs472 )473 if role == "function" or default_class == FunctionMessageChunk:474 return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)475 if role == "tool" or default_class == ToolMessageChunk:476 return ToolMessageChunk(477 content=content, tool_call_id=_dict["tool_call_id"], id=id_478 )479 if role or default_class == ChatMessageChunk:480 return ChatMessageChunk(content=content, role=role, id=id_)481 return default_class(content=content, id=id_) # type: ignore[call-arg]482483484def _update_token_usage(485 overall_token_usage: int | dict, new_usage: int | dict486) -> int | dict:487 # Token usage is either ints or dictionaries488 # `reasoning_tokens` is nested inside `completion_tokens_details`489 if isinstance(new_usage, int):490 if not isinstance(overall_token_usage, int):491 msg = (492 f"Got different types for token usage: "493 f"{type(new_usage)} and {type(overall_token_usage)}"494 )495 raise ValueError(msg)496 return new_usage + overall_token_usage497 if isinstance(new_usage, dict):498 if not isinstance(overall_token_usage, dict):499 msg = (500 f"Got different types for token usage: "501 f"{type(new_usage)} and {type(overall_token_usage)}"502 )503 raise ValueError(msg)504 return {505 k: _update_token_usage(overall_token_usage.get(k, 0), v)506 for k, v in new_usage.items()507 }508 warnings.warn(f"Unexpected type for token usage: {type(new_usage)}")509 return new_usage510511512class OpenAIContextOverflowError(openai.BadRequestError, ContextOverflowError):513 """BadRequestError raised when input exceeds OpenAI's context limit."""514515516class OpenAIAPIContextOverflowError(openai.APIError, ContextOverflowError):517 """APIError raised when input exceeds OpenAI's context limit."""518519520def _handle_openai_bad_request(e: openai.BadRequestError) -> None:521 if (522 "context_length_exceeded" in str(e)523 or "Input tokens exceed the configured limit" in e.message524 ):525 raise OpenAIContextOverflowError(526 message=e.message, response=e.response, body=e.body527 ) from e528 if (529 "'response_format' of type 'json_schema' is not supported with this model"530 ) in e.message:531 message = (532 "This model does not support OpenAI's structured output feature, which "533 "is the default method for `with_structured_output` as of "534 "langchain-openai==0.3. To use `with_structured_output` with this model, "535 'specify `method="function_calling"`.'536 )537 warnings.warn(message)538 raise e539 if "Invalid schema for response_format" in e.message:540 message = (541 "Invalid schema for OpenAI's structured output feature, which is the "542 "default method for `with_structured_output` as of langchain-openai==0.3. "543 'Specify `method="function_calling"` instead or update your schema. '544 "See supported schemas: "545 "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas"546 )547 warnings.warn(message)548 raise e549 raise550551552def _handle_openai_api_error(e: openai.APIError) -> None:553 error_message = str(e)554 if "exceeds the context window" in error_message:555 raise OpenAIAPIContextOverflowError(556 message=e.message, request=e.request, body=e.body557 ) from e558 raise559560561_RESPONSES_API_ONLY_PREFIXES = (562 "gpt-5-pro",563 "gpt-5.2-pro",564 "gpt-5.4-pro",565 "gpt-5.5-pro",566)567568569def _model_prefers_responses_api(model_name: str | None) -> bool:570 if not model_name:571 return False572 return model_name.startswith(_RESPONSES_API_ONLY_PREFIXES) or "codex" in model_name573574575_BM = TypeVar("_BM", bound=BaseModel)576_DictOrPydanticClass: TypeAlias = dict[str, Any] | type[_BM] | type577_DictOrPydantic: TypeAlias = dict | _BM578579580class BaseChatOpenAI(BaseChatModel):581 """Base wrapper around OpenAI large language models for chat.582583 This base class targets584 [official OpenAI API specifications](https://github.com/openai/openai-openapi)585 only. Non-standard response fields added by third-party providers (e.g.,586 `reasoning_content`) are not extracted. Use a provider-specific subclass for587 full provider support.588 """589590 client: Any = Field(default=None, exclude=True)591592 async_client: Any = Field(default=None, exclude=True)593594 root_client: Any = Field(default=None, exclude=True)595596 root_async_client: Any = Field(default=None, exclude=True)597598 model_name: str = Field(default="gpt-3.5-turbo", alias="model")599 """Model name to use."""600601 temperature: float | None = None602 """What sampling temperature to use."""603604 model_kwargs: dict[str, Any] = Field(default_factory=dict)605 """Holds any model parameters valid for `create` call not explicitly specified."""606607 openai_api_key: (608 SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]609 ) = Field(610 alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)611 )612 """API key to use.613614 Can be inferred from the `OPENAI_API_KEY` environment variable, or specified615 as a string, or sync or async callable that returns a string.616617 ??? example "Specify with environment variable"618619 ```bash620 export OPENAI_API_KEY=...621 ```622 ```python623 from langchain_openai import ChatOpenAI624625 model = ChatOpenAI(model="gpt-5-nano")626 ```627628 ??? example "Specify with a string"629630 ```python631 from langchain_openai import ChatOpenAI632633 model = ChatOpenAI(model="gpt-5-nano", api_key="...")634 ```635636 ??? example "Specify with a sync callable"637638 ```python639 from langchain_openai import ChatOpenAI640641 def get_api_key() -> str:642 # Custom logic to retrieve API key643 return "..."644645 model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)646 ```647648 ??? example "Specify with an async callable"649650 ```python651 from langchain_openai import ChatOpenAI652653 async def get_api_key() -> str:654 # Custom async logic to retrieve API key655 return "..."656657 model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)658 ```659 """660661 openai_api_base: str | None = Field(default=None, alias="base_url")662 """Base URL path for API requests, leave blank if not using a proxy or service emulator.""" # noqa: E501663664 openai_organization: str | None = Field(default=None, alias="organization")665 """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""666667 # to support explicit proxy for OpenAI668 openai_proxy: str | None = Field(669 default_factory=from_env("OPENAI_PROXY", default=None)670 )671672 request_timeout: float | tuple[float, float] | Any | None = Field(673 default=None, alias="timeout"674 )675 """Timeout for requests to OpenAI completion API.676677 Can be float, `httpx.Timeout` or `None`.678 """679680 stream_usage: bool | None = None681 """Whether to include usage metadata in streaming output.682683 If enabled, an additional message chunk will be generated during the stream684 including usage metadata.685686 This parameter is enabled unless `openai_api_base` is set or the model is687 initialized with a custom client, as many chat completions APIs do not688 support streaming token usage.689690 !!! version-added "Added in `langchain-openai` 0.3.9"691692 !!! warning "Behavior changed in `langchain-openai` 0.3.35"693694 Enabled for default base URL and client.695 """696697 max_retries: int | None = None698 """Maximum number of retries to make when generating."""699700 presence_penalty: float | None = None701 """Penalizes repeated tokens."""702703 frequency_penalty: float | None = None704 """Penalizes repeated tokens according to frequency."""705706 seed: int | None = None707 """Seed for generation"""708709 logprobs: bool | None = None710 """Whether to return logprobs."""711712 top_logprobs: int | None = None713 """Number of most likely tokens to return at each token position, each with an714 associated log probability.715716 `logprobs` must be set to true if this parameter is used.717 """718719 logit_bias: dict[int, int] | None = None720 """Modify the likelihood of specified tokens appearing in the completion."""721722 streaming: bool = False723 """Whether to stream the results or not."""724725 n: int | None = None726 """Number of chat completions to generate for each prompt."""727728 top_p: float | None = None729 """Total probability mass of tokens to consider at each step."""730731 max_tokens: int | None = Field(default=None)732 """Maximum number of tokens to generate."""733734 reasoning_effort: str | None = None735 """Constrains effort on reasoning for reasoning models.736737 For use with the Chat Completions API. Reasoning models only.738739 Currently supported values are `'minimal'`, `'low'`, `'medium'`, and740 `'high'`. Reducing reasoning effort can result in faster responses and fewer741 tokens used on reasoning in a response.742 """743744 reasoning: dict[str, Any] | None = None745 """Reasoning parameters for reasoning models. None disables reasoning.746747 For use with the Responses API.748749 ```python750 reasoning={751 "effort": None, # Default None; can be "low", "medium", or "high"752 "summary": "auto", # Can be "auto", "concise", or "detailed"753 }754 ```755756 !!! version-added "Added in `langchain-openai` 0.3.24"757 """758759 verbosity: str | None = None760 """Controls the verbosity level of responses for reasoning models.761762 For use with the Responses API.763764 Currently supported values are `'low'`, `'medium'`, and `'high'`.765766 !!! version-added "Added in `langchain-openai` 0.3.28"767 """768769 tiktoken_model_name: str | None = None770 """The model name to pass to tiktoken when using this class.771772 Tiktoken is used to count the number of tokens in documents to constrain773 them to be under a certain limit.774775 By default, when set to `None`, this will be the same as the embedding model name.776 However, there are some cases where you may want to use this `Embedding` class with777 a model name not supported by tiktoken. This can include when using Azure embeddings778 or when using one of the many model providers that expose an OpenAI-like779 API but with different models. In those cases, in order to avoid erroring780 when tiktoken is called, you can specify a model name to use here.781 """782783 default_headers: Mapping[str, str] | None = None784785 default_query: Mapping[str, object] | None = None786787 # Configure a custom httpx client. See the788 # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.789 http_client: Any | None = Field(default=None, exclude=True)790 """Optional `httpx.Client`.791792 Only used for sync invocations. Must specify `http_async_client` as well if793 you'd like a custom client for async invocations.794 """795796 http_async_client: Any | None = Field(default=None, exclude=True)797 """Optional `httpx.AsyncClient`.798799 Only used for async invocations. Must specify `http_client` as well if you'd800 like a custom client for sync invocations.801 """802803 http_socket_options: Sequence[tuple[int, int, int]] | None = Field(804 default=None, exclude=True805 )806 """TCP socket options applied to the httpx transports built by this instance.807808 Defaults to a conservative TCP-keepalive + `TCP_USER_TIMEOUT` profile that809 targets a ~2-minute bound on silent connection hangs (silent mid-stream peer810 loss, gVisor/NAT idle timeouts, silent TCP black holes) on platforms that811 support the full option set. On platforms that only support a subset812 (macOS without `TCP_USER_TIMEOUT`, Windows with only `SO_KEEPALIVE`,813 minimal kernels), unsupported options are silently dropped and the bound814 degrades to whatever the remaining options + OS defaults provide — still815 better than indefinite hang.816817 Accepted values:818819 - `None` (default): use env-driven defaults. Matches the "unset" convention820 used by `http_client` elsewhere on this class.821 - `()` (empty): disable socket-option injection entirely. Inherits the OS822 defaults and restores httpx's native env-proxy auto-detection.823 - A non-empty sequence of `(level, option, value)` tuples: explicit824 override; passed verbatim to the transport (not filtered). Unsupported825 options raise `OSError` at connect time rather than being silently826 dropped — the user chose them explicitly.827828 Environment variables (only consulted when this field is `None`):829 `LANGCHAIN_OPENAI_TCP_KEEPALIVE` (set to `0` to disable entirely — the830 kill-switch), `LANGCHAIN_OPENAI_TCP_KEEPIDLE`,831 `LANGCHAIN_OPENAI_TCP_KEEPINTVL`, `LANGCHAIN_OPENAI_TCP_KEEPCNT`,832 `LANGCHAIN_OPENAI_TCP_USER_TIMEOUT_MS`.833834 Applied per side: if `http_client` is supplied, the sync path uses835 that user-owned client's socket options as-is; the async path still836 gets `http_socket_options` applied to its default builder (and837 vice-versa for `http_async_client`). Supply both to take full control.838839 !!! note "Interaction with env-proxy auto-detection"840841 When a custom `httpx` transport is active, `httpx` disables its842 native env-proxy auto-detection (`HTTP_PROXY` / `HTTPS_PROXY` /843 `ALL_PROXY` / `NO_PROXY` and macOS/Windows system proxy settings).844845 To keep the default shape safe, `ChatOpenAI` detects the846 "proxy-env-shadow" pattern and **skips the custom transport847 entirely** when **all** of the following hold:848849 - `http_socket_options` is left at its default (`None`)850 - No `http_client` or `http_async_client` supplied851 - No `openai_proxy` supplied852 - A proxy env var or system proxy is visible to httpx853854 On that specific shape, the instance falls back to pre-PR behavior855 and httpx's env-proxy auto-detection applies (a one-time `INFO` log856 records the bypass for observability).857858 If you explicitly set `http_socket_options=[...]` while a proxy859 env var is also set, no bypass — you opted into the transport, and860 a one-time `WARNING` records the shadowing. Set861 `http_socket_options=()` or `LANGCHAIN_OPENAI_TCP_KEEPALIVE=0` to862 disable transport injection explicitly, or pass a fully-configured863 `http_async_client` / `http_client` to take full control. The864 `openai_proxy` constructor kwarg is unaffected — socket options865 are applied cleanly through the proxied transport on that path.866 """867868 stream_chunk_timeout: float | None = Field(869 default_factory=lambda: _float_env(870 "LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0871 ),872 exclude=True,873 )874 """Per-chunk wall-clock timeout (seconds) on async streaming responses.875876 Applies to async invocations only (`astream`, `ainvoke` with streaming,877 etc.). Sync streaming (`stream`) is not affected.878879 Fires between content chunks yielded by the openai SDK's streaming iterator880 (i.e., each call to `__anext__` on the response). Crucially, this is881 **not** the same as httpx's `timeout.read`:882883 - httpx's read timeout is inter-byte and gets reset every time *any* bytes884 arrive on the socket — including OpenAI's SSE keepalive comments885 (`: keepalive`) that trickle down during long model generations. A886 stream that's silent on *content* but still producing keepalives looks887 alive forever to httpx.888 - `stream_chunk_timeout` measures the gap between *parsed chunks*. The889 openai SDK's SSE parser consumes keepalive comments internally and does890 not emit them as chunks, so keepalives do *not* reset this timer. It891 fires on genuine content silence.892893 When it fires, a `StreamChunkTimeoutError`894 (subclass of `asyncio.TimeoutError`) is raised with a self-describing895 message naming this knob, the env-var override, the model, and the896 number of chunks received before the stall. A WARNING log with897 `extra={"source": "stream_chunk_timeout", "timeout_s": <value>,898 "model_name": <value>, "chunks_received": <value>}` also fires so899 aggregate logging can distinguish app-layer timeouts from900 transport-layer failures.901902 Defaults to 120s. Set to `None` or `0` to disable. Overridable via the903 `LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S` env var. Negative values904 (from either the env var or the constructor kwarg — e.g., hydrated905 from YAML/JSON configs) fall back to the default with a `WARNING` log906 rather than silently disabling the wrapper, so a misconfigured value907 still boots safely and the fallback is visible.908 """909910 stop: list[str] | str | None = Field(default=None, alias="stop_sequences")911 """Default stop sequences."""912913 extra_body: Mapping[str, Any] | None = None914 """Optional additional JSON properties to include in the request parameters915 when making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or916 other providers.917918 This is the recommended way to pass custom parameters that are specific to your919 OpenAI-compatible API provider but not part of the standard OpenAI API.920921 Examples:922 - [LM Studio](https://lmstudio.ai/) TTL parameter: `extra_body={"ttl": 300}`923 - [vLLM](https://github.com/vllm-project/vllm) custom parameters:924 `extra_body={"use_beam_search": True}`925 - Any other provider-specific parameters926927 !!! warning928929 Do not use `model_kwargs` for custom parameters that are not part of the930 standard OpenAI API, as this will cause errors when making API calls. Use931 `extra_body` instead.932 """933934 include_response_headers: bool = False935 """Whether to include response headers in the output message `response_metadata`."""936937 disabled_params: dict[str, Any] | None = Field(default=None)938 """Parameters of the OpenAI client or `chat.completions` endpoint that should be939 disabled for the given model.940941 Should be specified as `{"param": None | ['val1', 'val2']}` where the key is the942 parameter and the value is either None, meaning that parameter should never be943 used, or it's a list of disabled values for the parameter.944945 For example, older models may not support the `'parallel_tool_calls'` parameter at946 all, in which case `disabled_params={"parallel_tool_calls": None}` can be passed947 in.948949 If a parameter is disabled then it will not be used by default in any methods, e.g.950 in `with_structured_output`. However this does not prevent a user from directly951 passed in the parameter during invocation.952 """953954 context_management: list[dict[str, Any]] | None = None955 """Configuration for956 [context management](https://developers.openai.com/api/docs/guides/compaction).957 """958959 include: list[str] | None = None960 """Additional fields to include in generations from Responses API.961962 Supported values:963964 - `'file_search_call.results'`965 - `'message.input_image.image_url'`966 - `'computer_call_output.output.image_url'`967 - `'reasoning.encrypted_content'`968 - `'code_interpreter_call.outputs'`969970 !!! version-added "Added in `langchain-openai` 0.3.24"971 """972973 service_tier: str | None = None974 """Latency tier for request.975976 Options are `'auto'`, `'default'`, or `'flex'`.977978 Relevant for users of OpenAI's scale tier service.979 """980981 store: bool | None = None982 """If `True`, OpenAI may store response data for future use.983984 Defaults to `True` for the Responses API and `False` for the Chat Completions API.985986 !!! version-added "Added in `langchain-openai` 0.3.24"987 """988989 truncation: str | None = None990 """Truncation strategy (Responses API).991992 Can be `'auto'` or `'disabled'` (default).993994 If `'auto'`, model may drop input items from the middle of the message sequence to995 fit the context window.996997 !!! version-added "Added in `langchain-openai` 0.3.24"998 """9991000 use_previous_response_id: bool = False1001 """If `True`, always pass `previous_response_id` using the ID of the most recent1002 response. Responses API only.10031004 Input messages up to the most recent response will be dropped from request1005 payloads.10061007 For example, the following two are equivalent:10081009 ```python1010 model = ChatOpenAI(1011 model="...",1012 use_previous_response_id=True,1013 )1014 model.invoke(1015 [1016 HumanMessage("Hello"),1017 AIMessage("Hi there!", response_metadata={"id": "resp_123"}),1018 HumanMessage("How are you?"),1019 ]1020 )1021 ```10221023 ```python1024 model = ChatOpenAI(model="...", use_responses_api=True)1025 model.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")1026 ```10271028 !!! version-added "Added in `langchain-openai` 0.3.26"1029 """10301031 use_responses_api: bool | None = None1032 """Whether to use the Responses API instead of the Chat API.10331034 If not specified then will be inferred based on invocation params.10351036 !!! version-added "Added in `langchain-openai` 0.3.9"1037 """10381039 output_version: str | None = Field(1040 default_factory=from_env("LC_OUTPUT_VERSION", default=None)1041 )1042 """Version of `AIMessage` output format to use.10431044 This field is used to roll-out new output formats for chat model `AIMessage`1045 responses in a backwards-compatible way.10461047 Supported values:10481049 - `'v0'`: `AIMessage` format as of `langchain-openai 0.3.x`.1050 - `'responses/v1'`: Formats Responses API output items into AIMessage content blocks1051 (Responses API only)1052 - `'v1'`: v1 of LangChain cross-provider standard.10531054 !!! warning "Behavior changed in `langchain-openai` 1.0.0"10551056 Default updated to `"responses/v1"`.1057 """10581059 model_config = ConfigDict(populate_by_name=True)10601061 @property1062 def model(self) -> str:1063 """Same as model_name."""1064 return self.model_name10651066 @model_validator(mode="before")1067 @classmethod1068 def build_extra(cls, values: dict[str, Any]) -> Any:1069 """Build extra kwargs from additional params that were passed in."""1070 all_required_field_names = get_pydantic_field_names(cls)1071 return _build_model_kwargs(values, all_required_field_names)10721073 @field_validator("stream_chunk_timeout", mode="after")1074 @classmethod1075 def _validate_stream_chunk_timeout(cls, value: float | None) -> float | None:1076 """Reject negative constructor values; fall back to the env-driven default.10771078 Matches the env-var path in `_float_env`: a negative value is a typo,1079 not an opt-out (`None`/`0` are the documented off switches). Configs1080 hydrated from YAML/JSON would otherwise silently disable the wrapper1081 and reintroduce the indefinite-stream hang the feature prevents.1082 """1083 if value is not None and value < 0:1084 fallback = _float_env("LANGCHAIN_OPENAI_STREAM_CHUNK_TIMEOUT_S", 120.0)1085 logger.warning(1086 "Invalid `stream_chunk_timeout=%r` (negative); "1087 "falling back to %s. Pass `None` or `0` to disable.",1088 value,1089 fallback,1090 )1091 return fallback1092 return value10931094 @model_validator(mode="before")1095 @classmethod1096 def validate_temperature(cls, values: dict[str, Any]) -> Any:1097 """Validate temperature parameter for different models.10981099 - gpt-5 models (excluding gpt-5-chat) only allow `temperature=1` or unset1100 (Defaults to 1)1101 """1102 model = values.get("model_name") or values.get("model") or ""1103 model_lower = model.lower()11041105 # For o1 models, set temperature=1 if not provided1106 if model_lower.startswith("o1") and "temperature" not in values:1107 values["temperature"] = 111081109 # For gpt-5 models, handle temperature restrictions. Temperature is supported1110 # by gpt-5-chat and gpt-5 models with reasoning_effort='none' or1111 # reasoning={'effort': 'none'}.1112 if (1113 model_lower.startswith("gpt-5")1114 and ("chat" not in model_lower)1115 and values.get("reasoning_effort") != "none"1116 and (values.get("reasoning") or {}).get("effort") != "none"1117 ):1118 temperature = values.get("temperature")1119 if temperature is not None and temperature != 1:1120 # For gpt-5 (non-chat), only temperature=1 is supported1121 # So we remove any non-defaults1122 values.pop("temperature", None)11231124 return values11251126 @model_validator(mode="after")1127 def validate_environment(self) -> Self:1128 """Validate that api key and python package exists in environment."""1129 if self.n is not None and self.n < 1:1130 msg = "n must be at least 1."1131 raise ValueError(msg)1132 if self.n is not None and self.n > 1 and self.streaming:1133 msg = "n must be 1 when streaming."1134 raise ValueError(msg)11351136 # Check OPENAI_ORGANIZATION for backwards compatibility.1137 self.openai_organization = (1138 self.openai_organization1139 or os.getenv("OPENAI_ORG_ID")1140 or os.getenv("OPENAI_ORGANIZATION")1141 )1142 self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE")11431144 # Enable stream_usage by default if using default base URL and client1145 if (1146 all(1147 getattr(self, key, None) is None1148 for key in (1149 "stream_usage",1150 "openai_proxy",1151 "openai_api_base",1152 "base_url",1153 "client",1154 "root_client",1155 "async_client",1156 "root_async_client",1157 "http_client",1158 "http_async_client",1159 )1160 )1161 and "OPENAI_BASE_URL" not in os.environ1162 ):1163 self.stream_usage = True11641165 # Resolve API key from SecretStr or Callable1166 sync_api_key_value: str | Callable[[], str] | None = None1167 async_api_key_value: str | Callable[[], Awaitable[str]] | None = None11681169 if self.openai_api_key is not None:1170 # Because OpenAI and AsyncOpenAI clients support either sync or async1171 # callables for the API key, we need to resolve separate values here.1172 sync_api_key_value, async_api_key_value = _resolve_sync_and_async_api_keys(1173 self.openai_api_key1174 )11751176 client_params: dict = {1177 "organization": self.openai_organization,1178 "base_url": self.openai_api_base,1179 "timeout": self.request_timeout,1180 "default_headers": self.default_headers,1181 "default_query": self.default_query,1182 }1183 if self.max_retries is not None:1184 client_params["max_retries"] = self.max_retries11851186 if self.openai_proxy and (self.http_client or self.http_async_client):1187 openai_proxy = self.openai_proxy1188 http_client = self.http_client1189 http_async_client = self.http_async_client1190 msg = (1191 "Cannot specify 'openai_proxy' if one of "1192 "'http_client'/'http_async_client' is already specified. Received:\n"1193 f"{openai_proxy=}\n{http_client=}\n{http_async_client=}"1194 )1195 raise ValueError(msg)1196 if _should_bypass_socket_options_for_proxy_env(1197 http_socket_options=self.http_socket_options,1198 http_client=self.http_client,1199 http_async_client=self.http_async_client,1200 openai_proxy=self.openai_proxy,1201 ):1202 # Default-shape construction + proxy env var visible to httpx:1203 # skip the custom transport so httpx's env-proxy auto-detection1204 # still applies. Users who want kernel-level TCP tuning alongside1205 # an env proxy can opt in explicitly via `http_socket_options`.1206 resolved_socket_options: tuple[tuple[int, int, int], ...] = ()1207 _log_proxy_env_bypass_once()1208 else:1209 resolved_socket_options = _resolve_socket_options(self.http_socket_options)1210 _warn_if_proxy_env_shadowed(1211 resolved_socket_options, openai_proxy=self.openai_proxy1212 )1213 if not self.client:1214 if sync_api_key_value is None:1215 # No valid sync API key, leave client as None and raise informative1216 # error on invocation.1217 self.client = None1218 self.root_client = None1219 else:1220 if self.openai_proxy and not self.http_client:1221 self.http_client = _build_proxied_sync_httpx_client(1222 proxy=self.openai_proxy,1223 verify=global_ssl_context,1224 socket_options=resolved_socket_options,1225 )1226 sync_specific = {1227 "http_client": self.http_client1228 or _get_default_httpx_client(1229 self.openai_api_base,1230 self.request_timeout,1231 resolved_socket_options,1232 ),1233 "api_key": sync_api_key_value,1234 }1235 self.root_client = openai.OpenAI(**client_params, **sync_specific) # type: ignore[arg-type]1236 self.client = self.root_client.chat.completions1237 if not self.async_client:1238 if self.openai_proxy and not self.http_async_client:1239 self.http_async_client = _build_proxied_async_httpx_client(1240 proxy=self.openai_proxy,1241 verify=global_ssl_context,1242 socket_options=resolved_socket_options,1243 )1244 async_specific = {1245 "http_client": self.http_async_client1246 or _get_default_async_httpx_client(1247 self.openai_api_base,1248 self.request_timeout,1249 resolved_socket_options,1250 ),1251 "api_key": async_api_key_value,1252 }1253 self.root_async_client = openai.AsyncOpenAI(1254 **client_params,1255 **async_specific, # type: ignore[arg-type]1256 )1257 self.async_client = self.root_async_client.chat.completions1258 return self12591260 def _resolve_model_profile(self) -> ModelProfile | None:1261 return _get_default_model_profile(self.model_name) or None12621263 @property1264 def _default_params(self) -> dict[str, Any]:1265 """Get the default parameters for calling OpenAI API."""1266 exclude_if_none = {1267 "presence_penalty": self.presence_penalty,1268 "frequency_penalty": self.frequency_penalty,1269 "seed": self.seed,1270 "top_p": self.top_p,1271 "logprobs": self.logprobs,1272 "top_logprobs": self.top_logprobs,1273 "logit_bias": self.logit_bias,1274 "stop": self.stop or None, # Also exclude empty list for this1275 "max_tokens": self.max_tokens,1276 "extra_body": self.extra_body,1277 "n": self.n,1278 "temperature": self.temperature,1279 "reasoning_effort": self.reasoning_effort,1280 "reasoning": self.reasoning,1281 "verbosity": self.verbosity,1282 "context_management": self.context_management,1283 "include": self.include,1284 "service_tier": self.service_tier,1285 "truncation": self.truncation,1286 "store": self.store,1287 }12881289 return {1290 "model": self.model_name,1291 "stream": self.streaming,1292 **{k: v for k, v in exclude_if_none.items() if v is not None},1293 **self.model_kwargs,1294 }12951296 def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:1297 overall_token_usage: dict = {}1298 system_fingerprint = None1299 for output in llm_outputs:1300 if output is None:1301 # Happens in streaming1302 continue1303 token_usage = output.get("token_usage")1304 if token_usage is not None:1305 for k, v in token_usage.items():1306 if v is None:1307 continue1308 if k in overall_token_usage:1309 overall_token_usage[k] = _update_token_usage(1310 overall_token_usage[k], v1311 )1312 else:1313 overall_token_usage[k] = v1314 if system_fingerprint is None:1315 system_fingerprint = output.get("system_fingerprint")1316 combined = {"token_usage": overall_token_usage, "model_name": self.model_name}1317 if system_fingerprint:1318 combined["system_fingerprint"] = system_fingerprint1319 return combined13201321 def _convert_chunk_to_generation_chunk(1322 self,1323 chunk: dict,1324 default_chunk_class: type,1325 base_generation_info: dict | None,1326 ) -> ChatGenerationChunk | None:1327 if chunk.get("type") == "content.delta": # From beta.chat.completions.stream1328 return None1329 token_usage = chunk.get("usage")1330 choices = (1331 chunk.get("choices", [])1332 # From beta.chat.completions.stream1333 or chunk.get("chunk", {}).get("choices", [])1334 )13351336 usage_metadata: UsageMetadata | None = (1337 _create_usage_metadata(token_usage, chunk.get("service_tier"))1338 if token_usage1339 else None1340 )1341 if len(choices) == 0:1342 # logprobs is implicitly None1343 generation_chunk = ChatGenerationChunk(1344 message=default_chunk_class(content="", usage_metadata=usage_metadata),1345 generation_info=base_generation_info,1346 )1347 if self.output_version == "v1":1348 generation_chunk.message.content = []1349 generation_chunk.message.response_metadata["output_version"] = "v1"13501351 return generation_chunk13521353 choice = choices[0]1354 if choice["delta"] is None:1355 return None13561357 message_chunk = _convert_delta_to_message_chunk(1358 choice["delta"], default_chunk_class1359 )1360 generation_info = {**base_generation_info} if base_generation_info else {}13611362 if finish_reason := choice.get("finish_reason"):1363 generation_info["finish_reason"] = finish_reason1364 if model_name := chunk.get("model"):1365 generation_info["model_name"] = model_name1366 if system_fingerprint := chunk.get("system_fingerprint"):1367 generation_info["system_fingerprint"] = system_fingerprint1368 if service_tier := chunk.get("service_tier"):1369 generation_info["service_tier"] = service_tier13701371 logprobs = choice.get("logprobs")1372 if logprobs:1373 generation_info["logprobs"] = logprobs13741375 if usage_metadata and isinstance(message_chunk, AIMessageChunk):1376 message_chunk.usage_metadata = usage_metadata13771378 message_chunk.response_metadata["model_provider"] = "openai"1379 return ChatGenerationChunk(1380 message=message_chunk, generation_info=generation_info or None1381 )13821383 def _ensure_sync_client_available(self) -> None:1384 """Check that sync client is available, raise error if not."""1385 if self.client is None:1386 msg = (1387 "Sync client is not available. This happens when an async callable "1388 "was provided for the API key. Use async methods (ainvoke, astream) "1389 "instead, or provide a string or sync callable for the API key."1390 )1391 raise ValueError(msg)13921393 def _stream_responses(1394 self,1395 messages: list[BaseMessage],1396 stop: list[str] | None = None,1397 run_manager: CallbackManagerForLLMRun | None = None,1398 **kwargs: Any,1399 ) -> Iterator[ChatGenerationChunk]:1400 self._ensure_sync_client_available()1401 kwargs["stream"] = True1402 payload = self._get_request_payload(messages, stop=stop, **kwargs)1403 try:1404 if self.include_response_headers:1405 raw_context_manager = (1406 self.root_client.with_raw_response.responses.create(**payload)1407 )1408 context_manager = raw_context_manager.parse()1409 headers = {"headers": dict(raw_context_manager.headers)}1410 else:1411 context_manager = self.root_client.responses.create(**payload)1412 headers = {}1413 original_schema_obj = kwargs.get("response_format")14141415 with context_manager as response:1416 is_first_chunk = True1417 current_index = -11418 current_output_index = -11419 current_sub_index = -11420 has_reasoning = False1421 for chunk in response:1422 metadata = headers if is_first_chunk else {}1423 (1424 current_index,1425 current_output_index,1426 current_sub_index,1427 generation_chunk,1428 ) = _convert_responses_chunk_to_generation_chunk(1429 chunk,1430 current_index,1431 current_output_index,1432 current_sub_index,1433 schema=original_schema_obj,1434 metadata=metadata,1435 has_reasoning=has_reasoning,1436 output_version=self.output_version,1437 )1438 if generation_chunk:1439 if run_manager:1440 run_manager.on_llm_new_token(1441 generation_chunk.text, chunk=generation_chunk1442 )1443 is_first_chunk = False1444 if "reasoning" in generation_chunk.message.additional_kwargs:1445 has_reasoning = True1446 yield generation_chunk1447 except openai.BadRequestError as e:1448 _handle_openai_bad_request(e)1449 except openai.APIError as e:1450 _handle_openai_api_error(e)14511452 async def _astream_responses(1453 self,1454 messages: list[BaseMessage],1455 stop: list[str] | None = None,1456 run_manager: AsyncCallbackManagerForLLMRun | None = None,1457 **kwargs: Any,1458 ) -> AsyncIterator[ChatGenerationChunk]:1459 kwargs["stream"] = True1460 payload = self._get_request_payload(messages, stop=stop, **kwargs)1461 try:1462 if self.include_response_headers:1463 raw_context_manager = (1464 await self.root_async_client.with_raw_response.responses.create(1465 **payload1466 )1467 )1468 context_manager = raw_context_manager.parse()1469 headers = {"headers": dict(raw_context_manager.headers)}1470 else:1471 context_manager = await self.root_async_client.responses.create(1472 **payload1473 )1474 headers = {}1475 original_schema_obj = kwargs.get("response_format")14761477 async with context_manager as response:1478 is_first_chunk = True1479 current_index = -11480 current_output_index = -11481 current_sub_index = -11482 has_reasoning = False1483 async for chunk in _astream_with_chunk_timeout(1484 response,1485 self.stream_chunk_timeout,1486 model_name=self.model_name,1487 ):1488 metadata = headers if is_first_chunk else {}1489 (1490 current_index,1491 current_output_index,1492 current_sub_index,1493 generation_chunk,1494 ) = _convert_responses_chunk_to_generation_chunk(1495 chunk,1496 current_index,1497 current_output_index,1498 current_sub_index,1499 schema=original_schema_obj,1500 metadata=metadata,1501 has_reasoning=has_reasoning,1502 output_version=self.output_version,1503 )1504 if generation_chunk:1505 if run_manager:1506 await run_manager.on_llm_new_token(1507 generation_chunk.text, chunk=generation_chunk1508 )1509 is_first_chunk = False1510 if "reasoning" in generation_chunk.message.additional_kwargs:1511 has_reasoning = True1512 yield generation_chunk1513 except openai.BadRequestError as e:1514 _handle_openai_bad_request(e)1515 except openai.APIError as e:1516 _handle_openai_api_error(e)15171518 def _should_stream_usage(1519 self, stream_usage: bool | None = None, **kwargs: Any1520 ) -> bool:1521 """Determine whether to include usage metadata in streaming output.15221523 For backwards compatibility, we check for `stream_options` passed1524 explicitly to kwargs or in the `model_kwargs` and override `self.stream_usage`.1525 """1526 stream_usage_sources = [ # order of precedence1527 stream_usage,1528 kwargs.get("stream_options", {}).get("include_usage"),1529 self.model_kwargs.get("stream_options", {}).get("include_usage"),1530 self.stream_usage,1531 ]1532 for source in stream_usage_sources:1533 if isinstance(source, bool):1534 return source1535 return self.stream_usage or False15361537 def _stream(1538 self,1539 messages: list[BaseMessage],1540 stop: list[str] | None = None,1541 run_manager: CallbackManagerForLLMRun | None = None,1542 *,1543 stream_usage: bool | None = None,1544 **kwargs: Any,1545 ) -> Iterator[ChatGenerationChunk]:1546 self._ensure_sync_client_available()1547 kwargs["stream"] = True1548 stream_usage = self._should_stream_usage(stream_usage, **kwargs)1549 if stream_usage:1550 kwargs["stream_options"] = {"include_usage": stream_usage}1551 payload = self._get_request_payload(messages, stop=stop, **kwargs)1552 default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1553 base_generation_info = {}15541555 try:1556 if "response_format" in payload:1557 if self.include_response_headers:1558 warnings.warn(1559 "Cannot currently include response headers when "1560 "response_format is specified."1561 )1562 payload.pop("stream")1563 response_stream = self.root_client.beta.chat.completions.stream(1564 **payload1565 )1566 context_manager = response_stream1567 else:1568 if self.include_response_headers:1569 raw_response = self.client.with_raw_response.create(**payload)1570 response = raw_response.parse()1571 base_generation_info = {"headers": dict(raw_response.headers)}1572 else:1573 response = self.client.create(**payload)1574 context_manager = response1575 with context_manager as response:1576 is_first_chunk = True1577 for chunk in response:1578 if not isinstance(chunk, dict):1579 chunk = chunk.model_dump()1580 generation_chunk = self._convert_chunk_to_generation_chunk(1581 chunk,1582 default_chunk_class,1583 base_generation_info if is_first_chunk else {},1584 )1585 if generation_chunk is None:1586 continue1587 default_chunk_class = generation_chunk.message.__class__1588 logprobs = (generation_chunk.generation_info or {}).get("logprobs")1589 if run_manager:1590 run_manager.on_llm_new_token(1591 generation_chunk.text,1592 chunk=generation_chunk,1593 logprobs=logprobs,1594 )1595 is_first_chunk = False1596 yield generation_chunk1597 except openai.BadRequestError as e:1598 _handle_openai_bad_request(e)1599 except openai.APIError as e:1600 _handle_openai_api_error(e)1601 if hasattr(response, "get_final_completion") and "response_format" in payload:1602 final_completion = response.get_final_completion()1603 generation_chunk = self._get_generation_chunk_from_completion(1604 final_completion1605 )1606 if run_manager:1607 run_manager.on_llm_new_token(1608 generation_chunk.text, chunk=generation_chunk1609 )1610 yield generation_chunk16111612 def _generate(1613 self,1614 messages: list[BaseMessage],1615 stop: list[str] | None = None,1616 run_manager: CallbackManagerForLLMRun | None = None,1617 **kwargs: Any,1618 ) -> ChatResult:1619 self._ensure_sync_client_available()1620 payload = self._get_request_payload(messages, stop=stop, **kwargs)1621 generation_info = None1622 raw_response = None1623 try:1624 if "response_format" in payload:1625 payload.pop("stream")1626 raw_response = (1627 self.root_client.chat.completions.with_raw_response.parse(**payload)1628 )1629 response = raw_response.parse()1630 elif self._use_responses_api(payload):1631 original_schema_obj = kwargs.get("response_format")1632 if original_schema_obj and _is_pydantic_class(original_schema_obj):1633 raw_response = self.root_client.responses.with_raw_response.parse(1634 **payload1635 )1636 else:1637 raw_response = self.root_client.responses.with_raw_response.create(1638 **payload1639 )1640 response = raw_response.parse()1641 if self.include_response_headers:1642 generation_info = {"headers": dict(raw_response.headers)}1643 return _construct_lc_result_from_responses_api(1644 response,1645 schema=original_schema_obj,1646 metadata=generation_info,1647 output_version=self.output_version,1648 )1649 else:1650 raw_response = self.client.with_raw_response.create(**payload)1651 response = raw_response.parse()1652 except openai.BadRequestError as e:1653 _handle_openai_bad_request(e)1654 except openai.APIError as e:1655 _handle_openai_api_error(e)1656 except Exception as e:1657 if raw_response is not None and hasattr(raw_response, "http_response"):1658 e.response = raw_response.http_response # type: ignore[attr-defined]1659 raise e1660 if (1661 self.include_response_headers1662 and raw_response is not None1663 and hasattr(raw_response, "headers")1664 ):1665 generation_info = {"headers": dict(raw_response.headers)}1666 return self._create_chat_result(response, generation_info)16671668 def _use_responses_api(self, payload: dict) -> bool:1669 if isinstance(self.use_responses_api, bool):1670 return self.use_responses_api1671 if (1672 self.output_version == "responses/v1"1673 or self.context_management is not None1674 or self.include is not None1675 or self.reasoning is not None1676 or self.truncation is not None1677 or self.use_previous_response_id1678 or _model_prefers_responses_api(self.model_name)1679 ):1680 return True1681 return _use_responses_api(payload)16821683 def _get_request_payload(1684 self,1685 input_: LanguageModelInput,1686 *,1687 stop: list[str] | None = None,1688 **kwargs: Any,1689 ) -> dict:1690 messages = self._convert_input(input_).to_messages()1691 if stop is not None:1692 kwargs["stop"] = stop16931694 payload = {**self._default_params, **kwargs}16951696 if self._use_responses_api(payload):1697 if self.use_previous_response_id:1698 last_messages, previous_response_id = _get_last_messages(messages)1699 payload_to_use = last_messages if previous_response_id else messages1700 if previous_response_id:1701 payload["previous_response_id"] = previous_response_id1702 payload = _construct_responses_api_payload(payload_to_use, payload)1703 else:1704 payload = _construct_responses_api_payload(messages, payload)1705 else:1706 payload["messages"] = [1707 _convert_message_to_dict(_convert_from_v1_to_chat_completions(m))1708 if isinstance(m, AIMessage)1709 else _convert_message_to_dict(m)1710 for m in messages1711 ]1712 return payload17131714 def _create_chat_result(1715 self,1716 response: dict | openai.BaseModel,1717 generation_info: dict | None = None,1718 ) -> ChatResult:1719 generations = []17201721 response_dict = (1722 response1723 if isinstance(response, dict)1724 # `parsed` may hold arbitrary Pydantic models from structured output.1725 # Exclude it from this dump and copy it from the typed response below.1726 else response.model_dump(1727 exclude={"choices": {"__all__": {"message": {"parsed"}}}}1728 )1729 )1730 # Sometimes the AI Model calling will get error, we should raise it (this is1731 # typically followed by a null value for `choices`, which we raise for1732 # separately below).1733 if response_dict.get("error"):1734 raise ValueError(response_dict.get("error"))17351736 # Raise informative error messages for non-OpenAI chat completions APIs1737 # that return malformed responses.1738 try:1739 choices = response_dict["choices"]1740 except KeyError as e:1741 msg = f"Response missing 'choices' key: {response_dict.keys()}"1742 raise KeyError(msg) from e17431744 if choices is None:1745 # Some OpenAI-compatible APIs (e.g., vLLM) may return null choices1746 # when the response format differs or an error occurs without1747 # populating the error field. Provide a more helpful error message.1748 msg = (1749 "Received response with null value for 'choices'. "1750 "This can happen when using OpenAI-compatible APIs (e.g., vLLM) "1751 "that return a response in an unexpected format. "1752 f"Full response keys: {list(response_dict.keys())}"1753 )1754 raise TypeError(msg)17551756 token_usage = response_dict.get("usage")1757 service_tier = response_dict.get("service_tier")17581759 for res in choices:1760 message = _convert_dict_to_message(res["message"])1761 if token_usage and isinstance(message, AIMessage):1762 message.usage_metadata = _create_usage_metadata(1763 token_usage, service_tier1764 )1765 generation_info = generation_info or {}1766 generation_info["finish_reason"] = (1767 res.get("finish_reason")1768 if res.get("finish_reason") is not None1769 else generation_info.get("finish_reason")1770 )1771 if "logprobs" in res:1772 generation_info["logprobs"] = res["logprobs"]1773 gen = ChatGeneration(message=message, generation_info=generation_info)1774 generations.append(gen)1775 llm_output = {1776 "token_usage": token_usage,1777 "model_provider": "openai",1778 "model_name": response_dict.get("model", self.model_name),1779 "system_fingerprint": response_dict.get("system_fingerprint", ""),1780 }1781 if "id" in response_dict:1782 llm_output["id"] = response_dict["id"]1783 if service_tier:1784 llm_output["service_tier"] = service_tier17851786 if isinstance(response, openai.BaseModel) and getattr(1787 response, "choices", None1788 ):1789 message = response.choices[0].message # type: ignore[attr-defined]1790 if hasattr(message, "parsed"):1791 generations[0].message.additional_kwargs["parsed"] = message.parsed1792 if hasattr(message, "refusal"):1793 generations[0].message.additional_kwargs["refusal"] = message.refusal17941795 return ChatResult(generations=generations, llm_output=llm_output)17961797 async def _astream(1798 self,1799 messages: list[BaseMessage],1800 stop: list[str] | None = None,1801 run_manager: AsyncCallbackManagerForLLMRun | None = None,1802 *,1803 stream_usage: bool | None = None,1804 **kwargs: Any,1805 ) -> AsyncIterator[ChatGenerationChunk]:1806 kwargs["stream"] = True1807 stream_usage = self._should_stream_usage(stream_usage, **kwargs)1808 if stream_usage:1809 kwargs["stream_options"] = {"include_usage": stream_usage}1810 payload = self._get_request_payload(messages, stop=stop, **kwargs)1811 default_chunk_class: type[BaseMessageChunk] = AIMessageChunk1812 base_generation_info = {}18131814 try:1815 if "response_format" in payload:1816 if self.include_response_headers:1817 warnings.warn(1818 "Cannot currently include response headers when "1819 "response_format is specified."1820 )1821 payload.pop("stream")1822 response_stream = self.root_async_client.beta.chat.completions.stream(1823 **payload1824 )1825 context_manager = response_stream1826 else:1827 if self.include_response_headers:1828 raw_response = await self.async_client.with_raw_response.create(1829 **payload1830 )1831 response = raw_response.parse()1832 base_generation_info = {"headers": dict(raw_response.headers)}1833 else:1834 response = await self.async_client.create(**payload)1835 context_manager = response1836 async with context_manager as response:1837 is_first_chunk = True1838 async for chunk in _astream_with_chunk_timeout(1839 response,1840 self.stream_chunk_timeout,1841 model_name=self.model_name,1842 ):1843 if not isinstance(chunk, dict):1844 chunk = chunk.model_dump()1845 generation_chunk = self._convert_chunk_to_generation_chunk(1846 chunk,1847 default_chunk_class,1848 base_generation_info if is_first_chunk else {},1849 )1850 if generation_chunk is None:1851 continue1852 default_chunk_class = generation_chunk.message.__class__1853 logprobs = (generation_chunk.generation_info or {}).get("logprobs")1854 if run_manager:1855 await run_manager.on_llm_new_token(1856 generation_chunk.text,1857 chunk=generation_chunk,1858 logprobs=logprobs,1859 )1860 is_first_chunk = False1861 yield generation_chunk1862 except openai.BadRequestError as e:1863 _handle_openai_bad_request(e)1864 except openai.APIError as e:1865 _handle_openai_api_error(e)1866 if hasattr(response, "get_final_completion") and "response_format" in payload:1867 final_completion = await response.get_final_completion()1868 generation_chunk = self._get_generation_chunk_from_completion(1869 final_completion1870 )1871 if run_manager:1872 await run_manager.on_llm_new_token(1873 generation_chunk.text, chunk=generation_chunk1874 )1875 yield generation_chunk18761877 async def _agenerate(1878 self,1879 messages: list[BaseMessage],1880 stop: list[str] | None = None,1881 run_manager: AsyncCallbackManagerForLLMRun | None = None,1882 **kwargs: Any,1883 ) -> ChatResult:1884 payload = self._get_request_payload(messages, stop=stop, **kwargs)1885 generation_info = None1886 raw_response = None1887 try:1888 if "response_format" in payload:1889 payload.pop("stream")1890 raw_response = await self.root_async_client.chat.completions.with_raw_response.parse( # noqa: E5011891 **payload1892 )1893 response = raw_response.parse()1894 elif self._use_responses_api(payload):1895 original_schema_obj = kwargs.get("response_format")1896 if original_schema_obj and _is_pydantic_class(original_schema_obj):1897 raw_response = (1898 await self.root_async_client.responses.with_raw_response.parse(1899 **payload1900 )1901 )1902 else:1903 raw_response = (1904 await self.root_async_client.responses.with_raw_response.create(1905 **payload1906 )1907 )1908 response = raw_response.parse()1909 if self.include_response_headers:1910 generation_info = {"headers": dict(raw_response.headers)}1911 return _construct_lc_result_from_responses_api(1912 response,1913 schema=original_schema_obj,1914 metadata=generation_info,1915 output_version=self.output_version,1916 )1917 else:1918 raw_response = await self.async_client.with_raw_response.create(1919 **payload1920 )1921 response = raw_response.parse()1922 except openai.BadRequestError as e:1923 _handle_openai_bad_request(e)1924 except openai.APIError as e:1925 _handle_openai_api_error(e)1926 except Exception as e:1927 if raw_response is not None and hasattr(raw_response, "http_response"):1928 e.response = raw_response.http_response # type: ignore[attr-defined]1929 raise e1930 if (1931 self.include_response_headers1932 and raw_response is not None1933 and hasattr(raw_response, "headers")1934 ):1935 generation_info = {"headers": dict(raw_response.headers)}1936 return await run_in_executor(1937 None, self._create_chat_result, response, generation_info1938 )19391940 @property1941 def _identifying_params(self) -> dict[str, Any]:1942 """Get the identifying parameters."""1943 return {"model_name": self.model_name, **self._default_params}19441945 def _get_invocation_params(1946 self, stop: list[str] | None = None, **kwargs: Any1947 ) -> dict[str, Any]:1948 """Get the parameters used to invoke the model."""1949 params = {1950 "model": self.model_name,1951 **super()._get_invocation_params(stop=stop),1952 **self._default_params,1953 **kwargs,1954 }1955 # Redact headers from built-in remote MCP tool invocations1956 if (tools := params.get("tools")) and isinstance(tools, list):1957 params["tools"] = [1958 ({**tool, "headers": "**REDACTED**"} if "headers" in tool else tool)1959 if isinstance(tool, dict) and tool.get("type") == "mcp"1960 else tool1961 for tool in tools1962 ]19631964 return params19651966 def _get_ls_params(1967 self, stop: list[str] | None = None, **kwargs: Any1968 ) -> LangSmithParams:1969 """Get standard params for tracing."""1970 params = self._get_invocation_params(stop=stop, **kwargs)1971 ls_params = LangSmithParams(1972 ls_provider="openai",1973 ls_model_name=params.get("model", self.model_name),1974 ls_model_type="chat",1975 ls_temperature=params.get("temperature", self.temperature),1976 )1977 if ls_max_tokens := params.get("max_tokens", self.max_tokens) or params.get(1978 "max_completion_tokens", self.max_tokens1979 ):1980 ls_params["ls_max_tokens"] = ls_max_tokens1981 if ls_stop := stop or params.get("stop", None):1982 ls_params["ls_stop"] = ls_stop1983 return ls_params19841985 @property1986 def _llm_type(self) -> str:1987 """Return type of chat model.19881989 Will always return `'openai-chat'` regardless of the specific model name.1990 """1991 return "openai-chat"19921993 def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:1994 if self.tiktoken_model_name is not None:1995 model = self.tiktoken_model_name1996 else:1997 model = self.model_name19981999 try:2000 encoding = tiktoken.encoding_for_model(model)
Findings
✓ No findings reported for this file.