1"""Chat models for conversational AI."""23from __future__ import annotations45import asyncio6import builtins # noqa: TC0037import contextlib8import inspect9import json10from abc import ABC, abstractmethod11from collections.abc import AsyncIterator, Callable, Iterator, Sequence12from functools import cached_property13from operator import itemgetter14from typing import TYPE_CHECKING, Any, Literal, cast, overload1516from langchain_protocol.protocol import MessageFinishData17from pydantic import BaseModel, ConfigDict, Field, model_validator18from typing_extensions import Self, override1920from langchain_core._api import beta, deprecated, suppress_langchain_deprecation_warning21from langchain_core.caches import BaseCache22from langchain_core.callbacks import (23 AsyncCallbackManager,24 AsyncCallbackManagerForLLMRun,25 CallbackManager,26 CallbackManagerForLLMRun,27 Callbacks,28)29from langchain_core.globals import get_llm_cache30from langchain_core.language_models._compat_bridge import (31 achunks_to_events,32 amessage_to_events,33 chunks_to_events,34 message_to_events,35)36from langchain_core.language_models._utils import (37 _filter_invocation_params_for_tracing,38 _normalize_messages,39 _update_message_content_to_blocks,40)41from langchain_core.language_models.base import (42 BaseLanguageModel,43 LangSmithParams,44 LanguageModelInput,45)46from langchain_core.language_models.chat_model_stream import (47 AsyncChatModelStream,48 ChatModelStream,49)50from langchain_core.language_models.model_profile import (51 ModelProfile,52 _warn_unknown_profile_keys,53)54from langchain_core.load import dumpd, dumps55from langchain_core.messages import (56 AIMessage,57 AIMessageChunk,58 AnyMessage,59 BaseMessage,60 convert_to_messages,61 is_data_content_block,62 message_chunk_to_message,63)64from langchain_core.messages import content as types65from langchain_core.messages.block_translators.openai import (66 convert_to_openai_image_block,67)68from langchain_core.output_parsers.openai_tools import (69 JsonOutputKeyToolsParser,70 PydanticToolsParser,71)72from langchain_core.outputs import (73 ChatGeneration,74 ChatGenerationChunk,75 ChatResult,76 Generation,77 LLMResult,78 RunInfo,79)80from langchain_core.outputs.chat_generation import merge_chat_generation_chunks81from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue82from langchain_core.rate_limiters import BaseRateLimiter83from langchain_core.runnables import RunnableBinding, RunnableMap, RunnablePassthrough84from langchain_core.runnables.config import ensure_config, run_in_executor85from langchain_core.tracers._streaming import (86 _StreamingCallbackHandler,87 _V2StreamingCallbackHandler,88)89from langchain_core.utils.function_calling import (90 convert_to_json_schema,91 convert_to_openai_tool,92)93from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass94from langchain_core.utils.utils import LC_ID_PREFIX, from_env9596if TYPE_CHECKING:97 import uuid98 from collections.abc import Awaitable99100 from langchain_protocol.protocol import MessagesData101102 from langchain_core.output_parsers.base import OutputParserLike103 from langchain_core.runnables import Runnable, RunnableConfig104 from langchain_core.runnables.schema import StreamEvent105 from langchain_core.tools import BaseTool106107108def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:109 if hasattr(error, "response"):110 response = error.response111 metadata: dict = {}112 if hasattr(response, "json"):113 try:114 metadata["body"] = response.json()115 except Exception:116 try:117 metadata["body"] = getattr(response, "text", None)118 except Exception:119 metadata["body"] = None120 if hasattr(response, "headers"):121 try:122 metadata["headers"] = dict(response.headers)123 except Exception:124 metadata["headers"] = None125 if hasattr(response, "status_code"):126 metadata["status_code"] = response.status_code127 if hasattr(error, "request_id"):128 metadata["request_id"] = error.request_id129 generations = [130 ChatGeneration(message=AIMessage(content="", response_metadata=metadata))131 ]132 else:133 generations = []134135 return generations136137138def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:139 """Format messages for tracing in `on_chat_model_start`.140141 - Update image content blocks to OpenAI Chat Completions format (backward142 compatibility).143 - Add `type` key to content blocks that have a single key.144145 Args:146 messages: List of messages to format.147148 Returns:149 List of messages formatted for tracing.150151 """152 messages_to_trace = []153 for message in messages:154 message_to_trace = message155 if isinstance(message.content, list):156 for idx, block in enumerate(message.content):157 if isinstance(block, dict):158 # Update image content blocks to OpenAI # Chat Completions format.159 if (160 block.get("type") == "image"161 and is_data_content_block(block)162 and not ("file_id" in block or block.get("source_type") == "id")163 ):164 if message_to_trace is message:165 # Shallow copy166 message_to_trace = message.model_copy()167 message_to_trace.content = list(message_to_trace.content)168169 message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy170 convert_to_openai_image_block(block)171 )172 elif (173 block.get("type") == "file"174 and is_data_content_block(block) # v0 (image/audio/file) or v1175 and "base64" in block176 # Backward compat: convert v1 base64 blocks to v0177 ):178 if message_to_trace is message:179 # Shallow copy180 message_to_trace = message.model_copy()181 message_to_trace.content = list(message_to_trace.content)182183 message_to_trace.content[idx] = { # type: ignore[index]184 **{k: v for k, v in block.items() if k != "base64"},185 "data": block["base64"],186 "source_type": "base64",187 }188 elif len(block) == 1 and "type" not in block:189 # Tracing assumes all content blocks have a "type" key. Here190 # we add this key if it is missing, and there's an obvious191 # choice for the type (e.g., a single key in the block).192 if message_to_trace is message:193 # Shallow copy194 message_to_trace = message.model_copy()195 message_to_trace.content = list(message_to_trace.content)196 key = next(iter(block))197 message_to_trace.content[idx] = { # type: ignore[index]198 "type": key,199 key: block[key],200 }201 messages_to_trace.append(message_to_trace)202203 return messages_to_trace204205206def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:207 """Generate from a stream.208209 Args:210 stream: Iterator of `ChatGenerationChunk`.211212 Raises:213 ValueError: If no generations are found in the stream.214215 Returns:216 Chat result.217218 """219 generation = next(stream, None)220 if generation:221 generation += list(stream)222 if generation is None:223 msg = "No generations found in stream."224 raise ValueError(msg)225 return ChatResult(226 generations=[227 ChatGeneration(228 message=message_chunk_to_message(generation.message),229 generation_info=generation.generation_info,230 )231 ]232 )233234235async def agenerate_from_stream(236 stream: AsyncIterator[ChatGenerationChunk],237) -> ChatResult:238 """Async generate from a stream.239240 Args:241 stream: AsyncIterator of `ChatGenerationChunk`.242243 Returns:244 Chat result.245246 """247 chunks = [chunk async for chunk in stream]248 return await run_in_executor(None, generate_from_stream, iter(chunks))249250251def _format_ls_structured_output(ls_structured_output_format: dict | None) -> dict:252 if ls_structured_output_format:253 try:254 ls_structured_output_format_dict = {255 "ls_structured_output_format": {256 "kwargs": ls_structured_output_format.get("kwargs", {}),257 "schema": convert_to_json_schema(258 ls_structured_output_format["schema"]259 ),260 }261 }262 except ValueError:263 ls_structured_output_format_dict = {}264 else:265 ls_structured_output_format_dict = {}266267 return ls_structured_output_format_dict268269270class BaseChatModel(BaseLanguageModel[AIMessage], ABC):271 r"""Base class for chat models.272273 Key imperative methods:274 Methods that actually call the underlying model.275276 This table provides a brief overview of the main imperative methods. Please see the base `Runnable` reference for full documentation.277278 | Method | Input | Output | Description |279 | ---------------------- | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------- |280 | `invoke` | `str` \| `list[dict | tuple | BaseMessage]` \| `PromptValue` | `BaseMessage` | A single chat model call. |281 | `ainvoke` | `'''` | `BaseMessage` | Defaults to running `invoke` in an async executor. |282 | `stream` | `'''` | `Iterator[BaseMessageChunk]` | Defaults to yielding output of `invoke`. |283 | `astream` | `'''` | `AsyncIterator[BaseMessageChunk]` | Defaults to yielding output of `ainvoke`. |284 | `astream_events` | `'''` | `AsyncIterator[StreamEvent]` | Event types: `on_chat_model_start`, `on_chat_model_stream`, `on_chat_model_end`. |285 | `batch` | `list[''']` | `list[BaseMessage]` | Defaults to running `invoke` in concurrent threads. |286 | `abatch` | `list[''']` | `list[BaseMessage]` | Defaults to running `ainvoke` in concurrent threads. |287 | `batch_as_completed` | `list[''']` | `Iterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `invoke` in concurrent threads. |288 | `abatch_as_completed` | `list[''']` | `AsyncIterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `ainvoke` in concurrent threads. |289290 Key declarative methods:291 Methods for creating another `Runnable` using the chat model.292293 This table provides a brief overview of the main declarative methods. Please see the reference for each method for full documentation.294295 | Method | Description |296 | ---------------------------- | ------------------------------------------------------------------------------------------ |297 | `bind_tools` | Create chat model that can call tools. |298 | `with_structured_output` | Create wrapper that structures model output using schema. |299 | `with_retry` | Create wrapper that retries model calls on failure. |300 | `with_fallbacks` | Create wrapper that falls back to other models on failure. |301 | `configurable_fields` | Specify init args of the model that can be configured at runtime via the `RunnableConfig`. |302 | `configurable_alternatives` | Specify alternative models which can be swapped in at runtime via the `RunnableConfig`. |303304 Creating custom chat model:305 Custom chat model implementations should inherit from this class.306 Please reference the table below for information about which307 methods and properties are required or optional for implementations.308309 | Method/Property | Description | Required |310 | -------------------------------- | ------------------------------------------------------------------ | ----------------- |311 | `_generate` | Use to generate a chat result from a prompt | Required |312 | `_llm_type` (property) | Used to uniquely identify the type of the model. Used for logging. | Required |313 | `_identifying_params` (property) | Represent model parameterization for tracing purposes. | Optional |314 | `_stream` | Use to implement streaming | Optional |315 | `_agenerate` | Use to implement a native async method | Optional |316 | `_astream` | Use to implement async version of `_stream` | Optional |317318 """ # noqa: E501319320 rate_limiter: BaseRateLimiter | None = Field(default=None, exclude=True)321 "An optional rate limiter to use for limiting the number of requests."322323 disable_streaming: bool | Literal["tool_calling"] = False324 """Whether to disable streaming for this model.325326 If streaming is bypassed, then `stream`/`astream`/`astream_events` will327 defer to `invoke`/`ainvoke`.328329 - If `True`, will always bypass streaming case.330 - If `'tool_calling'`, will bypass streaming case only when the model is called331 with a `tools` keyword argument. In other words, LangChain will automatically332 switch to non-streaming behavior (`invoke`) only when the tools argument is333 provided. This offers the best of both worlds.334 - If `False` (Default), will always use streaming case if available.335336 The main reason for this flag is that code might be written using `stream` and337 a user may want to swap out a given model for another model whose implementation338 does not properly support streaming.339 """340341 output_version: str | None = Field(342 default_factory=from_env("LC_OUTPUT_VERSION", default=None)343 )344 """Version of `AIMessage` output format to store in message content.345346 `AIMessage.content_blocks` will lazily parse the contents of `content` into a347 standard format. This flag can be used to additionally store the standard format348 in message content, e.g., for serialization purposes.349350 Supported values:351352 - `'v0'`: provider-specific format in content (can lazily-parse with353 `content_blocks`)354 - `'v1'`: standardized format in content (consistent with `content_blocks`)355356 Partner packages (e.g.,357 [`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this358 field to roll out new content formats in a backward-compatible way.359360 !!! version-added "Added in `langchain-core` 1.0.0"361362 """363364 profile: ModelProfile | None = Field(default=None, exclude=True)365 """Profile detailing model capabilities.366367 !!! warning "Beta feature"368369 This is a beta feature. The format of model profiles is subject to change.370371 If not specified, automatically loaded from the provider package on initialization372 if data is available.373374 Example profile data includes context window sizes, supported modalities, or support375 for tool calling, structured output, and other features.376377 !!! version-added "Added in `langchain-core` 1.1.0"378 """379380 model_config = ConfigDict(381 arbitrary_types_allowed=True,382 )383384 def _resolve_model_profile(self) -> ModelProfile | None:385 """Return the default model profile, or `None` if unavailable.386387 Override this in subclasses instead of `_set_model_profile`. The base388 validator calls it automatically and handles assignment. This avoids389 coupling partner code to Pydantic validator mechanics.390391 Each partner needs its own override because things can vary per-partner,392 such as the attribute that identifies the model (e.g., `model`,393 `model_name`, `model_id`, `deployment_name`) and the partner-local394 `_get_default_model_profile` function that reads from each partner's own395 profile data.396 """397 # TODO: consider adding a `_model_identifier` property on BaseChatModel398 # to standardize how partners identify their model, which could allow a399 # default implementation here that calls a shared400 # profile-loading mechanism.401 return None402403 @model_validator(mode="after")404 def _set_model_profile(self) -> Self:405 """Populate `profile` from `_resolve_model_profile` if not provided.406407 Partners should override `_resolve_model_profile` rather than this408 validator. Overriding this with a new `@model_validator` replaces the409 base validator (Pydantic v2 behavior), bypassing the standard resolution410 path. A plain method override does not prevent the base validator from411 running.412 """413 if self.profile is None:414 # Suppress errors from partner overrides (e.g., missing profile415 # files, broken imports) so model construction never fails over an416 # optional field.417 with contextlib.suppress(Exception):418 self.profile = self._resolve_model_profile()419 return self420421 # NOTE: _check_profile_keys must be defined AFTER _set_model_profile.422 # Pydantic v2 runs mode="after" validators in definition order.423 @model_validator(mode="after")424 def _check_profile_keys(self) -> Self:425 """Warn on unrecognized profile keys."""426 # isinstance guard: ModelProfile is a TypedDict (always a dict), but427 # protects against unexpected types from partner overrides.428 if self.profile and isinstance(self.profile, dict):429 _warn_unknown_profile_keys(self.profile)430 return self431432 @cached_property433 def _serialized(self) -> builtins.dict[str, Any]:434 # self is always a Serializable object in this case, thus the result is435 # guaranteed to be a dict since dumpd uses the default callback, which uses436 # obj.to_json which always returns TypedDict subclasses437 return cast("builtins.dict[str, Any]", dumpd(self))438439 # --- Runnable methods ---440441 @property442 @override443 def OutputType(self) -> Any:444 """Get the output type for this `Runnable`."""445 return AnyMessage446447 def _convert_input(self, model_input: LanguageModelInput) -> PromptValue:448 if isinstance(model_input, PromptValue):449 return model_input450 if isinstance(model_input, str):451 return StringPromptValue(text=model_input)452 if isinstance(model_input, Sequence):453 return ChatPromptValue(messages=convert_to_messages(model_input))454 msg = (455 f"Invalid input type {type(model_input)}. "456 "Must be a PromptValue, str, or list of BaseMessages."457 )458 raise ValueError(msg)459460 @override461 def invoke(462 self,463 input: LanguageModelInput,464 config: RunnableConfig | None = None,465 *,466 stop: list[str] | None = None,467 **kwargs: Any,468 ) -> AIMessage:469 config = ensure_config(config)470 return cast(471 "AIMessage",472 cast(473 "ChatGeneration",474 self.generate_prompt(475 [self._convert_input(input)],476 stop=stop,477 callbacks=config.get("callbacks"),478 tags=config.get("tags"),479 metadata=config.get("metadata"),480 run_name=config.get("run_name"),481 run_id=config.pop("run_id", None),482 **kwargs,483 ).generations[0][0],484 ).message,485 )486487 @override488 async def ainvoke(489 self,490 input: LanguageModelInput,491 config: RunnableConfig | None = None,492 *,493 stop: list[str] | None = None,494 **kwargs: Any,495 ) -> AIMessage:496 config = ensure_config(config)497 llm_result = await self.agenerate_prompt(498 [self._convert_input(input)],499 stop=stop,500 callbacks=config.get("callbacks"),501 tags=config.get("tags"),502 metadata=config.get("metadata"),503 run_name=config.get("run_name"),504 run_id=config.pop("run_id", None),505 **kwargs,506 )507 return cast(508 "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message509 )510511 def _streaming_disabled(self, **kwargs: Any) -> bool:512 """Return whether streaming is hard-disabled for this call.513514 Shared opt-outs honored by both `_should_stream` and515 `_should_use_protocol_streaming` — these override any affirmative trigger516 (attached handler, `stream=True`, etc.):517518 - `self.disable_streaming is True`519 - `self.disable_streaming == "tool_calling"` with `tools` passed520 - `stream=<falsy>` in call kwargs521 - `self.streaming is False` on the instance522 """523 if self.disable_streaming is True:524 return True525 # We assume tools are passed in via "tools" kwarg in all models.526 if self.disable_streaming == "tool_calling" and kwargs.get("tools"):527 return True528 if "stream" in kwargs and not kwargs["stream"]:529 return True530 return (531 "streaming" in self.model_fields_set532 and getattr(self, "streaming", None) is False533 )534535 def _should_stream(536 self,537 *,538 async_api: bool,539 run_manager: CallbackManagerForLLMRun540 | AsyncCallbackManagerForLLMRun541 | None = None,542 **kwargs: Any,543 ) -> bool:544 """Determine if a given model call should hit the streaming API."""545 sync_not_implemented = type(self)._stream == BaseChatModel._stream # noqa: SLF001546 async_not_implemented = type(self)._astream == BaseChatModel._astream # noqa: SLF001547548 # Check if streaming is implemented.549 if (not async_api) and sync_not_implemented:550 return False551 # Note, since async falls back to sync we check both here.552 if async_api and async_not_implemented and sync_not_implemented:553 return False554555 if self._streaming_disabled(**kwargs):556 return False557558 # Affirmative: explicit `stream=<truthy>` kwarg.559 if kwargs.get("stream"):560 return True561562 # Affirmative: instance-level `streaming=True` attribute.563 if (564 "streaming" in self.model_fields_set565 and getattr(self, "streaming", None) is True566 ):567 return True568569 # Affirmative: a v1 streaming callback handler is attached.570 handlers = run_manager.handlers if run_manager else []571 return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)572573 def _should_use_protocol_streaming(574 self,575 *,576 async_api: bool,577 run_manager: CallbackManagerForLLMRun578 | AsyncCallbackManagerForLLMRun579 | None = None,580 **kwargs: Any,581 ) -> bool:582 """Determine whether an invoke should route through the v2 event path.583584 Runs alongside `_should_stream` inside `_generate_with_cache` /585 `_agenerate_with_cache` — after the run manager is open — and586 wins over the v1 streaming branch when a handler has declared587 itself a `_V2StreamingCallbackHandler`. Parallel to588 `_should_stream` rather than a delegation — v1 and v2 have589 disjoint affirmative triggers.590591 Args:592 async_api: Whether the caller is on the async path.593 run_manager: The active LLM run manager.594 **kwargs: Call kwargs; inspected for `disable_streaming`595 semantics and an explicit `stream=False` override.596597 Returns:598 `True` if any attached handler inherits599 `_V2StreamingCallbackHandler` and the model can drive the v2600 event generator (natively or via the `_stream` compat601 bridge).602 """603 # Opt-in: only route through v2 when a v2 handler is attached.604 handlers = run_manager.handlers if run_manager else []605 if not any(isinstance(h, _V2StreamingCallbackHandler) for h in handlers):606 return False607608 # Need a source of v2 events on the requested flavor. A native609 # `_(a)stream_chat_model_events` hook bypasses the bridge;610 # otherwise the bridge wraps `_stream` / `_astream`. Async can611 # fall back to sync.612 #613 # `cls._stream is not BaseChatModel._stream` is an identity614 # check for "subclass overrode `_stream`" — same pattern as615 # `_should_stream`.616 cls = type(self)617 has_native_sync = getattr(cls, "_stream_chat_model_events", None) is not None618 has_native_async = getattr(cls, "_astream_chat_model_events", None) is not None619 overrides_sync = cls._stream is not BaseChatModel._stream620 overrides_async = cls._astream is not BaseChatModel._astream621 has_sync_source = has_native_sync or overrides_sync622 has_async_source = has_native_async or overrides_async623 has_source = (624 (has_sync_source or has_async_source) if async_api else has_sync_source625 )626 if not has_source:627 return False628629 return not self._streaming_disabled(**kwargs)630631 def _iter_v2_events(632 self,633 messages: list[BaseMessage],634 *,635 run_manager: CallbackManagerForLLMRun,636 stream: ChatModelStream,637 stop: list[str] | None = None,638 **kwargs: Any,639 ) -> Iterator[MessagesData]:640 """Drive the v2 event generator with per-event dispatch.641642 Shared between the `stream_events(version="v3")` pump and the643 invoke-time v2 branch in `_generate_with_cache`. Picks the native644 `_stream_chat_model_events` hook when the subclass provides one,645 else bridges `_stream` chunks via `chunks_to_events`. Each event646 is dispatched into `stream` and fired as `on_stream_event` on647 the run manager. Run-lifecycle callbacks648 (`on_chat_model_start` / `on_llm_end` / `on_llm_error`) and649 rate-limiter acquisition are the caller's responsibility.650651 Args:652 messages: Normalized input messages.653 run_manager: Active LLM run manager; receives654 `on_stream_event` per event.655 stream: Accumulator owned by the caller; receives each656 event via `stream.dispatch`.657 stop: Optional stop sequences.658 **kwargs: Forwarded to the event producer.659660 Yields:661 Each protocol event produced by the model.662 """663 native = cast(664 "Callable[..., Iterator[MessagesData]] | None",665 getattr(self, "_stream_chat_model_events", None),666 )667 if native is not None:668 event_iter: Iterator[MessagesData] = native(669 messages, stop=stop, run_manager=run_manager, **kwargs670 )671 else:672 event_iter = chunks_to_events(673 self._stream(messages, stop=stop, run_manager=run_manager, **kwargs),674 message_id=stream.message_id,675 )676 for event in event_iter:677 stream.dispatch(event)678 run_manager.on_stream_event(event)679 yield event680681 async def _aiter_v2_events(682 self,683 messages: list[BaseMessage],684 *,685 run_manager: AsyncCallbackManagerForLLMRun,686 stream: AsyncChatModelStream,687 stop: list[str] | None = None,688 **kwargs: Any,689 ) -> AsyncIterator[MessagesData]:690 """Async counterpart to `_iter_v2_events`.691692 See `_iter_v2_events` for the shared contract.693 """694 native = cast(695 "Callable[..., AsyncIterator[MessagesData]] | None",696 getattr(self, "_astream_chat_model_events", None),697 )698 if native is not None:699 event_iter: AsyncIterator[MessagesData] = native(700 messages, stop=stop, run_manager=run_manager, **kwargs701 )702 else:703 event_iter = achunks_to_events(704 self._astream(messages, stop=stop, run_manager=run_manager, **kwargs),705 message_id=stream.message_id,706 )707 async for event in event_iter:708 stream.dispatch(event)709 await run_manager.on_stream_event(event)710 yield event711712 @override713 def stream(714 self,715 input: LanguageModelInput,716 config: RunnableConfig | None = None,717 *,718 stop: list[str] | None = None,719 **kwargs: Any,720 ) -> Iterator[AIMessageChunk]:721 if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):722 # Model doesn't implement streaming, so use default implementation723 yield cast(724 "AIMessageChunk",725 self.invoke(input, config=config, stop=stop, **kwargs),726 )727 else:728 config = ensure_config(config)729 messages = self._convert_input(input).to_messages()730 ls_structured_output_format = kwargs.pop(731 "ls_structured_output_format", None732 ) or kwargs.pop("structured_output_format", None)733 ls_structured_output_format_dict = _format_ls_structured_output(734 ls_structured_output_format735 )736737 params = self._get_invocation_params(stop=stop, **kwargs)738 options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}739 inheritable_metadata = {740 **(config.get("metadata") or {}),741 **self._get_ls_params_with_defaults(stop=stop, **kwargs),742 }743 callback_manager = CallbackManager.configure(744 config.get("callbacks"),745 self.callbacks,746 self.verbose,747 config.get("tags"),748 self.tags,749 inheritable_metadata,750 self.metadata,751 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(752 params753 ),754 )755 (run_manager,) = callback_manager.on_chat_model_start(756 self._serialized,757 [_format_for_tracing(messages)],758 invocation_params=params,759 options=options,760 name=config.get("run_name"),761 run_id=config.pop("run_id", None),762 batch_size=1,763 )764765 chunks: list[ChatGenerationChunk] = []766767 if self.rate_limiter:768 self.rate_limiter.acquire(blocking=True)769770 try:771 input_messages = _normalize_messages(messages)772 run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))773 yielded = False774 index = -1775 index_type = ""776 for chunk in self._stream(input_messages, stop=stop, **kwargs):777 if chunk.message.id is None:778 chunk.message.id = run_id779 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)780 if self.output_version == "v1":781 # Overwrite .content with .content_blocks782 chunk.message = _update_message_content_to_blocks(783 chunk.message, "v1"784 )785 for block in cast(786 "list[types.ContentBlock]", chunk.message.content787 ):788 if block["type"] != index_type:789 index_type = block["type"]790 index += 1791 if "index" not in block:792 block["index"] = index793 run_manager.on_llm_new_token(794 cast("str", chunk.message.content), chunk=chunk795 )796 chunks.append(chunk)797 yield cast("AIMessageChunk", chunk.message)798 yielded = True799800 # Yield a final empty chunk with chunk_position="last" if not yet801 # yielded802 if (803 yielded804 and isinstance(chunk.message, AIMessageChunk)805 and not chunk.message.chunk_position806 ):807 empty_content: str | list = (808 "" if isinstance(chunk.message.content, str) else []809 )810 msg_chunk = AIMessageChunk(811 content=empty_content, chunk_position="last", id=run_id812 )813 run_manager.on_llm_new_token(814 "", chunk=ChatGenerationChunk(message=msg_chunk)815 )816 yield msg_chunk817 except BaseException as e:818 generations_with_error_metadata = _generate_response_from_error(e)819 chat_generation_chunk = merge_chat_generation_chunks(chunks)820 if chat_generation_chunk:821 generations = [822 [chat_generation_chunk],823 generations_with_error_metadata,824 ]825 else:826 generations = [generations_with_error_metadata]827 run_manager.on_llm_error(828 e,829 response=LLMResult(generations=generations),830 )831 raise832833 generation = merge_chat_generation_chunks(chunks)834 if generation is None:835 err = ValueError("No generation chunks were returned")836 run_manager.on_llm_error(err, response=LLMResult(generations=[]))837 raise err838839 run_manager.on_llm_end(LLMResult(generations=[[generation]]))840841 @override842 async def astream(843 self,844 input: LanguageModelInput,845 config: RunnableConfig | None = None,846 *,847 stop: list[str] | None = None,848 **kwargs: Any,849 ) -> AsyncIterator[AIMessageChunk]:850 if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):851 # No async or sync stream is implemented, so fall back to ainvoke852 yield cast(853 "AIMessageChunk",854 await self.ainvoke(input, config=config, stop=stop, **kwargs),855 )856 return857858 config = ensure_config(config)859 messages = self._convert_input(input).to_messages()860861 ls_structured_output_format = kwargs.pop(862 "ls_structured_output_format", None863 ) or kwargs.pop("structured_output_format", None)864 ls_structured_output_format_dict = _format_ls_structured_output(865 ls_structured_output_format866 )867868 params = self._get_invocation_params(stop=stop, **kwargs)869 options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}870 inheritable_metadata = {871 **(config.get("metadata") or {}),872 **self._get_ls_params_with_defaults(stop=stop, **kwargs),873 }874 callback_manager = AsyncCallbackManager.configure(875 config.get("callbacks"),876 self.callbacks,877 self.verbose,878 config.get("tags"),879 self.tags,880 inheritable_metadata,881 self.metadata,882 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(883 params884 ),885 )886 (run_manager,) = await callback_manager.on_chat_model_start(887 self._serialized,888 [_format_for_tracing(messages)],889 invocation_params=params,890 options=options,891 name=config.get("run_name"),892 run_id=config.pop("run_id", None),893 batch_size=1,894 )895896 if self.rate_limiter:897 await self.rate_limiter.aacquire(blocking=True)898899 chunks: list[ChatGenerationChunk] = []900901 try:902 input_messages = _normalize_messages(messages)903 run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))904 yielded = False905 index = -1906 index_type = ""907 async for chunk in self._astream(908 input_messages,909 stop=stop,910 **kwargs,911 ):912 if chunk.message.id is None:913 chunk.message.id = run_id914 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)915 if self.output_version == "v1":916 # Overwrite .content with .content_blocks917 chunk.message = _update_message_content_to_blocks(918 chunk.message, "v1"919 )920 for block in cast(921 "list[types.ContentBlock]", chunk.message.content922 ):923 if block["type"] != index_type:924 index_type = block["type"]925 index += 1926 if "index" not in block:927 block["index"] = index928 await run_manager.on_llm_new_token(929 cast("str", chunk.message.content), chunk=chunk930 )931 chunks.append(chunk)932 yield cast("AIMessageChunk", chunk.message)933 yielded = True934935 # Yield a final empty chunk with chunk_position="last" if not yet yielded936 if (937 yielded938 and isinstance(chunk.message, AIMessageChunk)939 and not chunk.message.chunk_position940 ):941 empty_content: str | list = (942 "" if isinstance(chunk.message.content, str) else []943 )944 msg_chunk = AIMessageChunk(945 content=empty_content, chunk_position="last", id=run_id946 )947 await run_manager.on_llm_new_token(948 "", chunk=ChatGenerationChunk(message=msg_chunk)949 )950 yield msg_chunk951 except BaseException as e:952 generations_with_error_metadata = _generate_response_from_error(e)953 chat_generation_chunk = merge_chat_generation_chunks(chunks)954 if chat_generation_chunk:955 generations = [[chat_generation_chunk], generations_with_error_metadata]956 else:957 generations = [generations_with_error_metadata]958 await run_manager.on_llm_error(959 e,960 response=LLMResult(generations=generations),961 )962 raise963964 generation = merge_chat_generation_chunks(chunks)965 if not generation:966 err = ValueError("No generation chunks were returned")967 await run_manager.on_llm_error(err, response=LLMResult(generations=[]))968 raise err969970 await run_manager.on_llm_end(971 LLMResult(generations=[[generation]]),972 )973974 # --- stream_events v3 ---975976 @beta()977 def _chat_model_stream_v3(978 self,979 input: LanguageModelInput,980 config: RunnableConfig | None = None,981 *,982 stop: list[str] | None = None,983 **kwargs: Any,984 ) -> ChatModelStream:985 """Internal v3 sync streaming implementation.986987 Public entry point: `stream_events(version='v3')`.988 """989 config = ensure_config(config)990 messages = self._convert_input(input).to_messages()991 input_messages = _normalize_messages(messages)992993 # Strip tracing-only kwargs before forwarding to `_stream` — matches994 # `stream()` / `astream()`. Provider clients reject unknown kwargs,995 # so `.with_structured_output().stream_events(version="v3", ...)`996 # and any other binding that carries `ls_structured_output_format`997 # / `structured_output_format` would raise without this pop.998 ls_structured_output_format = kwargs.pop(999 "ls_structured_output_format", None1000 ) or kwargs.pop("structured_output_format", None)1001 ls_structured_output_format_dict = _format_ls_structured_output(1002 ls_structured_output_format1003 )10041005 params = self._get_invocation_params(stop=stop, **kwargs)1006 options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}1007 inheritable_metadata = {1008 **(config.get("metadata") or {}),1009 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1010 }1011 callback_manager = CallbackManager.configure(1012 config.get("callbacks"),1013 self.callbacks,1014 self.verbose,1015 config.get("tags"),1016 self.tags,1017 inheritable_metadata,1018 self.metadata,1019 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1020 params1021 ),1022 )1023 stream = ChatModelStream()1024 run_manager: CallbackManagerForLLMRun | None = None1025 event_iter_ref: Iterator[MessagesData] | None = None1026 rate_limiter_acquired = self.rate_limiter is None1027 run_name = config.get("run_name")1028 run_id = config.pop("run_id", None)10291030 def ensure_started() -> None:1031 nonlocal event_iter_ref, run_manager1032 if event_iter_ref is not None:1033 return10341035 (run_manager,) = callback_manager.on_chat_model_start(1036 self._serialized,1037 [_format_for_tracing(messages)],1038 invocation_params=params,1039 options=options,1040 name=run_name,1041 run_id=run_id,1042 batch_size=1,1043 )1044 stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))1045 event_iter_ref = iter(1046 self._iter_v2_events(1047 input_messages,1048 run_manager=run_manager,1049 stream=stream,1050 stop=stop,1051 **kwargs,1052 )1053 )10541055 def pump_one() -> bool:1056 nonlocal rate_limiter_acquired1057 ensure_started()1058 if not rate_limiter_acquired:1059 assert self.rate_limiter is not None # noqa: S1011060 self.rate_limiter.acquire(blocking=True)1061 rate_limiter_acquired = True1062 assert event_iter_ref is not None # noqa: S1011063 assert run_manager is not None # noqa: S1011064 try:1065 next(event_iter_ref)1066 except StopIteration:1067 if not stream.done:1068 if stream.has_events:1069 # Native event producers may omit the terminal1070 # `message-finish`. Close the lifecycle here so1071 # `on_llm_end` still observes the assembled1072 # message. A truly empty stream remains an error1073 # for parity with `stream()`.1074 stream.dispatch(MessageFinishData(event="message-finish"))1075 else:1076 err = ValueError("No generation chunks were returned")1077 stream.fail(err)1078 run_manager.on_llm_error(1079 err,1080 response=LLMResult(generations=[]),1081 )1082 return False1083 if stream.done and stream.output_message is not None:1084 run_manager.on_llm_end(1085 LLMResult(1086 generations=[1087 [ChatGeneration(message=stream.output_message)],1088 ],1089 ),1090 )1091 return False1092 except BaseException as exc:1093 stream.fail(exc)1094 run_manager.on_llm_error(1095 exc,1096 response=LLMResult(generations=[]),1097 )1098 return False1099 if stream.done and stream.output_message is not None:1100 run_manager.on_llm_end(1101 LLMResult(1102 generations=[1103 [ChatGeneration(message=stream.output_message)],1104 ],1105 ),1106 )1107 return True11081109 stream.set_start(ensure_started)1110 stream.bind_pump(pump_one)1111 return stream11121113 @beta()1114 async def _achat_model_stream_v3(1115 self,1116 input: LanguageModelInput,1117 config: RunnableConfig | None = None,1118 *,1119 stop: list[str] | None = None,1120 **kwargs: Any,1121 ) -> AsyncChatModelStream:1122 """Internal v3 async streaming implementation.11231124 Public entry point: `astream_events(version='v3')`.1125 """1126 config = ensure_config(config)1127 messages = self._convert_input(input).to_messages()1128 input_messages = _normalize_messages(messages)11291130 # Strip tracing-only kwargs before forwarding — see the sync v31131 # implementation for the full rationale.1132 ls_structured_output_format = kwargs.pop(1133 "ls_structured_output_format", None1134 ) or kwargs.pop("structured_output_format", None)1135 ls_structured_output_format_dict = _format_ls_structured_output(1136 ls_structured_output_format1137 )11381139 params = self._get_invocation_params(stop=stop, **kwargs)1140 options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}1141 inheritable_metadata = {1142 **(config.get("metadata") or {}),1143 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1144 }1145 callback_manager = AsyncCallbackManager.configure(1146 config.get("callbacks"),1147 self.callbacks,1148 self.verbose,1149 config.get("tags"),1150 self.tags,1151 inheritable_metadata,1152 self.metadata,1153 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1154 params1155 ),1156 )1157 stream = AsyncChatModelStream()1158 run_manager: AsyncCallbackManagerForLLMRun | None = None1159 run_name = config.get("run_name")1160 run_id = config.pop("run_id", None)1161 start_lock = asyncio.Lock()11621163 async def _produce() -> None:1164 assert run_manager is not None # noqa: S1011165 try:1166 if self.rate_limiter:1167 await self.rate_limiter.aacquire(blocking=True)11681169 async for _event in self._aiter_v2_events(1170 input_messages,1171 run_manager=run_manager,1172 stream=stream,1173 stop=stop,1174 **kwargs,1175 ):1176 pass1177 if not stream.done:1178 if stream.has_events:1179 # Native event producers may omit the terminal1180 # `message-finish`. Close the lifecycle here so1181 # `on_llm_end` sees the finalized message. A1182 # truly empty stream remains an error for parity1183 # with `astream()`.1184 stream.dispatch(MessageFinishData(event="message-finish"))1185 else:1186 err = ValueError("No generation chunks were returned")1187 stream.fail(err)1188 await run_manager.on_llm_error(1189 err,1190 response=LLMResult(generations=[]),1191 )1192 return1193 if stream.done and stream.output_message is not None:1194 await run_manager.on_llm_end(1195 LLMResult(1196 generations=[1197 [ChatGeneration(message=stream.output_message)],1198 ],1199 ),1200 )1201 except asyncio.CancelledError as exc:1202 stream.fail(exc)1203 # Close the callback lifecycle so tracing observes a1204 # matching end event for the earlier `on_chat_model_start`.1205 # `on_llm_error` is `@shielded`, so the callback runs to1206 # completion in the background even though the `await`1207 # here re-raises our cancellation.1208 with contextlib.suppress(Exception):1209 await run_manager.on_llm_error(1210 exc,1211 response=LLMResult(generations=[]),1212 )1213 raise1214 except BaseException as exc:1215 stream.fail(exc)1216 await run_manager.on_llm_error(1217 exc,1218 response=LLMResult(generations=[]),1219 )12201221 async def ensure_started() -> None:1222 nonlocal run_manager1223 if stream._producer_task is not None: # noqa: SLF0011224 return12251226 async with start_lock:1227 if stream._producer_task is not None: # noqa: SLF0011228 return12291230 (run_manager,) = await callback_manager.on_chat_model_start(1231 self._serialized,1232 [_format_for_tracing(messages)],1233 invocation_params=params,1234 options=options,1235 name=run_name,1236 run_id=run_id,1237 batch_size=1,1238 )1239 stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))1240 stream._producer_task = asyncio.get_running_loop().create_task( # noqa: SLF0011241 _produce()1242 )12431244 async def _on_aclose_fail(exc: BaseException) -> None:1245 assert run_manager is not None # noqa: S1011246 # Invoked by `stream.aclose()` only when the producer was1247 # cancelled before `_produce` ran — so `on_llm_error` from1248 # the CancelledError handler never fired. Shielded by the1249 # callback manager; runs to completion even if our caller1250 # is being cancelled.1251 await run_manager.on_llm_error(1252 exc,1253 response=LLMResult(generations=[]),1254 )12551256 stream.set_start(ensure_started)1257 stream._on_aclose_fail = _on_aclose_fail # noqa: SLF0011258 return stream12591260 @overload # type: ignore[override]1261 def stream_events(1262 self,1263 input: LanguageModelInput,1264 config: RunnableConfig | None = None,1265 *,1266 version: Literal["v1", "v2"] = "v2",1267 **kwargs: Any,1268 ) -> Iterator[StreamEvent]: ...12691270 @overload1271 def stream_events(1272 self,1273 input: LanguageModelInput,1274 config: RunnableConfig | None = None,1275 *,1276 version: Literal["v3"],1277 stop: list[str] | None = None,1278 **kwargs: Any,1279 ) -> ChatModelStream: ...12801281 def stream_events(1282 self,1283 input: LanguageModelInput,1284 config: RunnableConfig | None = None,1285 *,1286 version: Literal["v1", "v2", "v3"] = "v2",1287 stop: list[str] | None = None,1288 **kwargs: Any,1289 ) -> Iterator[StreamEvent] | ChatModelStream:1290 """Stream events from this chat model.12911292 For `version="v1"` / `"v2"`, yields `StreamEvent` dicts (see1293 `Runnable.stream_events`). For `version="v3"`, returns a1294 `ChatModelStream` exposing typed projections (`.text`,1295 `.reasoning`, `.tool_calls`, `.output`).12961297 !!! warning "Beta"12981299 `version="v3"` is in beta. The protocol shape, return type,1300 and surface area may change in future releases. Calling it1301 emits a `LangChainBetaWarning` at runtime.13021303 !!! note "v3 always produces v1-shaped content"13041305 `ChatModelStream.output.content` is always a list of v11306 content blocks (text / reasoning / tool_call / image / …),1307 regardless of the model's `output_version` attribute. The1308 setting only affects the legacy `stream()` / `astream()` /1309 `invoke()` paths. If you're mixing1310 `stream_events(version="v3")` with those paths in the same1311 pipeline and need a consistent output shape across them,1312 set `output_version="v1"` on the model.13131314 Args:1315 input: The model input.1316 config: Optional runnable config.1317 version: Streaming-event schema version. `"v3"` selects the1318 content-block-centric streaming protocol.1319 stop: Optional stop sequences. Only used for `version="v3"`;1320 ignored otherwise.1321 **kwargs: Additional keyword arguments. For `version="v3"`,1322 forwarded to the model.13231324 Returns:1325 For `version="v3"`, a `ChatModelStream` with typed1326 projections. Otherwise an `Iterator[StreamEvent]`.1327 """1328 if version == "v3":1329 return self._chat_model_stream_v3(input, config, stop=stop, **kwargs)1330 return super().stream_events(1331 input, config, version=version, stop=stop, **kwargs1332 )13331334 @overload1335 def astream_events(1336 self,1337 input: LanguageModelInput,1338 config: RunnableConfig | None = None,1339 *,1340 version: Literal["v1", "v2"] = "v2",1341 **kwargs: Any,1342 ) -> AsyncIterator[StreamEvent]: ...13431344 @overload1345 def astream_events(1346 self,1347 input: LanguageModelInput,1348 config: RunnableConfig | None = None,1349 *,1350 version: Literal["v3"],1351 stop: list[str] | None = None,1352 **kwargs: Any,1353 ) -> Awaitable[AsyncChatModelStream]: ...13541355 def astream_events(1356 self,1357 input: LanguageModelInput,1358 config: RunnableConfig | None = None,1359 *,1360 version: Literal["v1", "v2", "v3"] = "v2",1361 stop: list[str] | None = None,1362 **kwargs: Any,1363 ) -> AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]:1364 """Async variant of `stream_events`. See `stream_events` for full docs."""1365 if version == "v3":1366 return self._achat_model_stream_v3(input, config, stop=stop, **kwargs)1367 # v1/v2: forward to Runnable.astream_events (async generator).1368 return super().astream_events(1369 input, config, version=version, stop=stop, **kwargs1370 )13711372 # --- Custom methods ---13731374 def _combine_llm_outputs(1375 self, _llm_outputs: list[builtins.dict | None], /1376 ) -> builtins.dict:1377 return {}13781379 def _convert_cached_generations(self, cache_val: list) -> list[ChatGeneration]:1380 """Convert cached Generation objects to ChatGeneration objects.13811382 Handle case where cache contains Generation objects instead of1383 ChatGeneration objects. This can happen due to serialization/deserialization1384 issues or legacy cache data (see #22389).13851386 Args:1387 cache_val: List of cached generation objects.13881389 Returns:1390 List of ChatGeneration objects.13911392 """1393 converted_generations = []1394 for gen in cache_val:1395 if isinstance(gen, Generation) and not isinstance(gen, ChatGeneration):1396 # Convert Generation to ChatGeneration by creating AIMessage1397 # from the text content1398 chat_gen = ChatGeneration(1399 message=AIMessage(content=gen.text),1400 generation_info=gen.generation_info,1401 )1402 converted_generations.append(chat_gen)1403 else:1404 # Already a ChatGeneration or other expected type1405 if hasattr(gen, "message") and isinstance(gen.message, AIMessage):1406 # We zero out cost on cache hits1407 gen.message = gen.message.model_copy(1408 update={1409 "usage_metadata": {1410 **(gen.message.usage_metadata or {}),1411 "total_cost": 0,1412 }1413 }1414 )1415 converted_generations.append(gen)1416 return converted_generations14171418 def _replay_v2_events_for_cache_hit(1419 self,1420 generations: list[ChatGeneration],1421 *,1422 run_manager: CallbackManagerForLLMRun | None,1423 **kwargs: Any,1424 ) -> None:1425 """Replay cached messages as v2 events when a v2 handler is attached.14261427 A warm cache must produce the same `on_stream_event` stream as a1428 cold call so LangGraph-style consumers do not observe behavior1429 that depends on cache state. Gated by1430 `_should_use_protocol_streaming` so a `disable_streaming` config1431 that suppresses v2 on cold calls also suppresses it here.1432 """1433 if run_manager is None or not self._should_use_protocol_streaming(1434 async_api=False, run_manager=run_manager, **kwargs1435 ):1436 return1437 message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1438 for gen in generations:1439 msg = getattr(gen, "message", None)1440 if not isinstance(msg, AIMessage):1441 continue1442 for event in message_to_events(msg, message_id=message_id):1443 run_manager.on_stream_event(event)14441445 async def _areplay_v2_events_for_cache_hit(1446 self,1447 generations: list[ChatGeneration],1448 *,1449 run_manager: AsyncCallbackManagerForLLMRun | None,1450 **kwargs: Any,1451 ) -> None:1452 """Async counterpart to `_replay_v2_events_for_cache_hit`."""1453 if run_manager is None or not self._should_use_protocol_streaming(1454 async_api=True, run_manager=run_manager, **kwargs1455 ):1456 return1457 message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1458 for gen in generations:1459 msg = getattr(gen, "message", None)1460 if not isinstance(msg, AIMessage):1461 continue1462 async for event in amessage_to_events(msg, message_id=message_id):1463 await run_manager.on_stream_event(event)14641465 def _get_invocation_params(1466 self,1467 stop: list[str] | None = None,1468 **kwargs: Any,1469 ) -> builtins.dict:1470 params = self._dict_for_compat()1471 params["stop"] = stop1472 return {**params, **kwargs}14731474 def _get_ls_params(1475 self,1476 stop: list[str] | None = None,1477 **kwargs: Any,1478 ) -> LangSmithParams:1479 """Get standard params for LangSmith tracing.14801481 Subclasses **should override** this method to populate `ls_provider`1482 and `ls_model_name` from provider-specific attributes (e.g. `self.model`,1483 `self.model_name`, `self.model_id`) and to honor per-call overrides1484 passed via `kwargs["model"]` so that runtime `bind`/`invoke` model1485 changes are reflected in traces.14861487 The implementation here is a best-effort fallback for subclasses that1488 do not override it. It is not part of a stable contract and the1489 derivation rules may change:14901491 - `ls_provider` is derived from the class name by stripping a leading1492 or trailing `"Chat"` and lowercasing the remainder. This produces1493 ugly values for multi-word providers (e.g. `ChatGoogleGenerativeAI`1494 would become `"googlegenerativeai"`).14951496 Override to set a stable, conventional value1497 such as `"google_genai"`.1498 - `ls_model_name` is resolved from `kwargs["model"]`, then1499 `self.model`, then `self.model_name`.15001501 Subclasses whose model attribute has a different name1502 (`model_id`, `deployment_name`, ...) must override.1503 """1504 # get default provider from class name1505 default_provider = self.__class__.__name__1506 if default_provider.startswith("Chat"):1507 default_provider = default_provider[4:].lower()1508 elif default_provider.endswith("Chat"):1509 default_provider = default_provider[:-4]1510 default_provider = default_provider.lower()15111512 ls_params = LangSmithParams(ls_provider=default_provider, ls_model_type="chat")1513 if stop:1514 ls_params["ls_stop"] = stop15151516 # model1517 if "model" in kwargs and isinstance(kwargs["model"], str):1518 ls_params["ls_model_name"] = kwargs["model"]1519 elif hasattr(self, "model") and isinstance(self.model, str):1520 ls_params["ls_model_name"] = self.model1521 elif hasattr(self, "model_name") and isinstance(self.model_name, str):1522 ls_params["ls_model_name"] = self.model_name15231524 # temperature1525 if "temperature" in kwargs and isinstance(kwargs["temperature"], (int, float)):1526 ls_params["ls_temperature"] = kwargs["temperature"]1527 elif hasattr(self, "temperature") and isinstance(1528 self.temperature, (int, float)1529 ):1530 ls_params["ls_temperature"] = self.temperature15311532 # max_tokens1533 if "max_tokens" in kwargs and isinstance(kwargs["max_tokens"], int):1534 ls_params["ls_max_tokens"] = kwargs["max_tokens"]1535 elif hasattr(self, "max_tokens") and isinstance(self.max_tokens, int):1536 ls_params["ls_max_tokens"] = self.max_tokens15371538 return ls_params15391540 def _get_ls_params_with_defaults(1541 self,1542 stop: list[str] | None = None,1543 **kwargs: Any,1544 ) -> LangSmithParams:1545 """Wrap _get_ls_params to always include ls_integration."""1546 ls_params = self._get_ls_params(stop=stop, **kwargs)1547 ls_params["ls_integration"] = "langchain_chat_model"1548 return ls_params15491550 def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:1551 if self.is_lc_serializable():1552 params = {**kwargs, "stop": stop}1553 param_string = str(sorted(params.items()))1554 # This code is not super efficient as it goes back and forth between1555 # json and dict.1556 serialized_repr = self._serialized1557 _cleanup_llm_representation(serialized_repr, 1)1558 llm_string = json.dumps(serialized_repr, sort_keys=True)1559 return llm_string + "---" + param_string1560 params = self._get_invocation_params(stop=stop, **kwargs)1561 params = {**params, **kwargs}1562 return str(sorted(params.items()))15631564 def generate(1565 self,1566 messages: list[list[BaseMessage]],1567 stop: list[str] | None = None,1568 callbacks: Callbacks = None,1569 *,1570 tags: list[str] | None = None,1571 metadata: builtins.dict[str, Any] | None = None,1572 run_name: str | None = None,1573 run_id: uuid.UUID | None = None,1574 **kwargs: Any,1575 ) -> LLMResult:1576 """Pass a sequence of prompts to the model and return model generations.15771578 This method should make use of batched calls for models that expose a batched1579 API.15801581 Use this method when you want to:15821583 1. Take advantage of batched calls,1584 2. Need more output from the model than just the top generated value,1585 3. Are building chains that are agnostic to the underlying language model1586 type (e.g., pure text completion models vs chat models).15871588 Args:1589 messages: List of list of messages.1590 stop: Stop words to use when generating.15911592 Model output is cut off at the first occurrence of any of these1593 substrings.1594 callbacks: `Callbacks` to pass through.15951596 Used for executing additional functionality, such as logging or1597 streaming, throughout generation.1598 tags: The tags to apply.1599 metadata: The metadata to apply.1600 run_name: The name of the run.1601 run_id: The ID of the run.1602 **kwargs: Arbitrary additional keyword arguments.16031604 These are usually passed to the model provider API call.16051606 Returns:1607 An `LLMResult`, which contains a list of candidate `Generations` for each1608 input prompt and additional model provider-specific output.16091610 """1611 ls_structured_output_format = kwargs.pop(1612 "ls_structured_output_format", None1613 ) or kwargs.pop("structured_output_format", None)1614 ls_structured_output_format_dict = _format_ls_structured_output(1615 ls_structured_output_format1616 )16171618 params = self._get_invocation_params(stop=stop, **kwargs)1619 options = {"stop": stop, **ls_structured_output_format_dict}1620 inheritable_metadata = {1621 **(metadata or {}),1622 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1623 }16241625 callback_manager = CallbackManager.configure(1626 callbacks,1627 self.callbacks,1628 self.verbose,1629 tags,1630 self.tags,1631 inheritable_metadata,1632 self.metadata,1633 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1634 params1635 ),1636 )1637 messages_to_trace = [1638 _format_for_tracing(message_list) for message_list in messages1639 ]1640 run_managers = callback_manager.on_chat_model_start(1641 self._serialized,1642 messages_to_trace,1643 invocation_params=params,1644 options=options,1645 name=run_name,1646 run_id=run_id,1647 batch_size=len(messages),1648 )1649 results = []1650 input_messages = [1651 _normalize_messages(message_list) for message_list in messages1652 ]1653 for i, m in enumerate(input_messages):1654 try:1655 results.append(1656 self._generate_with_cache(1657 m,1658 stop=stop,1659 run_manager=run_managers[i] if run_managers else None,1660 **kwargs,1661 )1662 )1663 except BaseException as e:1664 if run_managers:1665 generations_with_error_metadata = _generate_response_from_error(e)1666 run_managers[i].on_llm_error(1667 e,1668 response=LLMResult(1669 generations=[generations_with_error_metadata]1670 ),1671 )1672 raise1673 flattened_outputs = [1674 LLMResult(generations=[res.generations], llm_output=res.llm_output)1675 for res in results1676 ]1677 llm_output = self._combine_llm_outputs([res.llm_output for res in results])1678 generations = [res.generations for res in results]1679 output = LLMResult(generations=generations, llm_output=llm_output)1680 if run_managers:1681 run_infos = []1682 for manager, flattened_output in zip(1683 run_managers, flattened_outputs, strict=False1684 ):1685 manager.on_llm_end(flattened_output)1686 run_infos.append(RunInfo(run_id=manager.run_id))1687 output.run = run_infos1688 return output16891690 async def agenerate(1691 self,1692 messages: list[list[BaseMessage]],1693 stop: list[str] | None = None,1694 callbacks: Callbacks = None,1695 *,1696 tags: list[str] | None = None,1697 metadata: builtins.dict[str, Any] | None = None,1698 run_name: str | None = None,1699 run_id: uuid.UUID | None = None,1700 **kwargs: Any,1701 ) -> LLMResult:1702 """Asynchronously pass a sequence of prompts to a model and return generations.17031704 This method should make use of batched calls for models that expose a batched1705 API.17061707 Use this method when you want to:17081709 1. Take advantage of batched calls,1710 2. Need more output from the model than just the top generated value,1711 3. Are building chains that are agnostic to the underlying language model1712 type (e.g., pure text completion models vs chat models).17131714 Args:1715 messages: List of list of messages.1716 stop: Stop words to use when generating.17171718 Model output is cut off at the first occurrence of any of these1719 substrings.1720 callbacks: `Callbacks` to pass through.17211722 Used for executing additional functionality, such as logging or1723 streaming, throughout generation.1724 tags: The tags to apply.1725 metadata: The metadata to apply.1726 run_name: The name of the run.1727 run_id: The ID of the run.1728 **kwargs: Arbitrary additional keyword arguments.17291730 These are usually passed to the model provider API call.17311732 Returns:1733 An `LLMResult`, which contains a list of candidate `Generations` for each1734 input prompt and additional model provider-specific output.17351736 """1737 ls_structured_output_format = kwargs.pop(1738 "ls_structured_output_format", None1739 ) or kwargs.pop("structured_output_format", None)1740 ls_structured_output_format_dict = _format_ls_structured_output(1741 ls_structured_output_format1742 )17431744 params = self._get_invocation_params(stop=stop, **kwargs)1745 options = {"stop": stop, **ls_structured_output_format_dict}1746 inheritable_metadata = {1747 **(metadata or {}),1748 **self._get_ls_params_with_defaults(stop=stop, **kwargs),1749 }17501751 callback_manager = AsyncCallbackManager.configure(1752 callbacks,1753 self.callbacks,1754 self.verbose,1755 tags,1756 self.tags,1757 inheritable_metadata,1758 self.metadata,1759 langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1760 params1761 ),1762 )17631764 messages_to_trace = [1765 _format_for_tracing(message_list) for message_list in messages1766 ]1767 run_managers = await callback_manager.on_chat_model_start(1768 self._serialized,1769 messages_to_trace,1770 invocation_params=params,1771 options=options,1772 name=run_name,1773 batch_size=len(messages),1774 run_id=run_id,1775 )17761777 input_messages = [1778 _normalize_messages(message_list) for message_list in messages1779 ]1780 results = await asyncio.gather(1781 *[1782 self._agenerate_with_cache(1783 m,1784 stop=stop,1785 run_manager=run_managers[i] if run_managers else None,1786 **kwargs,1787 )1788 for i, m in enumerate(input_messages)1789 ],1790 return_exceptions=True,1791 )1792 exceptions = []1793 for i, res in enumerate(results):1794 if isinstance(res, BaseException):1795 if run_managers:1796 generations_with_error_metadata = _generate_response_from_error(res)1797 await run_managers[i].on_llm_error(1798 res,1799 response=LLMResult(1800 generations=[generations_with_error_metadata]1801 ),1802 )1803 exceptions.append(res)1804 if exceptions:1805 if run_managers:1806 await asyncio.gather(1807 *[1808 run_manager.on_llm_end(1809 LLMResult(1810 generations=[res.generations], # type: ignore[union-attr]1811 llm_output=res.llm_output, # type: ignore[union-attr]1812 )1813 )1814 for run_manager, res in zip(run_managers, results, strict=False)1815 if not isinstance(res, Exception)1816 ]1817 )1818 raise exceptions[0]1819 flattened_outputs = [1820 LLMResult(generations=[res.generations], llm_output=res.llm_output) # type: ignore[union-attr]1821 for res in results1822 ]1823 llm_output = self._combine_llm_outputs([res.llm_output for res in results]) # type: ignore[union-attr]1824 generations = [res.generations for res in results] # type: ignore[union-attr]1825 output = LLMResult(generations=generations, llm_output=llm_output)1826 await asyncio.gather(1827 *[1828 run_manager.on_llm_end(flattened_output)1829 for run_manager, flattened_output in zip(1830 run_managers, flattened_outputs, strict=False1831 )1832 ]1833 )1834 if run_managers:1835 output.run = [1836 RunInfo(run_id=run_manager.run_id) for run_manager in run_managers1837 ]1838 return output18391840 @override1841 def generate_prompt(1842 self,1843 prompts: list[PromptValue],1844 stop: list[str] | None = None,1845 callbacks: Callbacks = None,1846 **kwargs: Any,1847 ) -> LLMResult:1848 prompt_messages = [p.to_messages() for p in prompts]1849 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)18501851 @override1852 async def agenerate_prompt(1853 self,1854 prompts: list[PromptValue],1855 stop: list[str] | None = None,1856 callbacks: Callbacks = None,1857 **kwargs: Any,1858 ) -> LLMResult:1859 prompt_messages = [p.to_messages() for p in prompts]1860 return await self.agenerate(1861 prompt_messages, stop=stop, callbacks=callbacks, **kwargs1862 )18631864 def _generate_with_cache(1865 self,1866 messages: list[BaseMessage],1867 stop: list[str] | None = None,1868 run_manager: CallbackManagerForLLMRun | None = None,1869 **kwargs: Any,1870 ) -> ChatResult:1871 llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()1872 # We should check the cache unless it's explicitly set to False1873 # A None cache means we should use the default global cache1874 # if it's configured.1875 check_cache = self.cache or self.cache is None1876 if check_cache:1877 if llm_cache:1878 llm_string = self._get_llm_string(stop=stop, **kwargs)1879 normalized_messages = [1880 (1881 msg.model_copy(update={"id": None})1882 if getattr(msg, "id", None) is not None1883 else msg1884 )1885 for msg in messages1886 ]1887 prompt = dumps(normalized_messages)1888 cache_val = llm_cache.lookup(prompt, llm_string)1889 if isinstance(cache_val, list):1890 converted_generations = self._convert_cached_generations(cache_val)1891 self._replay_v2_events_for_cache_hit(1892 converted_generations,1893 run_manager=run_manager,1894 **kwargs,1895 )1896 return ChatResult(generations=converted_generations)1897 elif self.cache is None:1898 pass1899 else:1900 msg = "Asked to cache, but no cache found at `langchain.cache`."1901 raise ValueError(msg)19021903 # Apply the rate limiter after checking the cache, since1904 # we usually don't want to rate limit cache lookups, but1905 # we do want to rate limit API requests.1906 if self.rate_limiter:1907 self.rate_limiter.acquire(blocking=True)19081909 # v2 streaming: preferred over v1 when any attached handler opts in via1910 # `_V2StreamingCallbackHandler`. Drives the protocol event generator1911 # (native or `_stream` compat bridge) through the shared helper so1912 # `on_stream_event` fires per event, then returns a normal `ChatResult`1913 # so caching / `on_llm_end` stay on the existing generate path.1914 if self._should_use_protocol_streaming(1915 async_api=False,1916 run_manager=run_manager,1917 **kwargs,1918 ):1919 stream_accum = ChatModelStream(1920 message_id=(1921 f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1922 )1923 )1924 assert run_manager is not None # noqa: S1011925 for _event in self._iter_v2_events(1926 messages,1927 run_manager=run_manager,1928 stream=stream_accum,1929 stop=stop,1930 **kwargs,1931 ):1932 pass1933 if stream_accum.output_message is None:1934 msg = "v2 stream finished without producing a message"1935 raise RuntimeError(msg)1936 result = ChatResult(1937 generations=[ChatGeneration(message=stream_accum.output_message)]1938 )1939 # If stream is not explicitly set, check if implicitly requested by1940 # astream_events() or astream_log(). Bail out if _stream not implemented1941 elif self._should_stream(1942 async_api=False,1943 run_manager=run_manager,1944 **kwargs,1945 ):1946 chunks: list[ChatGenerationChunk] = []1947 run_id: str | None = (1948 f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1949 )1950 yielded = False1951 index = -11952 index_type = ""1953 for chunk in self._stream(messages, stop=stop, **kwargs):1954 chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)1955 if self.output_version == "v1":1956 # Overwrite .content with .content_blocks1957 chunk.message = _update_message_content_to_blocks(1958 chunk.message, "v1"1959 )1960 for block in cast(1961 "list[types.ContentBlock]", chunk.message.content1962 ):1963 if block["type"] != index_type:1964 index_type = block["type"]1965 index += 11966 if "index" not in block:1967 block["index"] = index1968 if run_manager:1969 if chunk.message.id is None:1970 chunk.message.id = run_id1971 run_manager.on_llm_new_token(1972 cast("str", chunk.message.content), chunk=chunk1973 )1974 chunks.append(chunk)1975 yielded = True19761977 # Yield a final empty chunk with chunk_position="last" if not yet yielded1978 if (1979 yielded1980 and isinstance(chunk.message, AIMessageChunk)1981 and not chunk.message.chunk_position1982 ):1983 empty_content: str | list = (1984 "" if isinstance(chunk.message.content, str) else []1985 )1986 chunk = ChatGenerationChunk(1987 message=AIMessageChunk(1988 content=empty_content, chunk_position="last", id=run_id1989 )1990 )1991 if run_manager:1992 run_manager.on_llm_new_token("", chunk=chunk)1993 chunks.append(chunk)1994 result = generate_from_stream(iter(chunks))1995 elif inspect.signature(self._generate).parameters.get("run_manager"):1996 result = self._generate(1997 messages, stop=stop, run_manager=run_manager, **kwargs1998 )1999 else:2000 result = self._generate(messages, stop=stop, **kwargs)
Findings
✓ No findings reported for this file.