libs/core/langchain_core/language_models/chat_models.py PYTHON 2,715 lines View on github.com → Search inside
File is large — showing lines 1–2,000 of 2,715.
1"""Chat models for conversational AI."""23from __future__ import annotations45import asyncio6import builtins  # noqa: TC0037import contextlib8import inspect9import json10from abc import ABC, abstractmethod11from collections.abc import AsyncIterator, Callable, Iterator, Sequence12from functools import cached_property13from operator import itemgetter14from typing import TYPE_CHECKING, Any, Literal, cast, overload1516from langchain_protocol.protocol import MessageFinishData17from pydantic import BaseModel, ConfigDict, Field, model_validator18from typing_extensions import Self, override1920from langchain_core._api import beta, deprecated, suppress_langchain_deprecation_warning21from langchain_core.caches import BaseCache22from langchain_core.callbacks import (23    AsyncCallbackManager,24    AsyncCallbackManagerForLLMRun,25    CallbackManager,26    CallbackManagerForLLMRun,27    Callbacks,28)29from langchain_core.globals import get_llm_cache30from langchain_core.language_models._compat_bridge import (31    achunks_to_events,32    amessage_to_events,33    chunks_to_events,34    message_to_events,35)36from langchain_core.language_models._utils import (37    _filter_invocation_params_for_tracing,38    _normalize_messages,39    _update_message_content_to_blocks,40)41from langchain_core.language_models.base import (42    BaseLanguageModel,43    LangSmithParams,44    LanguageModelInput,45)46from langchain_core.language_models.chat_model_stream import (47    AsyncChatModelStream,48    ChatModelStream,49)50from langchain_core.language_models.model_profile import (51    ModelProfile,52    _warn_unknown_profile_keys,53)54from langchain_core.load import dumpd, dumps55from langchain_core.messages import (56    AIMessage,57    AIMessageChunk,58    AnyMessage,59    BaseMessage,60    convert_to_messages,61    is_data_content_block,62    message_chunk_to_message,63)64from langchain_core.messages import content as types65from langchain_core.messages.block_translators.openai import (66 
   convert_to_openai_image_block,67)68from langchain_core.output_parsers.openai_tools import (69    JsonOutputKeyToolsParser,70    PydanticToolsParser,71)72from langchain_core.outputs import (73    ChatGeneration,74    ChatGenerationChunk,75    ChatResult,76    Generation,77    LLMResult,78    RunInfo,79)80from langchain_core.outputs.chat_generation import merge_chat_generation_chunks81from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue82from langchain_core.rate_limiters import BaseRateLimiter83from langchain_core.runnables import RunnableBinding, RunnableMap, RunnablePassthrough84from langchain_core.runnables.config import ensure_config, run_in_executor85from langchain_core.tracers._streaming import (86    _StreamingCallbackHandler,87    _V2StreamingCallbackHandler,88)89from langchain_core.utils.function_calling import (90    convert_to_json_schema,91    convert_to_openai_tool,92)93from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass94from langchain_core.utils.utils import LC_ID_PREFIX, from_env9596if TYPE_CHECKING:97    import uuid98    from collections.abc import Awaitable99100    from langchain_protocol.protocol import MessagesData101102    from langchain_core.output_parsers.base import OutputParserLike103    from langchain_core.runnables import Runnable, RunnableConfig104    from langchain_core.runnables.schema import StreamEvent105    from langchain_core.tools import BaseTool106107108def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:109    if hasattr(error, "response"):110        response = error.response111        metadata: dict = {}112        if hasattr(response, "json"):113            try:114                metadata["body"] = response.json()115            except Exception:116                try:117                    metadata["body"] = getattr(response, "text", None)118                except Exception:119                    metadata["body"] = None120        if 
hasattr(response, "headers"):121            try:122                metadata["headers"] = dict(response.headers)123            except Exception:124                metadata["headers"] = None125        if hasattr(response, "status_code"):126            metadata["status_code"] = response.status_code127        if hasattr(error, "request_id"):128            metadata["request_id"] = error.request_id129        generations = [130            ChatGeneration(message=AIMessage(content="", response_metadata=metadata))131        ]132    else:133        generations = []134135    return generations136137138def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:139    """Format messages for tracing in `on_chat_model_start`.140141    - Update image content blocks to OpenAI Chat Completions format (backward142    compatibility).143    - Add `type` key to content blocks that have a single key.144145    Args:146        messages: List of messages to format.147148    Returns:149        List of messages formatted for tracing.150151    """152    messages_to_trace = []153    for message in messages:154        message_to_trace = message155        if isinstance(message.content, list):156            for idx, block in enumerate(message.content):157                if isinstance(block, dict):158                    # Update image content blocks to OpenAI # Chat Completions format.159                    if (160                        block.get("type") == "image"161                        and is_data_content_block(block)162                        and not ("file_id" in block or block.get("source_type") == "id")163                    ):164                        if message_to_trace is message:165                            # Shallow copy166                            message_to_trace = message.model_copy()167                            message_to_trace.content = list(message_to_trace.content)168169                        message_to_trace.content[idx] = (  # type: ignore[index]  # 
mypy confused by .model_copy170                            convert_to_openai_image_block(block)171                        )172                    elif (173                        block.get("type") == "file"174                        and is_data_content_block(block)  # v0 (image/audio/file) or v1175                        and "base64" in block176                        # Backward compat: convert v1 base64 blocks to v0177                    ):178                        if message_to_trace is message:179                            # Shallow copy180                            message_to_trace = message.model_copy()181                            message_to_trace.content = list(message_to_trace.content)182183                        message_to_trace.content[idx] = {  # type: ignore[index]184                            **{k: v for k, v in block.items() if k != "base64"},185                            "data": block["base64"],186                            "source_type": "base64",187                        }188                    elif len(block) == 1 and "type" not in block:189                        # Tracing assumes all content blocks have a "type" key. 
Here190                        # we add this key if it is missing, and there's an obvious191                        # choice for the type (e.g., a single key in the block).192                        if message_to_trace is message:193                            # Shallow copy194                            message_to_trace = message.model_copy()195                            message_to_trace.content = list(message_to_trace.content)196                        key = next(iter(block))197                        message_to_trace.content[idx] = {  # type: ignore[index]198                            "type": key,199                            key: block[key],200                        }201        messages_to_trace.append(message_to_trace)202203    return messages_to_trace204205206def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:207    """Generate from a stream.208209    Args:210        stream: Iterator of `ChatGenerationChunk`.211212    Raises:213        ValueError: If no generations are found in the stream.214215    Returns:216        Chat result.217218    """219    generation = next(stream, None)220    if generation:221        generation += list(stream)222    if generation is None:223        msg = "No generations found in stream."224        raise ValueError(msg)225    return ChatResult(226        generations=[227            ChatGeneration(228                message=message_chunk_to_message(generation.message),229                generation_info=generation.generation_info,230            )231        ]232    )233234235async def agenerate_from_stream(236    stream: AsyncIterator[ChatGenerationChunk],237) -> ChatResult:238    """Async generate from a stream.239240    Args:241        stream: AsyncIterator of `ChatGenerationChunk`.242243    Returns:244        Chat result.245246    """247    chunks = [chunk async for chunk in stream]248    return await run_in_executor(None, generate_from_stream, iter(chunks))249250251def 
_format_ls_structured_output(ls_structured_output_format: dict | None) -> dict:252    if ls_structured_output_format:253        try:254            ls_structured_output_format_dict = {255                "ls_structured_output_format": {256                    "kwargs": ls_structured_output_format.get("kwargs", {}),257                    "schema": convert_to_json_schema(258                        ls_structured_output_format["schema"]259                    ),260                }261            }262        except ValueError:263            ls_structured_output_format_dict = {}264    else:265        ls_structured_output_format_dict = {}266267    return ls_structured_output_format_dict268269270class BaseChatModel(BaseLanguageModel[AIMessage], ABC):271    r"""Base class for chat models.272273    Key imperative methods:274        Methods that actually call the underlying model.275276        This table provides a brief overview of the main imperative methods. Please see the base `Runnable` reference for full documentation.277278        | Method                 | Input                                                        | Output                                                     | Description                                                                      |279        | ---------------------- | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------- |280        | `invoke`               | `str` \| `list[dict | tuple | BaseMessage]` \| `PromptValue` | `BaseMessage`                                              | A single chat model call.                                                        |281        | `ainvoke`              | `'''`                                                        | `BaseMessage`                                              | Defaults to running `invoke` in an async executor.                            
   |282        | `stream`               | `'''`                                                        | `Iterator[BaseMessageChunk]`                               | Defaults to yielding output of `invoke`.                                         |283        | `astream`              | `'''`                                                        | `AsyncIterator[BaseMessageChunk]`                          | Defaults to yielding output of `ainvoke`.                                        |284        | `astream_events`       | `'''`                                                        | `AsyncIterator[StreamEvent]`                               | Event types: `on_chat_model_start`, `on_chat_model_stream`, `on_chat_model_end`. |285        | `batch`                | `list[''']`                                                  | `list[BaseMessage]`                                        | Defaults to running `invoke` in concurrent threads.                              |286        | `abatch`               | `list[''']`                                                  | `list[BaseMessage]`                                        | Defaults to running `ainvoke` in concurrent threads.                             |287        | `batch_as_completed`   | `list[''']`                                                  | `Iterator[tuple[int, Union[BaseMessage, Exception]]]`      | Defaults to running `invoke` in concurrent threads.                              |288        | `abatch_as_completed`  | `list[''']`                                                  | `AsyncIterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `ainvoke` in concurrent threads.                             |289290    Key declarative methods:291        Methods for creating another `Runnable` using the chat model.292293        This table provides a brief overview of the main declarative methods. 
Please see the reference for each method for full documentation.294295        | Method                       | Description                                                                                |296        | ---------------------------- | ------------------------------------------------------------------------------------------ |297        | `bind_tools`                 | Create chat model that can call tools.                                                     |298        | `with_structured_output`     | Create wrapper that structures model output using schema.                                  |299        | `with_retry`                 | Create wrapper that retries model calls on failure.                                        |300        | `with_fallbacks`             | Create wrapper that falls back to other models on failure.                                 |301        | `configurable_fields`        | Specify init args of the model that can be configured at runtime via the `RunnableConfig`. |302        | `configurable_alternatives`  | Specify alternative models which can be swapped in at runtime via the `RunnableConfig`.    |303304    Creating custom chat model:305        Custom chat model implementations should inherit from this class.306        Please reference the table below for information about which307        methods and properties are required or optional for implementations.308309        | Method/Property                  | Description                                                        | Required          |310        | -------------------------------- | ------------------------------------------------------------------ | ----------------- |311        | `_generate`                      | Use to generate a chat result from a prompt                        | Required          |312        | `_llm_type` (property)           | Used to uniquely identify the type of the model. Used for logging. 
| Required          |313        | `_identifying_params` (property) | Represent model parameterization for tracing purposes.             | Optional          |314        | `_stream`                        | Use to implement streaming                                         | Optional          |315        | `_agenerate`                     | Use to implement a native async method                             | Optional          |316        | `_astream`                       | Use to implement async version of `_stream`                        | Optional          |317318    """  # noqa: E501319320    rate_limiter: BaseRateLimiter | None = Field(default=None, exclude=True)321    "An optional rate limiter to use for limiting the number of requests."322323    disable_streaming: bool | Literal["tool_calling"] = False324    """Whether to disable streaming for this model.325326    If streaming is bypassed, then `stream`/`astream`/`astream_events` will327    defer to `invoke`/`ainvoke`.328329    - If `True`, will always bypass streaming case.330    - If `'tool_calling'`, will bypass streaming case only when the model is called331        with a `tools` keyword argument. In other words, LangChain will automatically332        switch to non-streaming behavior (`invoke`) only when the tools argument is333        provided. This offers the best of both worlds.334    - If `False` (Default), will always use streaming case if available.335336    The main reason for this flag is that code might be written using `stream` and337    a user may want to swap out a given model for another model whose implementation338    does not properly support streaming.339    """340341    output_version: str | None = Field(342        default_factory=from_env("LC_OUTPUT_VERSION", default=None)343    )344    """Version of `AIMessage` output format to store in message content.345346    `AIMessage.content_blocks` will lazily parse the contents of `content` into a347    standard format. 
This flag can be used to additionally store the standard format348    in message content, e.g., for serialization purposes.349350    Supported values:351352    - `'v0'`: provider-specific format in content (can lazily-parse with353        `content_blocks`)354    - `'v1'`: standardized format in content (consistent with `content_blocks`)355356    Partner packages (e.g.,357    [`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this358    field to roll out new content formats in a backward-compatible way.359360    !!! version-added "Added in `langchain-core` 1.0.0"361362    """363364    profile: ModelProfile | None = Field(default=None, exclude=True)365    """Profile detailing model capabilities.366367    !!! warning "Beta feature"368369        This is a beta feature. The format of model profiles is subject to change.370371    If not specified, automatically loaded from the provider package on initialization372    if data is available.373374    Example profile data includes context window sizes, supported modalities, or support375    for tool calling, structured output, and other features.376377    !!! version-added "Added in `langchain-core` 1.1.0"378    """379380    model_config = ConfigDict(381        arbitrary_types_allowed=True,382    )383384    def _resolve_model_profile(self) -> ModelProfile | None:385        """Return the default model profile, or `None` if unavailable.386387        Override this in subclasses instead of `_set_model_profile`. The base388        validator calls it automatically and handles assignment. 
This avoids389        coupling partner code to Pydantic validator mechanics.390391        Each partner needs its own override because things can vary per-partner,392        such as the attribute that identifies the model (e.g., `model`,393        `model_name`, `model_id`, `deployment_name`) and the partner-local394        `_get_default_model_profile` function that reads from each partner's own395        profile data.396        """397        # TODO: consider adding a `_model_identifier` property on BaseChatModel398        # to standardize how partners identify their model, which could allow a399        # default implementation here that calls a shared400        # profile-loading mechanism.401        return None402403    @model_validator(mode="after")404    def _set_model_profile(self) -> Self:405        """Populate `profile` from `_resolve_model_profile` if not provided.406407        Partners should override `_resolve_model_profile` rather than this408        validator. Overriding this with a new `@model_validator` replaces the409        base validator (Pydantic v2 behavior), bypassing the standard resolution410        path. 
A plain method override does not prevent the base validator from411        running.412        """413        if self.profile is None:414            # Suppress errors from partner overrides (e.g., missing profile415            # files, broken imports) so model construction never fails over an416            # optional field.417            with contextlib.suppress(Exception):418                self.profile = self._resolve_model_profile()419        return self420421    # NOTE: _check_profile_keys must be defined AFTER _set_model_profile.422    # Pydantic v2 runs mode="after" validators in definition order.423    @model_validator(mode="after")424    def _check_profile_keys(self) -> Self:425        """Warn on unrecognized profile keys."""426        # isinstance guard: ModelProfile is a TypedDict (always a dict), but427        # protects against unexpected types from partner overrides.428        if self.profile and isinstance(self.profile, dict):429            _warn_unknown_profile_keys(self.profile)430        return self431432    @cached_property433    def _serialized(self) -> builtins.dict[str, Any]:434        # self is always a Serializable object in this case, thus the result is435        # guaranteed to be a dict since dumpd uses the default callback, which uses436        # obj.to_json which always returns TypedDict subclasses437        return cast("builtins.dict[str, Any]", dumpd(self))438439    # --- Runnable methods ---440441    @property442    @override443    def OutputType(self) -> Any:444        """Get the output type for this `Runnable`."""445        return AnyMessage446447    def _convert_input(self, model_input: LanguageModelInput) -> PromptValue:448        if isinstance(model_input, PromptValue):449            return model_input450        if isinstance(model_input, str):451            return StringPromptValue(text=model_input)452        if isinstance(model_input, Sequence):453            return ChatPromptValue(messages=convert_to_messages(model_input))454    
    msg = (455            f"Invalid input type {type(model_input)}. "456            "Must be a PromptValue, str, or list of BaseMessages."457        )458        raise ValueError(msg)459460    @override461    def invoke(462        self,463        input: LanguageModelInput,464        config: RunnableConfig | None = None,465        *,466        stop: list[str] | None = None,467        **kwargs: Any,468    ) -> AIMessage:469        config = ensure_config(config)470        return cast(471            "AIMessage",472            cast(473                "ChatGeneration",474                self.generate_prompt(475                    [self._convert_input(input)],476                    stop=stop,477                    callbacks=config.get("callbacks"),478                    tags=config.get("tags"),479                    metadata=config.get("metadata"),480                    run_name=config.get("run_name"),481                    run_id=config.pop("run_id", None),482                    **kwargs,483                ).generations[0][0],484            ).message,485        )486487    @override488    async def ainvoke(489        self,490        input: LanguageModelInput,491        config: RunnableConfig | None = None,492        *,493        stop: list[str] | None = None,494        **kwargs: Any,495    ) -> AIMessage:496        config = ensure_config(config)497        llm_result = await self.agenerate_prompt(498            [self._convert_input(input)],499            stop=stop,500            callbacks=config.get("callbacks"),501            tags=config.get("tags"),502            metadata=config.get("metadata"),503            run_name=config.get("run_name"),504            run_id=config.pop("run_id", None),505            **kwargs,506        )507        return cast(508            "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message509        )510511    def _streaming_disabled(self, **kwargs: Any) -> bool:512        """Return whether streaming is hard-disabled for this 
call.513514        Shared opt-outs honored by both `_should_stream` and515        `_should_use_protocol_streaming`  these override any affirmative trigger516        (attached handler, `stream=True`, etc.):517518        - `self.disable_streaming is True`519        - `self.disable_streaming == "tool_calling"` with `tools` passed520        - `stream=<falsy>` in call kwargs521        - `self.streaming is False` on the instance522        """523        if self.disable_streaming is True:524            return True525        # We assume tools are passed in via "tools" kwarg in all models.526        if self.disable_streaming == "tool_calling" and kwargs.get("tools"):527            return True528        if "stream" in kwargs and not kwargs["stream"]:529            return True530        return (531            "streaming" in self.model_fields_set532            and getattr(self, "streaming", None) is False533        )534535    def _should_stream(536        self,537        *,538        async_api: bool,539        run_manager: CallbackManagerForLLMRun540        | AsyncCallbackManagerForLLMRun541        | None = None,542        **kwargs: Any,543    ) -> bool:544        """Determine if a given model call should hit the streaming API."""545        sync_not_implemented = type(self)._stream == BaseChatModel._stream  # noqa: SLF001546        async_not_implemented = type(self)._astream == BaseChatModel._astream  # noqa: SLF001547548        # Check if streaming is implemented.549        if (not async_api) and sync_not_implemented:550            return False551        # Note, since async falls back to sync we check both here.552        if async_api and async_not_implemented and sync_not_implemented:553            return False554555        if self._streaming_disabled(**kwargs):556            return False557558        # Affirmative: explicit `stream=<truthy>` kwarg.559        if kwargs.get("stream"):560            return True561562        # Affirmative: instance-level `streaming=True` 
attribute.563        if (564            "streaming" in self.model_fields_set565            and getattr(self, "streaming", None) is True566        ):567            return True568569        # Affirmative: a v1 streaming callback handler is attached.570        handlers = run_manager.handlers if run_manager else []571        return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)572573    def _should_use_protocol_streaming(574        self,575        *,576        async_api: bool,577        run_manager: CallbackManagerForLLMRun578        | AsyncCallbackManagerForLLMRun579        | None = None,580        **kwargs: Any,581    ) -> bool:582        """Determine whether an invoke should route through the v2 event path.583584        Runs alongside `_should_stream` inside `_generate_with_cache` /585        `_agenerate_with_cache`  after the run manager is open  and586        wins over the v1 streaming branch when a handler has declared587        itself a `_V2StreamingCallbackHandler`. Parallel to588        `_should_stream` rather than a delegation  v1 and v2 have589        disjoint affirmative triggers.590591        Args:592            async_api: Whether the caller is on the async path.593            run_manager: The active LLM run manager.594            **kwargs: Call kwargs; inspected for `disable_streaming`595                semantics and an explicit `stream=False` override.596597        Returns:598            `True` if any attached handler inherits599            `_V2StreamingCallbackHandler` and the model can drive the v2600            event generator (natively or via the `_stream` compat601            bridge).602        """603        # Opt-in: only route through v2 when a v2 handler is attached.604        handlers = run_manager.handlers if run_manager else []605        if not any(isinstance(h, _V2StreamingCallbackHandler) for h in handlers):606            return False607608        # Need a source of v2 events on the requested flavor. 
A native609        # `_(a)stream_chat_model_events` hook bypasses the bridge;610        # otherwise the bridge wraps `_stream` / `_astream`. Async can611        # fall back to sync.612        #613        # `cls._stream is not BaseChatModel._stream` is an identity614        # check for "subclass overrode `_stream`"  same pattern as615        # `_should_stream`.616        cls = type(self)617        has_native_sync = getattr(cls, "_stream_chat_model_events", None) is not None618        has_native_async = getattr(cls, "_astream_chat_model_events", None) is not None619        overrides_sync = cls._stream is not BaseChatModel._stream620        overrides_async = cls._astream is not BaseChatModel._astream621        has_sync_source = has_native_sync or overrides_sync622        has_async_source = has_native_async or overrides_async623        has_source = (624            (has_sync_source or has_async_source) if async_api else has_sync_source625        )626        if not has_source:627            return False628629        return not self._streaming_disabled(**kwargs)630631    def _iter_v2_events(632        self,633        messages: list[BaseMessage],634        *,635        run_manager: CallbackManagerForLLMRun,636        stream: ChatModelStream,637        stop: list[str] | None = None,638        **kwargs: Any,639    ) -> Iterator[MessagesData]:640        """Drive the v2 event generator with per-event dispatch.641642        Shared between the `stream_events(version="v3")` pump and the643        invoke-time v2 branch in `_generate_with_cache`. Picks the native644        `_stream_chat_model_events` hook when the subclass provides one,645        else bridges `_stream` chunks via `chunks_to_events`. Each event646        is dispatched into `stream` and fired as `on_stream_event` on647        the run manager. 
Run-lifecycle callbacks648        (`on_chat_model_start` / `on_llm_end` / `on_llm_error`) and649        rate-limiter acquisition are the caller's responsibility.650651        Args:652            messages: Normalized input messages.653            run_manager: Active LLM run manager; receives654                `on_stream_event` per event.655            stream: Accumulator owned by the caller; receives each656                event via `stream.dispatch`.657            stop: Optional stop sequences.658            **kwargs: Forwarded to the event producer.659660        Yields:661            Each protocol event produced by the model.662        """663        native = cast(664            "Callable[..., Iterator[MessagesData]] | None",665            getattr(self, "_stream_chat_model_events", None),666        )667        if native is not None:668            event_iter: Iterator[MessagesData] = native(669                messages, stop=stop, run_manager=run_manager, **kwargs670            )671        else:672            event_iter = chunks_to_events(673                self._stream(messages, stop=stop, run_manager=run_manager, **kwargs),674                message_id=stream.message_id,675            )676        for event in event_iter:677            stream.dispatch(event)678            run_manager.on_stream_event(event)679            yield event680681    async def _aiter_v2_events(682        self,683        messages: list[BaseMessage],684        *,685        run_manager: AsyncCallbackManagerForLLMRun,686        stream: AsyncChatModelStream,687        stop: list[str] | None = None,688        **kwargs: Any,689    ) -> AsyncIterator[MessagesData]:690        """Async counterpart to `_iter_v2_events`.691692        See `_iter_v2_events` for the shared contract.693        """694        native = cast(695            "Callable[..., AsyncIterator[MessagesData]] | None",696            getattr(self, "_astream_chat_model_events", None),697        )698        if native is not None:699           
 event_iter: AsyncIterator[MessagesData] = native(700                messages, stop=stop, run_manager=run_manager, **kwargs701            )702        else:703            event_iter = achunks_to_events(704                self._astream(messages, stop=stop, run_manager=run_manager, **kwargs),705                message_id=stream.message_id,706            )707        async for event in event_iter:708            stream.dispatch(event)709            await run_manager.on_stream_event(event)710            yield event711712    @override713    def stream(714        self,715        input: LanguageModelInput,716        config: RunnableConfig | None = None,717        *,718        stop: list[str] | None = None,719        **kwargs: Any,720    ) -> Iterator[AIMessageChunk]:721        if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):722            # Model doesn't implement streaming, so use default implementation723            yield cast(724                "AIMessageChunk",725                self.invoke(input, config=config, stop=stop, **kwargs),726            )727        else:728            config = ensure_config(config)729            messages = self._convert_input(input).to_messages()730            ls_structured_output_format = kwargs.pop(731                "ls_structured_output_format", None732            ) or kwargs.pop("structured_output_format", None)733            ls_structured_output_format_dict = _format_ls_structured_output(734                ls_structured_output_format735            )736737            params = self._get_invocation_params(stop=stop, **kwargs)738            options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}739            inheritable_metadata = {740                **(config.get("metadata") or {}),741                **self._get_ls_params_with_defaults(stop=stop, **kwargs),742            }743            callback_manager = CallbackManager.configure(744                config.get("callbacks"),745                
@override
def stream(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    stop: list[str] | None = None,
    **kwargs: Any,
) -> Iterator[AIMessageChunk]:
    """Stream the model response as message chunks.

    When `_should_stream` reports that streaming is not available, the
    result of `invoke` is yielded as a single item. Otherwise chunks from
    `_stream` are yielded one by one and the run's callbacks are driven
    from here: `on_chat_model_start`, `on_llm_new_token` per chunk, then
    `on_llm_end` or `on_llm_error`.

    Args:
        input: The model input.
        config: Optional runnable config.
        stop: Optional stop sequences.
        **kwargs: Forwarded to the model. The tracing-only keys
            `ls_structured_output_format` / `structured_output_format`
            are removed first and only reported to callbacks.

    Yields:
        The message of every streamed chunk, followed by an empty chunk
        with `chunk_position="last"` when the final streamed
        `AIMessageChunk` did not carry a chunk position.

    Raises:
        ValueError: If no generation could be assembled from the chunks.
    """
    if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):
        # Model doesn't implement streaming, so use default implementation
        yield cast(
            "AIMessageChunk",
            self.invoke(input, config=config, stop=stop, **kwargs),
        )
    else:
        config = ensure_config(config)
        messages = self._convert_input(input).to_messages()
        # Both spellings are tracing-only and must not be forwarded to
        # `_stream`. Pop each one explicitly: `pop(a) or pop(b)` would
        # short-circuit and leave the legacy key in `kwargs` whenever the
        # new key is set.
        ls_structured_output_format = kwargs.pop(
            "ls_structured_output_format", None
        )
        legacy_structured_output_format = kwargs.pop(
            "structured_output_format", None
        )
        ls_structured_output_format_dict = _format_ls_structured_output(
            ls_structured_output_format or legacy_structured_output_format
        )

        params = self._get_invocation_params(stop=stop, **kwargs)
        options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
        inheritable_metadata = {
            **(config.get("metadata") or {}),
            **self._get_ls_params_with_defaults(stop=stop, **kwargs),
        }
        callback_manager = CallbackManager.configure(
            config.get("callbacks"),
            self.callbacks,
            self.verbose,
            config.get("tags"),
            self.tags,
            inheritable_metadata,
            self.metadata,
            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
                params
            ),
        )
        (run_manager,) = callback_manager.on_chat_model_start(
            self._serialized,
            [_format_for_tracing(messages)],
            invocation_params=params,
            options=options,
            name=config.get("run_name"),
            run_id=config.pop("run_id", None),
            batch_size=1,
        )

        # Every streamed chunk is kept so the final (or partial, on error)
        # generation can be merged for the end/error callbacks.
        chunks: list[ChatGenerationChunk] = []

        # The rate limiter is acquired after the run has been started.
        if self.rate_limiter:
            self.rate_limiter.acquire(blocking=True)

        try:
            input_messages = _normalize_messages(messages)
            # Fallback message id, used for chunks that arrive without one.
            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
            yielded = False
            # Running block index for v1 output: bumped each time the block
            # type differs from the previously seen block type.
            index = -1
            index_type = ""
            for chunk in self._stream(input_messages, stop=stop, **kwargs):
                if chunk.message.id is None:
                    chunk.message.id = run_id
                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
                if self.output_version == "v1":
                    # Overwrite .content with .content_blocks
                    chunk.message = _update_message_content_to_blocks(
                        chunk.message, "v1"
                    )
                    for block in cast(
                        "list[types.ContentBlock]", chunk.message.content
                    ):
                        if block["type"] != index_type:
                            index_type = block["type"]
                            index += 1
                        # Only fill in an index the block did not provide.
                        if "index" not in block:
                            block["index"] = index
                run_manager.on_llm_new_token(
                    cast("str", chunk.message.content), chunk=chunk
                )
                chunks.append(chunk)
                yield cast("AIMessageChunk", chunk.message)
                yielded = True

            # Yield a final empty chunk with chunk_position="last" if not yet
            # yielded. `chunk` is only read when the loop ran at least once
            # (guarded by `yielded`).
            if (
                yielded
                and isinstance(chunk.message, AIMessageChunk)
                and not chunk.message.chunk_position
            ):
                empty_content: str | list = (
                    "" if isinstance(chunk.message.content, str) else []
                )
                msg_chunk = AIMessageChunk(
                    content=empty_content, chunk_position="last", id=run_id
                )
                run_manager.on_llm_new_token(
                    "", chunk=ChatGenerationChunk(message=msg_chunk)
                )
                yield msg_chunk
        except BaseException as e:
            # Report whatever was streamed so far together with the error
            # metadata, then re-raise the original exception.
            generations_with_error_metadata = _generate_response_from_error(e)
            chat_generation_chunk = merge_chat_generation_chunks(chunks)
            if chat_generation_chunk:
                generations = [
                    [chat_generation_chunk],
                    generations_with_error_metadata,
                ]
            else:
                generations = [generations_with_error_metadata]
            run_manager.on_llm_error(
                e,
                response=LLMResult(generations=generations),
            )
            raise

        generation = merge_chat_generation_chunks(chunks)
        if generation is None:
            err = ValueError("No generation chunks were returned")
            run_manager.on_llm_error(err, response=LLMResult(generations=[]))
            raise err

        run_manager.on_llm_end(LLMResult(generations=[[generation]]))
@override
async def astream(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    stop: list[str] | None = None,
    **kwargs: Any,
) -> AsyncIterator[AIMessageChunk]:
    """Asynchronously stream the model response as message chunks.

    When `_should_stream` reports that streaming is not available, the
    result of `ainvoke` is yielded as a single item. Otherwise chunks from
    `_astream` are yielded one by one and the run's callbacks are driven
    from here: `on_chat_model_start`, `on_llm_new_token` per chunk, then
    `on_llm_end` or `on_llm_error`.

    Args:
        input: The model input.
        config: Optional runnable config.
        stop: Optional stop sequences.
        **kwargs: Forwarded to the model. The tracing-only keys
            `ls_structured_output_format` / `structured_output_format`
            are removed first and only reported to callbacks.

    Yields:
        The message of every streamed chunk, followed by an empty chunk
        with `chunk_position="last"` when the final streamed
        `AIMessageChunk` did not carry a chunk position.

    Raises:
        ValueError: If no generation could be assembled from the chunks.
    """
    if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):
        # No async or sync stream is implemented, so fall back to ainvoke
        yield cast(
            "AIMessageChunk",
            await self.ainvoke(input, config=config, stop=stop, **kwargs),
        )
        return

    config = ensure_config(config)
    messages = self._convert_input(input).to_messages()

    # Both spellings are tracing-only and must not be forwarded to
    # `_astream`. Pop each one explicitly: `pop(a) or pop(b)` would
    # short-circuit and leave the legacy key in `kwargs` whenever the new
    # key is set.
    ls_structured_output_format = kwargs.pop("ls_structured_output_format", None)
    legacy_structured_output_format = kwargs.pop("structured_output_format", None)
    ls_structured_output_format_dict = _format_ls_structured_output(
        ls_structured_output_format or legacy_structured_output_format
    )

    params = self._get_invocation_params(stop=stop, **kwargs)
    options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
    inheritable_metadata = {
        **(config.get("metadata") or {}),
        **self._get_ls_params_with_defaults(stop=stop, **kwargs),
    }
    callback_manager = AsyncCallbackManager.configure(
        config.get("callbacks"),
        self.callbacks,
        self.verbose,
        config.get("tags"),
        self.tags,
        inheritable_metadata,
        self.metadata,
        langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
            params
        ),
    )
    (run_manager,) = await callback_manager.on_chat_model_start(
        self._serialized,
        [_format_for_tracing(messages)],
        invocation_params=params,
        options=options,
        name=config.get("run_name"),
        run_id=config.pop("run_id", None),
        batch_size=1,
    )

    # The rate limiter is acquired after the run has been started.
    if self.rate_limiter:
        await self.rate_limiter.aacquire(blocking=True)

    # Every streamed chunk is kept so the final (or partial, on error)
    # generation can be merged for the end/error callbacks.
    chunks: list[ChatGenerationChunk] = []

    try:
        input_messages = _normalize_messages(messages)
        # Fallback message id, used for chunks that arrive without one.
        run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))
        yielded = False
        # Running block index for v1 output: bumped each time the block
        # type differs from the previously seen block type.
        index = -1
        index_type = ""
        async for chunk in self._astream(
            input_messages,
            stop=stop,
            **kwargs,
        ):
            if chunk.message.id is None:
                chunk.message.id = run_id
            chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)
            if self.output_version == "v1":
                # Overwrite .content with .content_blocks
                chunk.message = _update_message_content_to_blocks(
                    chunk.message, "v1"
                )
                for block in cast(
                    "list[types.ContentBlock]", chunk.message.content
                ):
                    if block["type"] != index_type:
                        index_type = block["type"]
                        index += 1
                    # Only fill in an index the block did not provide.
                    if "index" not in block:
                        block["index"] = index
            await run_manager.on_llm_new_token(
                cast("str", chunk.message.content), chunk=chunk
            )
            chunks.append(chunk)
            yield cast("AIMessageChunk", chunk.message)
            yielded = True

        # Yield a final empty chunk with chunk_position="last" if not yet yielded.
        # `chunk` is only read when the loop ran at least once (guarded by
        # `yielded`).
        if (
            yielded
            and isinstance(chunk.message, AIMessageChunk)
            and not chunk.message.chunk_position
        ):
            empty_content: str | list = (
                "" if isinstance(chunk.message.content, str) else []
            )
            msg_chunk = AIMessageChunk(
                content=empty_content, chunk_position="last", id=run_id
            )
            await run_manager.on_llm_new_token(
                "", chunk=ChatGenerationChunk(message=msg_chunk)
            )
            yield msg_chunk
    except BaseException as e:
        # Report whatever was streamed so far together with the error
        # metadata, then re-raise the original exception.
        generations_with_error_metadata = _generate_response_from_error(e)
        chat_generation_chunk = merge_chat_generation_chunks(chunks)
        if chat_generation_chunk:
            generations = [[chat_generation_chunk], generations_with_error_metadata]
        else:
            generations = [generations_with_error_metadata]
        await run_manager.on_llm_error(
            e,
            response=LLMResult(generations=generations),
        )
        raise

    generation = merge_chat_generation_chunks(chunks)
    if not generation:
        err = ValueError("No generation chunks were returned")
        await run_manager.on_llm_error(err, response=LLMResult(generations=[]))
        raise err

    await run_manager.on_llm_end(
        LLMResult(generations=[[generation]]),
    )
@beta()
def _chat_model_stream_v3(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    stop: list[str] | None = None,
    **kwargs: Any,
) -> ChatModelStream:
    """Internal v3 sync streaming implementation.

    Public entry point: `stream_events(version='v3')`.

    Nothing is started here: the run (`on_chat_model_start`) and the event
    iterator are created on the first call of the start/pump callbacks
    that are registered on the returned stream.

    Args:
        input: The model input.
        config: Optional runnable config.
        stop: Optional stop sequences.
        **kwargs: Forwarded to the event producer. The tracing-only keys
            `ls_structured_output_format` / `structured_output_format`
            are removed first and only reported to callbacks.

    Returns:
        A `ChatModelStream` with `ensure_started` registered through
        `set_start` and `pump_one` registered through `bind_pump`.
    """
    config = ensure_config(config)
    messages = self._convert_input(input).to_messages()
    input_messages = _normalize_messages(messages)

    # Strip tracing-only kwargs before forwarding to `_stream`; this
    # matches `stream()` / `astream()`. Provider clients reject unknown
    # kwargs, so `.with_structured_output().stream_events(version="v3", ...)`
    # and any other binding that carries `ls_structured_output_format`
    # / `structured_output_format` would raise without this pop.
    # Each key is popped explicitly: `pop(a) or pop(b)` would short-circuit
    # and leave the legacy key in `kwargs` whenever the new key is set.
    ls_structured_output_format = kwargs.pop("ls_structured_output_format", None)
    legacy_structured_output_format = kwargs.pop("structured_output_format", None)
    ls_structured_output_format_dict = _format_ls_structured_output(
        ls_structured_output_format or legacy_structured_output_format
    )

    params = self._get_invocation_params(stop=stop, **kwargs)
    options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
    inheritable_metadata = {
        **(config.get("metadata") or {}),
        **self._get_ls_params_with_defaults(stop=stop, **kwargs),
    }
    callback_manager = CallbackManager.configure(
        config.get("callbacks"),
        self.callbacks,
        self.verbose,
        config.get("tags"),
        self.tags,
        inheritable_metadata,
        self.metadata,
        langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
            params
        ),
    )
    stream = ChatModelStream()
    # Both are filled in by `ensure_started` on first use.
    run_manager: CallbackManagerForLLMRun | None = None
    event_iter_ref: Iterator[MessagesData] | None = None
    # Without a rate limiter there is nothing to acquire.
    rate_limiter_acquired = self.rate_limiter is None
    run_name = config.get("run_name")
    run_id = config.pop("run_id", None)

    def ensure_started() -> None:
        """Start the run and build the event iterator exactly once."""
        nonlocal event_iter_ref, run_manager
        if event_iter_ref is not None:
            return

        (run_manager,) = callback_manager.on_chat_model_start(
            self._serialized,
            [_format_for_tracing(messages)],
            invocation_params=params,
            options=options,
            name=run_name,
            run_id=run_id,
            batch_size=1,
        )
        stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))
        event_iter_ref = iter(
            self._iter_v2_events(
                input_messages,
                run_manager=run_manager,
                stream=stream,
                stop=stop,
                **kwargs,
            )
        )

    def pump_one() -> bool:
        """Advance the event iterator by one event.

        Returns `True` when an event was consumed and `False` when the
        iterator is exhausted or raised.
        """
        nonlocal rate_limiter_acquired
        ensure_started()
        if not rate_limiter_acquired:
            assert self.rate_limiter is not None  # noqa: S101
            self.rate_limiter.acquire(blocking=True)
            rate_limiter_acquired = True
        assert event_iter_ref is not None  # noqa: S101
        assert run_manager is not None  # noqa: S101
        try:
            next(event_iter_ref)
        except StopIteration:
            if not stream.done:
                if stream.has_events:
                    # Native event producers may omit the terminal
                    # `message-finish`. Close the lifecycle here so
                    # `on_llm_end` still observes the assembled
                    # message. A truly empty stream remains an error
                    # for parity with `stream()`.
                    stream.dispatch(MessageFinishData(event="message-finish"))
                else:
                    err = ValueError("No generation chunks were returned")
                    stream.fail(err)
                    run_manager.on_llm_error(
                        err,
                        response=LLMResult(generations=[]),
                    )
                    return False
            # NOTE(review): if an earlier successful pump already completed
            # the stream (and fired `on_llm_end` below), pumping again would
            # reach this branch and fire `on_llm_end` a second time. This
            # presumably relies on `ChatModelStream` not pumping once it is
            # done; confirm against its implementation.
            if stream.done and stream.output_message is not None:
                run_manager.on_llm_end(
                    LLMResult(
                        generations=[
                            [ChatGeneration(message=stream.output_message)],
                        ],
                    ),
                )
            return False
        except BaseException as exc:
            # The failure is recorded on the stream and reported to the
            # callbacks; it is not re-raised from here.
            stream.fail(exc)
            run_manager.on_llm_error(
                exc,
                response=LLMResult(generations=[]),
            )
            return False
        # The consumed event may have completed the message.
        if stream.done and stream.output_message is not None:
            run_manager.on_llm_end(
                LLMResult(
                    generations=[
                        [ChatGeneration(message=stream.output_message)],
                    ],
                ),
            )
        return True

    stream.set_start(ensure_started)
    stream.bind_pump(pump_one)
    return stream
@beta()
async def _achat_model_stream_v3(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    stop: list[str] | None = None,
    **kwargs: Any,
) -> AsyncChatModelStream:
    """Internal v3 async streaming implementation.

    Public entry point: `astream_events(version='v3')`.

    Nothing is started here: the run (`on_chat_model_start`) and the
    producer task are created on the first call of the start callback that
    is registered on the returned stream.

    Args:
        input: The model input.
        config: Optional runnable config.
        stop: Optional stop sequences.
        **kwargs: Forwarded to the event producer. The tracing-only keys
            `ls_structured_output_format` / `structured_output_format`
            are removed first and only reported to callbacks.

    Returns:
        An `AsyncChatModelStream` with `ensure_started` registered through
        `set_start` and `_on_aclose_fail` installed as its close-failure
        hook.
    """
    config = ensure_config(config)
    messages = self._convert_input(input).to_messages()
    input_messages = _normalize_messages(messages)

    # Strip tracing-only kwargs before forwarding; provider clients reject
    # unknown kwargs, so neither spelling may reach the event producer.
    # Each key is popped explicitly: `pop(a) or pop(b)` would short-circuit
    # and leave the legacy key in `kwargs` whenever the new key is set.
    ls_structured_output_format = kwargs.pop("ls_structured_output_format", None)
    legacy_structured_output_format = kwargs.pop("structured_output_format", None)
    ls_structured_output_format_dict = _format_ls_structured_output(
        ls_structured_output_format or legacy_structured_output_format
    )

    params = self._get_invocation_params(stop=stop, **kwargs)
    options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}
    inheritable_metadata = {
        **(config.get("metadata") or {}),
        **self._get_ls_params_with_defaults(stop=stop, **kwargs),
    }
    callback_manager = AsyncCallbackManager.configure(
        config.get("callbacks"),
        self.callbacks,
        self.verbose,
        config.get("tags"),
        self.tags,
        inheritable_metadata,
        self.metadata,
        langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(
            params
        ),
    )
    stream = AsyncChatModelStream()
    # Filled in by `ensure_started` before the producer task is created.
    run_manager: AsyncCallbackManagerForLLMRun | None = None
    run_name = config.get("run_name")
    run_id = config.pop("run_id", None)
    # Serializes concurrent first calls of `ensure_started`.
    start_lock = asyncio.Lock()

    async def _produce() -> None:
        """Drain the event iterator and close the callback lifecycle."""
        assert run_manager is not None  # noqa: S101
        try:
            if self.rate_limiter:
                await self.rate_limiter.aacquire(blocking=True)

            # Events are dispatched to `stream` inside `_aiter_v2_events`;
            # nothing else has to happen per event here.
            async for _event in self._aiter_v2_events(
                input_messages,
                run_manager=run_manager,
                stream=stream,
                stop=stop,
                **kwargs,
            ):
                pass
            if not stream.done:
                if stream.has_events:
                    # Native event producers may omit the terminal
                    # `message-finish`. Close the lifecycle here so
                    # `on_llm_end` sees the finalized message. A
                    # truly empty stream remains an error for parity
                    # with `astream()`.
                    stream.dispatch(MessageFinishData(event="message-finish"))
                else:
                    err = ValueError("No generation chunks were returned")
                    stream.fail(err)
                    await run_manager.on_llm_error(
                        err,
                        response=LLMResult(generations=[]),
                    )
                    return
            if stream.done and stream.output_message is not None:
                await run_manager.on_llm_end(
                    LLMResult(
                        generations=[
                            [ChatGeneration(message=stream.output_message)],
                        ],
                    ),
                )
        except asyncio.CancelledError as exc:
            stream.fail(exc)
            # Close the callback lifecycle so tracing observes a
            # matching end event for the earlier `on_chat_model_start`.
            # `on_llm_error` is `@shielded`, so the callback runs to
            # completion in the background even though the `await`
            # here re-raises our cancellation.
            with contextlib.suppress(Exception):
                await run_manager.on_llm_error(
                    exc,
                    response=LLMResult(generations=[]),
                )
            raise
        except BaseException as exc:
            # The failure is recorded on the stream and reported to the
            # callbacks; it is not re-raised from the producer task.
            stream.fail(exc)
            await run_manager.on_llm_error(
                exc,
                response=LLMResult(generations=[]),
            )

    async def ensure_started() -> None:
        """Start the run and spawn the producer task exactly once."""
        nonlocal run_manager
        # Cheap check without the lock, repeated under the lock below.
        if stream._producer_task is not None:  # noqa: SLF001
            return

        async with start_lock:
            if stream._producer_task is not None:  # noqa: SLF001
                return

            (run_manager,) = await callback_manager.on_chat_model_start(
                self._serialized,
                [_format_for_tracing(messages)],
                invocation_params=params,
                options=options,
                name=run_name,
                run_id=run_id,
                batch_size=1,
            )
            stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))
            stream._producer_task = asyncio.get_running_loop().create_task(  # noqa: SLF001
                _produce()
            )

    async def _on_aclose_fail(exc: BaseException) -> None:
        """Report `exc` through `on_llm_error` on behalf of `aclose()`."""
        assert run_manager is not None  # noqa: S101
        # Invoked by `stream.aclose()` only when the producer was
        # cancelled before `_produce` ran, so `on_llm_error` from
        # the CancelledError handler never fired. Shielded by the
        # callback manager; runs to completion even if our caller
        # is being cancelled.
        await run_manager.on_llm_error(
            exc,
            response=LLMResult(generations=[]),
        )

    stream.set_start(ensure_started)
    stream._on_aclose_fail = _on_aclose_fail  # noqa: SLF001
    return stream
@overload  # type: ignore[override]
def stream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v1", "v2"] = "v2",
    **kwargs: Any,
) -> Iterator[StreamEvent]: ...

@overload
def stream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v3"],
    stop: list[str] | None = None,
    **kwargs: Any,
) -> ChatModelStream: ...

def stream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v1", "v2", "v3"] = "v2",
    stop: list[str] | None = None,
    **kwargs: Any,
) -> Iterator[StreamEvent] | ChatModelStream:
    """Stream events from this chat model.

    For `version="v1"` / `"v2"`, yields `StreamEvent` dicts (see
    `Runnable.stream_events`). For `version="v3"`, returns a
    `ChatModelStream` exposing typed projections (`.text`,
    `.reasoning`, `.tool_calls`, `.output`).

    !!! warning "Beta"

        `version="v3"` is in beta. The protocol shape, return type,
        and surface area may change in future releases. Calling it
        emits a `LangChainBetaWarning` at runtime.

    !!! note "v3 always produces v1-shaped content"

        `ChatModelStream.output.content` is always a list of v1
        content blocks (text / reasoning / tool_call / image / …),
        regardless of the model's `output_version` attribute. The
        setting only affects the legacy `stream()` / `astream()` /
        `invoke()` paths. If you're mixing
        `stream_events(version="v3")` with those paths in the same
        pipeline and need a consistent output shape across them,
        set `output_version="v1"` on the model.

    Args:
        input: The model input.
        config: Optional runnable config.
        version: Streaming-event schema version. `"v3"` selects the
            content-block-centric streaming protocol.
        stop: Optional stop sequences. For `version="v3"` they are
            passed to the v3 streaming implementation; for `"v1"` /
            `"v2"` they are forwarded as the `stop` keyword argument to
            the parent `stream_events` implementation.
        **kwargs: Additional keyword arguments. Forwarded to the v3
            streaming implementation for `version="v3"` and to the
            parent `stream_events` implementation otherwise.

    Returns:
        For `version="v3"`, a `ChatModelStream` with typed
        projections. Otherwise an `Iterator[StreamEvent]`.
    """
    if version == "v3":
        return self._chat_model_stream_v3(input, config, stop=stop, **kwargs)
    # v1/v2: `stop` is passed along, not dropped.
    return super().stream_events(
        input, config, version=version, stop=stop, **kwargs
    )
@overload
def astream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v1", "v2"] = "v2",
    **kwargs: Any,
) -> AsyncIterator[StreamEvent]: ...

@overload
def astream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v3"],
    stop: list[str] | None = None,
    **kwargs: Any,
) -> Awaitable[AsyncChatModelStream]: ...

def astream_events(
    self,
    input: LanguageModelInput,
    config: RunnableConfig | None = None,
    *,
    version: Literal["v1", "v2", "v3"] = "v2",
    stop: list[str] | None = None,
    **kwargs: Any,
) -> AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]:
    """Async variant of `stream_events`.

    Args:
        input: The model input.
        config: Optional runnable config.
        version: Streaming-event schema version.
        stop: Optional stop sequences, passed along for every version.
        **kwargs: Additional keyword arguments, passed along for every
            version.

    Returns:
        For `version="v3"`, the awaitable returned by
        `_achat_model_stream_v3`, which resolves to an
        `AsyncChatModelStream`. Otherwise whatever the parent
        `astream_events` returns for `"v1"` / `"v2"`.
    """
    if version != "v3":
        # v1/v2: forward to Runnable.astream_events (async generator).
        return super().astream_events(
            input, config, version=version, stop=stop, **kwargs
        )
    # v3: not awaited here; the caller awaits the returned object.
    return self._achat_model_stream_v3(input, config, stop=stop, **kwargs)
See `stream_events` for full docs."""1365        if version == "v3":1366            return self._achat_model_stream_v3(input, config, stop=stop, **kwargs)1367        # v1/v2: forward to Runnable.astream_events (async generator).1368        return super().astream_events(1369            input, config, version=version, stop=stop, **kwargs1370        )13711372    # --- Custom methods ---13731374    def _combine_llm_outputs(1375        self, _llm_outputs: list[builtins.dict | None], /1376    ) -> builtins.dict:1377        return {}13781379    def _convert_cached_generations(self, cache_val: list) -> list[ChatGeneration]:1380        """Convert cached Generation objects to ChatGeneration objects.13811382        Handle case where cache contains Generation objects instead of1383        ChatGeneration objects. This can happen due to serialization/deserialization1384        issues or legacy cache data (see #22389).13851386        Args:1387            cache_val: List of cached generation objects.13881389        Returns:1390            List of ChatGeneration objects.13911392        """1393        converted_generations = []1394        for gen in cache_val:1395            if isinstance(gen, Generation) and not isinstance(gen, ChatGeneration):1396                # Convert Generation to ChatGeneration by creating AIMessage1397                # from the text content1398                chat_gen = ChatGeneration(1399                    message=AIMessage(content=gen.text),1400                    generation_info=gen.generation_info,1401                )1402                converted_generations.append(chat_gen)1403            else:1404                # Already a ChatGeneration or other expected type1405                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):1406                    # We zero out cost on cache hits1407                    gen.message = gen.message.model_copy(1408                        update={1409                            "usage_metadata": {1410 
                               **(gen.message.usage_metadata or {}),1411                                "total_cost": 0,1412                            }1413                        }1414                    )1415                converted_generations.append(gen)1416        return converted_generations14171418    def _replay_v2_events_for_cache_hit(1419        self,1420        generations: list[ChatGeneration],1421        *,1422        run_manager: CallbackManagerForLLMRun | None,1423        **kwargs: Any,1424    ) -> None:1425        """Replay cached messages as v2 events when a v2 handler is attached.14261427        A warm cache must produce the same `on_stream_event` stream as a1428        cold call so LangGraph-style consumers do not observe behavior1429        that depends on cache state. Gated by1430        `_should_use_protocol_streaming` so a `disable_streaming` config1431        that suppresses v2 on cold calls also suppresses it here.1432        """1433        if run_manager is None or not self._should_use_protocol_streaming(1434            async_api=False, run_manager=run_manager, **kwargs1435        ):1436            return1437        message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1438        for gen in generations:1439            msg = getattr(gen, "message", None)1440            if not isinstance(msg, AIMessage):1441                continue1442            for event in message_to_events(msg, message_id=message_id):1443                run_manager.on_stream_event(event)14441445    async def _areplay_v2_events_for_cache_hit(1446        self,1447        generations: list[ChatGeneration],1448        *,1449        run_manager: AsyncCallbackManagerForLLMRun | None,1450        **kwargs: Any,1451    ) -> None:1452        """Async counterpart to `_replay_v2_events_for_cache_hit`."""1453        if run_manager is None or not self._should_use_protocol_streaming(1454            async_api=True, run_manager=run_manager, **kwargs1455        ):1456            
return1457        message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1458        for gen in generations:1459            msg = getattr(gen, "message", None)1460            if not isinstance(msg, AIMessage):1461                continue1462            async for event in amessage_to_events(msg, message_id=message_id):1463                await run_manager.on_stream_event(event)14641465    def _get_invocation_params(1466        self,1467        stop: list[str] | None = None,1468        **kwargs: Any,1469    ) -> builtins.dict:1470        params = self._dict_for_compat()1471        params["stop"] = stop1472        return {**params, **kwargs}14731474    def _get_ls_params(1475        self,1476        stop: list[str] | None = None,1477        **kwargs: Any,1478    ) -> LangSmithParams:1479        """Get standard params for LangSmith tracing.14801481        Subclasses **should override** this method to populate `ls_provider`1482        and `ls_model_name` from provider-specific attributes (e.g. `self.model`,1483        `self.model_name`, `self.model_id`) and to honor per-call overrides1484        passed via `kwargs["model"]` so that runtime `bind`/`invoke` model1485        changes are reflected in traces.14861487        The implementation here is a best-effort fallback for subclasses that1488        do not override it. It is not part of a stable contract and the1489        derivation rules may change:14901491        - `ls_provider` is derived from the class name by stripping a leading1492            or trailing `"Chat"` and lowercasing the remainder. This produces1493            ugly values for multi-word providers (e.g. 
`ChatGoogleGenerativeAI`1494            would become `"googlegenerativeai"`).14951496            Override to set a stable, conventional value1497            such as `"google_genai"`.1498        - `ls_model_name` is resolved from `kwargs["model"]`, then1499            `self.model`, then `self.model_name`.15001501            Subclasses whose model attribute has a different name1502            (`model_id`, `deployment_name`, ...) must override.1503        """1504        # get default provider from class name1505        default_provider = self.__class__.__name__1506        if default_provider.startswith("Chat"):1507            default_provider = default_provider[4:].lower()1508        elif default_provider.endswith("Chat"):1509            default_provider = default_provider[:-4]1510        default_provider = default_provider.lower()15111512        ls_params = LangSmithParams(ls_provider=default_provider, ls_model_type="chat")1513        if stop:1514            ls_params["ls_stop"] = stop15151516        # model1517        if "model" in kwargs and isinstance(kwargs["model"], str):1518            ls_params["ls_model_name"] = kwargs["model"]1519        elif hasattr(self, "model") and isinstance(self.model, str):1520            ls_params["ls_model_name"] = self.model1521        elif hasattr(self, "model_name") and isinstance(self.model_name, str):1522            ls_params["ls_model_name"] = self.model_name15231524        # temperature1525        if "temperature" in kwargs and isinstance(kwargs["temperature"], (int, float)):1526            ls_params["ls_temperature"] = kwargs["temperature"]1527        elif hasattr(self, "temperature") and isinstance(1528            self.temperature, (int, float)1529        ):1530            ls_params["ls_temperature"] = self.temperature15311532        # max_tokens1533        if "max_tokens" in kwargs and isinstance(kwargs["max_tokens"], int):1534            ls_params["ls_max_tokens"] = kwargs["max_tokens"]1535        elif hasattr(self, 
"max_tokens") and isinstance(self.max_tokens, int):1536            ls_params["ls_max_tokens"] = self.max_tokens15371538        return ls_params15391540    def _get_ls_params_with_defaults(1541        self,1542        stop: list[str] | None = None,1543        **kwargs: Any,1544    ) -> LangSmithParams:1545        """Wrap _get_ls_params to always include ls_integration."""1546        ls_params = self._get_ls_params(stop=stop, **kwargs)1547        ls_params["ls_integration"] = "langchain_chat_model"1548        return ls_params15491550    def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:1551        if self.is_lc_serializable():1552            params = {**kwargs, "stop": stop}1553            param_string = str(sorted(params.items()))1554            # This code is not super efficient as it goes back and forth between1555            # json and dict.1556            serialized_repr = self._serialized1557            _cleanup_llm_representation(serialized_repr, 1)1558            llm_string = json.dumps(serialized_repr, sort_keys=True)1559            return llm_string + "---" + param_string1560        params = self._get_invocation_params(stop=stop, **kwargs)1561        params = {**params, **kwargs}1562        return str(sorted(params.items()))15631564    def generate(1565        self,1566        messages: list[list[BaseMessage]],1567        stop: list[str] | None = None,1568        callbacks: Callbacks = None,1569        *,1570        tags: list[str] | None = None,1571        metadata: builtins.dict[str, Any] | None = None,1572        run_name: str | None = None,1573        run_id: uuid.UUID | None = None,1574        **kwargs: Any,1575    ) -> LLMResult:1576        """Pass a sequence of prompts to the model and return model generations.15771578        This method should make use of batched calls for models that expose a batched1579        API.15801581        Use this method when you want to:15821583        1. 
Take advantage of batched calls,1584        2. Need more output from the model than just the top generated value,1585        3. Are building chains that are agnostic to the underlying language model1586            type (e.g., pure text completion models vs chat models).15871588        Args:1589            messages: List of list of messages.1590            stop: Stop words to use when generating.15911592                Model output is cut off at the first occurrence of any of these1593                substrings.1594            callbacks: `Callbacks` to pass through.15951596                Used for executing additional functionality, such as logging or1597                streaming, throughout generation.1598            tags: The tags to apply.1599            metadata: The metadata to apply.1600            run_name: The name of the run.1601            run_id: The ID of the run.1602            **kwargs: Arbitrary additional keyword arguments.16031604                These are usually passed to the model provider API call.16051606        Returns:1607            An `LLMResult`, which contains a list of candidate `Generations` for each1608                input prompt and additional model provider-specific output.16091610        """1611        ls_structured_output_format = kwargs.pop(1612            "ls_structured_output_format", None1613        ) or kwargs.pop("structured_output_format", None)1614        ls_structured_output_format_dict = _format_ls_structured_output(1615            ls_structured_output_format1616        )16171618        params = self._get_invocation_params(stop=stop, **kwargs)1619        options = {"stop": stop, **ls_structured_output_format_dict}1620        inheritable_metadata = {1621            **(metadata or {}),1622            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1623        }16241625        callback_manager = CallbackManager.configure(1626            callbacks,1627            self.callbacks,1628            self.verbose,1629       
     tags,1630            self.tags,1631            inheritable_metadata,1632            self.metadata,1633            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1634                params1635            ),1636        )1637        messages_to_trace = [1638            _format_for_tracing(message_list) for message_list in messages1639        ]1640        run_managers = callback_manager.on_chat_model_start(1641            self._serialized,1642            messages_to_trace,1643            invocation_params=params,1644            options=options,1645            name=run_name,1646            run_id=run_id,1647            batch_size=len(messages),1648        )1649        results = []1650        input_messages = [1651            _normalize_messages(message_list) for message_list in messages1652        ]1653        for i, m in enumerate(input_messages):1654            try:1655                results.append(1656                    self._generate_with_cache(1657                        m,1658                        stop=stop,1659                        run_manager=run_managers[i] if run_managers else None,1660                        **kwargs,1661                    )1662                )1663            except BaseException as e:1664                if run_managers:1665                    generations_with_error_metadata = _generate_response_from_error(e)1666                    run_managers[i].on_llm_error(1667                        e,1668                        response=LLMResult(1669                            generations=[generations_with_error_metadata]1670                        ),1671                    )1672                raise1673        flattened_outputs = [1674            LLMResult(generations=[res.generations], llm_output=res.llm_output)1675            for res in results1676        ]1677        llm_output = self._combine_llm_outputs([res.llm_output for res in results])1678        generations = [res.generations for res in results]1679        
output = LLMResult(generations=generations, llm_output=llm_output)1680        if run_managers:1681            run_infos = []1682            for manager, flattened_output in zip(1683                run_managers, flattened_outputs, strict=False1684            ):1685                manager.on_llm_end(flattened_output)1686                run_infos.append(RunInfo(run_id=manager.run_id))1687            output.run = run_infos1688        return output16891690    async def agenerate(1691        self,1692        messages: list[list[BaseMessage]],1693        stop: list[str] | None = None,1694        callbacks: Callbacks = None,1695        *,1696        tags: list[str] | None = None,1697        metadata: builtins.dict[str, Any] | None = None,1698        run_name: str | None = None,1699        run_id: uuid.UUID | None = None,1700        **kwargs: Any,1701    ) -> LLMResult:1702        """Asynchronously pass a sequence of prompts to a model and return generations.17031704        This method should make use of batched calls for models that expose a batched1705        API.17061707        Use this method when you want to:17081709        1. Take advantage of batched calls,1710        2. Need more output from the model than just the top generated value,1711        3. 
Are building chains that are agnostic to the underlying language model1712            type (e.g., pure text completion models vs chat models).17131714        Args:1715            messages: List of list of messages.1716            stop: Stop words to use when generating.17171718                Model output is cut off at the first occurrence of any of these1719                substrings.1720            callbacks: `Callbacks` to pass through.17211722                Used for executing additional functionality, such as logging or1723                streaming, throughout generation.1724            tags: The tags to apply.1725            metadata: The metadata to apply.1726            run_name: The name of the run.1727            run_id: The ID of the run.1728            **kwargs: Arbitrary additional keyword arguments.17291730                These are usually passed to the model provider API call.17311732        Returns:1733            An `LLMResult`, which contains a list of candidate `Generations` for each1734                input prompt and additional model provider-specific output.17351736        """1737        ls_structured_output_format = kwargs.pop(1738            "ls_structured_output_format", None1739        ) or kwargs.pop("structured_output_format", None)1740        ls_structured_output_format_dict = _format_ls_structured_output(1741            ls_structured_output_format1742        )17431744        params = self._get_invocation_params(stop=stop, **kwargs)1745        options = {"stop": stop, **ls_structured_output_format_dict}1746        inheritable_metadata = {1747            **(metadata or {}),1748            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1749        }17501751        callback_manager = AsyncCallbackManager.configure(1752            callbacks,1753            self.callbacks,1754            self.verbose,1755            tags,1756            self.tags,1757            inheritable_metadata,1758            self.metadata,1759            
langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1760                params1761            ),1762        )17631764        messages_to_trace = [1765            _format_for_tracing(message_list) for message_list in messages1766        ]1767        run_managers = await callback_manager.on_chat_model_start(1768            self._serialized,1769            messages_to_trace,1770            invocation_params=params,1771            options=options,1772            name=run_name,1773            batch_size=len(messages),1774            run_id=run_id,1775        )17761777        input_messages = [1778            _normalize_messages(message_list) for message_list in messages1779        ]1780        results = await asyncio.gather(1781            *[1782                self._agenerate_with_cache(1783                    m,1784                    stop=stop,1785                    run_manager=run_managers[i] if run_managers else None,1786                    **kwargs,1787                )1788                for i, m in enumerate(input_messages)1789            ],1790            return_exceptions=True,1791        )1792        exceptions = []1793        for i, res in enumerate(results):1794            if isinstance(res, BaseException):1795                if run_managers:1796                    generations_with_error_metadata = _generate_response_from_error(res)1797                    await run_managers[i].on_llm_error(1798                        res,1799                        response=LLMResult(1800                            generations=[generations_with_error_metadata]1801                        ),1802                    )1803                exceptions.append(res)1804        if exceptions:1805            if run_managers:1806                await asyncio.gather(1807                    *[1808                        run_manager.on_llm_end(1809                            LLMResult(1810                                generations=[res.generations],  # type: 
ignore[union-attr]1811                                llm_output=res.llm_output,  # type: ignore[union-attr]1812                            )1813                        )1814                        for run_manager, res in zip(run_managers, results, strict=False)1815                        if not isinstance(res, Exception)1816                    ]1817                )1818            raise exceptions[0]1819        flattened_outputs = [1820            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[union-attr]1821            for res in results1822        ]1823        llm_output = self._combine_llm_outputs([res.llm_output for res in results])  # type: ignore[union-attr]1824        generations = [res.generations for res in results]  # type: ignore[union-attr]1825        output = LLMResult(generations=generations, llm_output=llm_output)1826        await asyncio.gather(1827            *[1828                run_manager.on_llm_end(flattened_output)1829                for run_manager, flattened_output in zip(1830                    run_managers, flattened_outputs, strict=False1831                )1832            ]1833        )1834        if run_managers:1835            output.run = [1836                RunInfo(run_id=run_manager.run_id) for run_manager in run_managers1837            ]1838        return output18391840    @override1841    def generate_prompt(1842        self,1843        prompts: list[PromptValue],1844        stop: list[str] | None = None,1845        callbacks: Callbacks = None,1846        **kwargs: Any,1847    ) -> LLMResult:1848        prompt_messages = [p.to_messages() for p in prompts]1849        return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)18501851    @override1852    async def agenerate_prompt(1853        self,1854        prompts: list[PromptValue],1855        stop: list[str] | None = None,1856        callbacks: Callbacks = None,1857        **kwargs: Any,1858    ) -> LLMResult:1859      
  prompt_messages = [p.to_messages() for p in prompts]1860        return await self.agenerate(1861            prompt_messages, stop=stop, callbacks=callbacks, **kwargs1862        )18631864    def _generate_with_cache(1865        self,1866        messages: list[BaseMessage],1867        stop: list[str] | None = None,1868        run_manager: CallbackManagerForLLMRun | None = None,1869        **kwargs: Any,1870    ) -> ChatResult:1871        llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()1872        # We should check the cache unless it's explicitly set to False1873        # A None cache means we should use the default global cache1874        # if it's configured.1875        check_cache = self.cache or self.cache is None1876        if check_cache:1877            if llm_cache:1878                llm_string = self._get_llm_string(stop=stop, **kwargs)1879                normalized_messages = [1880                    (1881                        msg.model_copy(update={"id": None})1882                        if getattr(msg, "id", None) is not None1883                        else msg1884                    )1885                    for msg in messages1886                ]1887                prompt = dumps(normalized_messages)1888                cache_val = llm_cache.lookup(prompt, llm_string)1889                if isinstance(cache_val, list):1890                    converted_generations = self._convert_cached_generations(cache_val)1891                    self._replay_v2_events_for_cache_hit(1892                        converted_generations,1893                        run_manager=run_manager,1894                        **kwargs,1895                    )1896                    return ChatResult(generations=converted_generations)1897            elif self.cache is None:1898                pass1899            else:1900                msg = "Asked to cache, but no cache found at `langchain.cache`."1901                raise ValueError(msg)19021903    
    # Apply the rate limiter after checking the cache, since1904        # we usually don't want to rate limit cache lookups, but1905        # we do want to rate limit API requests.1906        if self.rate_limiter:1907            self.rate_limiter.acquire(blocking=True)19081909        # v2 streaming: preferred over v1 when any attached handler opts in via1910        # `_V2StreamingCallbackHandler`. Drives the protocol event generator1911        # (native or `_stream` compat bridge) through the shared helper so1912        # `on_stream_event` fires per event, then returns a normal `ChatResult`1913        # so caching / `on_llm_end` stay on the existing generate path.1914        if self._should_use_protocol_streaming(1915            async_api=False,1916            run_manager=run_manager,1917            **kwargs,1918        ):1919            stream_accum = ChatModelStream(1920                message_id=(1921                    f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1922                )1923            )1924            assert run_manager is not None  # noqa: S1011925            for _event in self._iter_v2_events(1926                messages,1927                run_manager=run_manager,1928                stream=stream_accum,1929                stop=stop,1930                **kwargs,1931            ):1932                pass1933            if stream_accum.output_message is None:1934                msg = "v2 stream finished without producing a message"1935                raise RuntimeError(msg)1936            result = ChatResult(1937                generations=[ChatGeneration(message=stream_accum.output_message)]1938            )1939        # If stream is not explicitly set, check if implicitly requested by1940        # astream_events() or astream_log(). 
Bail out if _stream not implemented1941        elif self._should_stream(1942            async_api=False,1943            run_manager=run_manager,1944            **kwargs,1945        ):1946            chunks: list[ChatGenerationChunk] = []1947            run_id: str | None = (1948                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1949            )1950            yielded = False1951            index = -11952            index_type = ""1953            for chunk in self._stream(messages, stop=stop, **kwargs):1954                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)1955                if self.output_version == "v1":1956                    # Overwrite .content with .content_blocks1957                    chunk.message = _update_message_content_to_blocks(1958                        chunk.message, "v1"1959                    )1960                    for block in cast(1961                        "list[types.ContentBlock]", chunk.message.content1962                    ):1963                        if block["type"] != index_type:1964                            index_type = block["type"]1965                            index += 11966                        if "index" not in block:1967                            block["index"] = index1968                if run_manager:1969                    if chunk.message.id is None:1970                        chunk.message.id = run_id1971                    run_manager.on_llm_new_token(1972                        cast("str", chunk.message.content), chunk=chunk1973                    )1974                chunks.append(chunk)1975                yielded = True19761977            # Yield a final empty chunk with chunk_position="last" if not yet yielded1978            if (1979                yielded1980                and isinstance(chunk.message, AIMessageChunk)1981                and not chunk.message.chunk_position1982            ):1983                empty_content: str | list = (1984      
              "" if isinstance(chunk.message.content, str) else []1985                )1986                chunk = ChatGenerationChunk(1987                    message=AIMessageChunk(1988                        content=empty_content, chunk_position="last", id=run_id1989                    )1990                )1991                if run_manager:1992                    run_manager.on_llm_new_token("", chunk=chunk)1993                chunks.append(chunk)1994            result = generate_from_stream(iter(chunks))1995        elif inspect.signature(self._generate).parameters.get("run_manager"):1996            result = self._generate(1997                messages, stop=stop, run_manager=run_manager, **kwargs1998            )1999        else:2000            result = self._generate(messages, stop=stop, **kwargs)

Findings

✓ No findings reported for this file.

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.