libs/core/langchain_core/language_models/chat_models.py · langchain-ai/langchain

1"""Chat models for conversational AI."""23from __future__ import annotations45import asyncio6import builtins  # noqa: TC003  # runtime-evaluated; subclass `dict()` shadows the builtin7import contextlib8import inspect9import json10from abc import ABC, abstractmethod11from collections.abc import AsyncIterator, Callable, Iterator, Sequence12from functools import cached_property13from operator import itemgetter14from typing import TYPE_CHECKING, Any, Literal, cast, overload1516from langchain_protocol.protocol import MessageFinishData17from pydantic import BaseModel, ConfigDict, Field, model_validator18from typing_extensions import Self, override1920from langchain_core._api import beta, deprecated, suppress_langchain_deprecation_warning21from langchain_core.caches import BaseCache22from langchain_core.callbacks import (23    AsyncCallbackManager,24    AsyncCallbackManagerForLLMRun,25    CallbackManager,26    CallbackManagerForLLMRun,27    Callbacks,28)29from langchain_core.globals import get_llm_cache30from langchain_core.language_models._compat_bridge import (31    achunks_to_events,32    amessage_to_events,33    chunks_to_events,34    message_to_events,35)36from langchain_core.language_models._utils import (37    _filter_invocation_params_for_tracing,38    _normalize_messages,39    _update_message_content_to_blocks,40)41from langchain_core.language_models.base import (42    BaseLanguageModel,43    LangSmithParams,44    LanguageModelInput,45)46from langchain_core.language_models.chat_model_stream import (47    AsyncChatModelStream,48    ChatModelStream,49)50from langchain_core.language_models.model_profile import (51    ModelProfile,52    _warn_unknown_profile_keys,53)54from langchain_core.load import dumpd, dumps55from langchain_core.messages import (56    AIMessage,57    AIMessageChunk,58    AnyMessage,59    BaseMessage,60    convert_to_messages,61    is_data_content_block,62    message_chunk_to_message,63)64from langchain_core.messages import content as types65from langchain_core.messages.block_translators.openai import (66    convert_to_openai_image_block,67)68from langchain_core.output_parsers.openai_tools import (69    JsonOutputKeyToolsParser,70    JsonOutputToolsParser,71    PydanticToolsParser,72)73from langchain_core.outputs import (74    ChatGeneration,75    ChatGenerationChunk,76    ChatResult,77    Generation,78    LLMResult,79    RunInfo,80)81from langchain_core.outputs.chat_generation import merge_chat_generation_chunks82from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue83from langchain_core.rate_limiters import BaseRateLimiter84from langchain_core.runnables import RunnableBinding, RunnableMap, RunnablePassthrough85from langchain_core.runnables.config import ensure_config, run_in_executor86from langchain_core.tracers._streaming import (87    _StreamingCallbackHandler,88    _V2StreamingCallbackHandler,89)90from langchain_core.utils.function_calling import (91    convert_to_json_schema,92    convert_to_openai_tool,93)94from langchain_core.utils.pydantic import is_basemodel_subclass95from langchain_core.utils.utils import LC_ID_PREFIX, from_env9697if TYPE_CHECKING:98    import uuid99    from collections.abc import Awaitable100101    from langchain_protocol.protocol import MessagesData102103    from langchain_core.runnables import Runnable, RunnableConfig104    from langchain_core.runnables.schema import StreamEvent105    from langchain_core.tools import BaseTool106107108def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:109    if hasattr(error, "response"):110        response = error.response111        metadata: dict[str, Any] = {}112        if hasattr(response, "json"):113            try:114                metadata["body"] = response.json()115            except Exception:116                try:117                    metadata["body"] = getattr(response, "text", None)118                except Exception:119                    metadata["body"] = None120        if hasattr(response, "headers"):121            try:122                metadata["headers"] = dict(response.headers)123            except Exception:124                metadata["headers"] = None125        if hasattr(response, "status_code"):126            metadata["status_code"] = response.status_code127        if hasattr(error, "request_id"):128            metadata["request_id"] = error.request_id129        generations = [130            ChatGeneration(message=AIMessage(content="", response_metadata=metadata))131        ]132    else:133        generations = []134135    return generations136137138def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:139    """Format messages for tracing in `on_chat_model_start`.140141    - Update image content blocks to OpenAI Chat Completions format (backward142    compatibility).143    - Add `type` key to content blocks that have a single key.144145    Args:146        messages: List of messages to format.147148    Returns:149        List of messages formatted for tracing.150151    """152    messages_to_trace = []153    for message in messages:154        message_to_trace = message155        if isinstance(message.content, list):156            for idx, block in enumerate(message.content):157                if isinstance(block, dict):158                    # Update image content blocks to OpenAI # Chat Completions format.159                    if (160                        block.get("type") == "image"161                        and is_data_content_block(block)162                        and not ("file_id" in block or block.get("source_type") == "id")163                    ):164                        if message_to_trace is message:165                            # Shallow copy166                            message_to_trace = message.model_copy()167                            message_to_trace.content = list(message_to_trace.content)168169                        message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy170                            convert_to_openai_image_block(block)171                        )172                    elif (173                        block.get("type") == "file"174                        and is_data_content_block(block)  # v0 (image/audio/file) or v1175                        and "base64" in block176                        # Backward compat: convert v1 base64 blocks to v0177                    ):178                        if message_to_trace is message:179                            # Shallow copy180                            message_to_trace = message.model_copy()181                            message_to_trace.content = list(message_to_trace.content)182183                        message_to_trace.content[idx] = {  # type: ignore[index]184                            **{k: v for k, v in block.items() if k != "base64"},185                            "data": block["base64"],186                            "source_type": "base64",187                        }188                    elif len(block) == 1 and "type" not in block:189                        # Tracing assumes all content blocks have a "type" key. Here190                        # we add this key if it is missing, and there's an obvious191                        # choice for the type (e.g., a single key in the block).192                        if message_to_trace is message:193                            # Shallow copy194                            message_to_trace = message.model_copy()195                            message_to_trace.content = list(message_to_trace.content)196                        key = next(iter(block))197                        message_to_trace.content[idx] = {  # type: ignore[index]198                            "type": key,199                            key: block[key],200                        }201        messages_to_trace.append(message_to_trace)202203    return messages_to_trace204205206def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:207    """Generate from a stream.208209    Args:210        stream: Iterator of `ChatGenerationChunk`.211212    Raises:213        ValueError: If no generations are found in the stream.214215    Returns:216        Chat result.217218    """219    generation = next(stream, None)220    if generation:221        generation += list(stream)222    if generation is None:223        msg = "No generations found in stream."224        raise ValueError(msg)225    return ChatResult(226        generations=[227            ChatGeneration(228                message=message_chunk_to_message(generation.message),229                generation_info=generation.generation_info,230            )231        ]232    )233234235async def agenerate_from_stream(236    stream: AsyncIterator[ChatGenerationChunk],237) -> ChatResult:238    """Async generate from a stream.239240    Args:241        stream: AsyncIterator of `ChatGenerationChunk`.242243    Returns:244        Chat result.245246    """247    chunks = [chunk async for chunk in stream]248    return await run_in_executor(None, generate_from_stream, iter(chunks))249250251def _format_ls_structured_output(252    ls_structured_output_format: dict[str, Any] | None,253) -> dict[str, Any]:254    if ls_structured_output_format:255        try:256            ls_structured_output_format_dict = {257                "ls_structured_output_format": {258                    "kwargs": ls_structured_output_format.get("kwargs", {}),259                    "schema": convert_to_json_schema(260                        ls_structured_output_format["schema"]261                    ),262                }263            }264        except ValueError:265            ls_structured_output_format_dict = {}266    else:267        ls_structured_output_format_dict = {}268269    return ls_structured_output_format_dict270271272class BaseChatModel(BaseLanguageModel[AIMessage], ABC):273    r"""Base class for chat models.274275    Key imperative methods:276        Methods that actually call the underlying model.277278        This table provides a brief overview of the main imperative methods. Please see the base `Runnable` reference for full documentation.279280        | Method                 | Input                                                        | Output                                                     | Description                                                                      |281        | ---------------------- | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------- |282        | `invoke`               | `str` \| `list[dict | tuple | BaseMessage]` \| `PromptValue` | `BaseMessage`                                              | A single chat model call.                                                        |283        | `ainvoke`              | `'''`                                                        | `BaseMessage`                                              | Defaults to running `invoke` in an async executor.                               |284        | `stream`               | `'''`                                                        | `Iterator[BaseMessageChunk]`                               | Defaults to yielding output of `invoke`.                                         |285        | `astream`              | `'''`                                                        | `AsyncIterator[BaseMessageChunk]`                          | Defaults to yielding output of `ainvoke`.                                        |286        | `astream_events`       | `'''`                                                        | `AsyncIterator[StreamEvent]`                               | Event types: `on_chat_model_start`, `on_chat_model_stream`, `on_chat_model_end`. |287        | `batch`                | `list[''']`                                                  | `list[BaseMessage]`                                        | Defaults to running `invoke` in concurrent threads.                              |288        | `abatch`               | `list[''']`                                                  | `list[BaseMessage]`                                        | Defaults to running `ainvoke` in concurrent threads.                             |289        | `batch_as_completed`   | `list[''']`                                                  | `Iterator[tuple[int, Union[BaseMessage, Exception]]]`      | Defaults to running `invoke` in concurrent threads.                              |290        | `abatch_as_completed`  | `list[''']`                                                  | `AsyncIterator[tuple[int, Union[BaseMessage, Exception]]]` | Defaults to running `ainvoke` in concurrent threads.                             |291292    Key declarative methods:293        Methods for creating another `Runnable` using the chat model.294295        This table provides a brief overview of the main declarative methods. Please see the reference for each method for full documentation.296297        | Method                       | Description                                                                                |298        | ---------------------------- | ------------------------------------------------------------------------------------------ |299        | `bind_tools`                 | Create chat model that can call tools.                                                     |300        | `with_structured_output`     | Create wrapper that structures model output using schema.                                  |301        | `with_retry`                 | Create wrapper that retries model calls on failure.                                        |302        | `with_fallbacks`             | Create wrapper that falls back to other models on failure.                                 |303        | `configurable_fields`        | Specify init args of the model that can be configured at runtime via the `RunnableConfig`. |304        | `configurable_alternatives`  | Specify alternative models which can be swapped in at runtime via the `RunnableConfig`.    |305306    Creating custom chat model:307        Custom chat model implementations should inherit from this class.308        Please reference the table below for information about which309        methods and properties are required or optional for implementations.310311        | Method/Property                  | Description                                                        | Required          |312        | -------------------------------- | ------------------------------------------------------------------ | ----------------- |313        | `_generate`                      | Use to generate a chat result from a prompt                        | Required          |314        | `_llm_type` (property)           | Used to uniquely identify the type of the model. Used for logging. | Required          |315        | `_identifying_params` (property) | Represent model parameterization for tracing purposes.             | Optional          |316        | `_stream`                        | Use to implement streaming                                         | Optional          |317        | `_agenerate`                     | Use to implement a native async method                             | Optional          |318        | `_astream`                       | Use to implement async version of `_stream`                        | Optional          |319320    """  # noqa: E501321322    rate_limiter: BaseRateLimiter | None = Field(default=None, exclude=True)323    "An optional rate limiter to use for limiting the number of requests."324325    disable_streaming: bool | Literal["tool_calling"] = False326    """Whether to disable streaming for this model.327328    If streaming is bypassed, then `stream`/`astream`/`astream_events` will329    defer to `invoke`/`ainvoke`.330331    - If `True`, will always bypass streaming case.332    - If `'tool_calling'`, will bypass streaming case only when the model is called333        with a `tools` keyword argument. In other words, LangChain will automatically334        switch to non-streaming behavior (`invoke`) only when the tools argument is335        provided. This offers the best of both worlds.336    - If `False` (Default), will always use streaming case if available.337338    The main reason for this flag is that code might be written using `stream` and339    a user may want to swap out a given model for another model whose implementation340    does not properly support streaming.341    """342343    output_version: str | None = Field(344        default_factory=from_env("LC_OUTPUT_VERSION", default=None)345    )346    """Version of `AIMessage` output format to store in message content.347348    `AIMessage.content_blocks` will lazily parse the contents of `content` into a349    standard format. This flag can be used to additionally store the standard format350    in message content, e.g., for serialization purposes.351352    Supported values:353354    - `'v0'`: provider-specific format in content (can lazily-parse with355        `content_blocks`)356    - `'v1'`: standardized format in content (consistent with `content_blocks`)357358    Partner packages (e.g.,359    [`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this360    field to roll out new content formats in a backward-compatible way.361362    !!! version-added "Added in `langchain-core` 1.0.0"363364    """365366    profile: ModelProfile | None = Field(default=None, exclude=True)367    """Profile detailing model capabilities.368369    !!! warning "Beta feature"370371        This is a beta feature. The format of model profiles is subject to change.372373    If not specified, automatically loaded from the provider package on initialization374    if data is available.375376    Example profile data includes context window sizes, supported modalities, or support377    for tool calling, structured output, and other features.378379    !!! version-added "Added in `langchain-core` 1.1.0"380    """381382    model_config = ConfigDict(383        arbitrary_types_allowed=True,384    )385386    def _resolve_model_profile(self) -> ModelProfile | None:387        """Return the default model profile, or `None` if unavailable.388389        Override this in subclasses instead of `_set_model_profile`. The base390        validator calls it automatically and handles assignment. This avoids391        coupling partner code to Pydantic validator mechanics.392393        Each partner needs its own override because things can vary per-partner,394        such as the attribute that identifies the model (e.g., `model`,395        `model_name`, `model_id`, `deployment_name`) and the partner-local396        `_get_default_model_profile` function that reads from each partner's own397        profile data.398        """399        # TODO: consider adding a `_model_identifier` property on BaseChatModel400        # to standardize how partners identify their model, which could allow a401        # default implementation here that calls a shared402        # profile-loading mechanism.403        return None404405    @model_validator(mode="after")406    def _set_model_profile(self) -> Self:407        """Populate `profile` from `_resolve_model_profile` if not provided.408409        Partners should override `_resolve_model_profile` rather than this410        validator. Overriding this with a new `@model_validator` replaces the411        base validator (Pydantic v2 behavior), bypassing the standard resolution412        path. A plain method override does not prevent the base validator from413        running.414        """415        if self.profile is None:416            # Suppress errors from partner overrides (e.g., missing profile417            # files, broken imports) so model construction never fails over an418            # optional field.419            with contextlib.suppress(Exception):420                self.profile = self._resolve_model_profile()421        return self422423    # NOTE: _check_profile_keys must be defined AFTER _set_model_profile.424    # Pydantic v2 runs mode="after" validators in definition order.425    @model_validator(mode="after")426    def _check_profile_keys(self) -> Self:427        """Warn on unrecognized profile keys."""428        # isinstance guard: ModelProfile is a TypedDict (always a dict), but429        # protects against unexpected types from partner overrides.430        if self.profile and isinstance(self.profile, dict):431            _warn_unknown_profile_keys(self.profile)432        return self433434    @cached_property435    def _serialized(self) -> builtins.dict[str, Any]:436        # self is always a Serializable object in this case, thus the result is437        # guaranteed to be a dict since dumpd uses the default callback, which uses438        # obj.to_json which always returns TypedDict subclasses439        return cast("builtins.dict[str, Any]", dumpd(self))440441    # --- Runnable methods ---442443    @property444    @override445    def OutputType(self) -> Any:446        """Get the output type for this `Runnable`."""447        return AnyMessage448449    def _convert_input(self, model_input: LanguageModelInput) -> PromptValue:450        if isinstance(model_input, PromptValue):451            return model_input452        if isinstance(model_input, str):453            return StringPromptValue(text=model_input)454        if isinstance(model_input, Sequence):455            return ChatPromptValue(messages=convert_to_messages(model_input))456        msg = (  # type: ignore[unreachable]457            f"Invalid input type {type(model_input)}. "458            "Must be a PromptValue, str, or list of BaseMessages."459        )460        raise ValueError(msg)461462    @override463    def invoke(464        self,465        input: LanguageModelInput,466        config: RunnableConfig | None = None,467        *,468        stop: list[str] | None = None,469        **kwargs: Any,470    ) -> AIMessage:471        config = ensure_config(config)472        return cast(473            "AIMessage",474            cast(475                "ChatGeneration",476                self.generate_prompt(477                    [self._convert_input(input)],478                    stop=stop,479                    callbacks=config.get("callbacks"),480                    tags=config.get("tags"),481                    metadata=config.get("metadata"),482                    run_name=config.get("run_name"),483                    run_id=config.pop("run_id", None),484                    **kwargs,485                ).generations[0][0],486            ).message,487        )488489    @override490    async def ainvoke(491        self,492        input: LanguageModelInput,493        config: RunnableConfig | None = None,494        *,495        stop: list[str] | None = None,496        **kwargs: Any,497    ) -> AIMessage:498        config = ensure_config(config)499        llm_result = await self.agenerate_prompt(500            [self._convert_input(input)],501            stop=stop,502            callbacks=config.get("callbacks"),503            tags=config.get("tags"),504            metadata=config.get("metadata"),505            run_name=config.get("run_name"),506            run_id=config.pop("run_id", None),507            **kwargs,508        )509        return cast(510            "AIMessage", cast("ChatGeneration", llm_result.generations[0][0]).message511        )512513    def _streaming_disabled(self, **kwargs: Any) -> bool:514        """Return whether streaming is hard-disabled for this call.515516        Shared opt-outs honored by both `_should_stream` and517        `_should_use_protocol_streaming` — these override any affirmative trigger518        (attached handler, `stream=True`, etc.):519520        - `self.disable_streaming is True`521        - `self.disable_streaming == "tool_calling"` with `tools` passed522        - `stream=<falsy>` in call kwargs523        - `self.streaming is False` on the instance524        """525        if self.disable_streaming is True:526            return True527        # We assume tools are passed in via "tools" kwarg in all models.528        if self.disable_streaming == "tool_calling" and kwargs.get("tools"):529            return True530        if "stream" in kwargs and not kwargs["stream"]:531            return True532        return (533            "streaming" in self.model_fields_set534            and getattr(self, "streaming", None) is False535        )536537    def _should_stream(538        self,539        *,540        async_api: bool,541        run_manager: CallbackManagerForLLMRun542        | AsyncCallbackManagerForLLMRun543        | None = None,544        **kwargs: Any,545    ) -> bool:546        """Determine if a given model call should hit the streaming API."""547        sync_not_implemented = type(self)._stream == BaseChatModel._stream  # noqa: SLF001548        async_not_implemented = type(self)._astream == BaseChatModel._astream  # noqa: SLF001549550        # Check if streaming is implemented.551        if (not async_api) and sync_not_implemented:552            return False553        # Note, since async falls back to sync we check both here.554        if async_api and async_not_implemented and sync_not_implemented:555            return False556557        if self._streaming_disabled(**kwargs):558            return False559560        # Affirmative: explicit `stream=<truthy>` kwarg.561        if kwargs.get("stream"):562            return True563564        # Affirmative: instance-level `streaming=True` attribute.565        if (566            "streaming" in self.model_fields_set567            and getattr(self, "streaming", None) is True568        ):569            return True570571        # Affirmative: a v1 streaming callback handler is attached.572        handlers = run_manager.handlers if run_manager else []573        return any(isinstance(h, _StreamingCallbackHandler) for h in handlers)574575    def _should_use_protocol_streaming(576        self,577        *,578        async_api: bool,579        run_manager: CallbackManagerForLLMRun580        | AsyncCallbackManagerForLLMRun581        | None = None,582        **kwargs: Any,583    ) -> bool:584        """Determine whether an invoke should route through the v2 event path.585586        Runs alongside `_should_stream` inside `_generate_with_cache` /587        `_agenerate_with_cache` — after the run manager is open — and588        wins over the v1 streaming branch when a handler has declared589        itself a `_V2StreamingCallbackHandler`. Parallel to590        `_should_stream` rather than a delegation — v1 and v2 have591        disjoint affirmative triggers.592593        Args:594            async_api: Whether the caller is on the async path.595            run_manager: The active LLM run manager.596            **kwargs: Call kwargs; inspected for `disable_streaming`597                semantics and an explicit `stream=False` override.598599        Returns:600            `True` if any attached handler inherits601            `_V2StreamingCallbackHandler` and the model can drive the v2602            event generator (natively or via the `_stream` compat603            bridge).604        """605        # Opt-in: only route through v2 when a v2 handler is attached.606        handlers = run_manager.handlers if run_manager else []607        if not any(isinstance(h, _V2StreamingCallbackHandler) for h in handlers):608            return False609610        # Need a source of v2 events on the requested flavor. A native611        # `_(a)stream_chat_model_events` hook bypasses the bridge;612        # otherwise the bridge wraps `_stream` / `_astream`. Async can613        # fall back to sync.614        #615        # `cls._stream is not BaseChatModel._stream` is an identity616        # check for "subclass overrode `_stream`" — same pattern as617        # `_should_stream`.618        cls = type(self)619        has_native_sync = getattr(cls, "_stream_chat_model_events", None) is not None620        has_native_async = getattr(cls, "_astream_chat_model_events", None) is not None621        overrides_sync = cls._stream is not BaseChatModel._stream622        overrides_async = cls._astream is not BaseChatModel._astream623        has_sync_source = has_native_sync or overrides_sync624        has_async_source = has_native_async or overrides_async625        has_source = (626            (has_sync_source or has_async_source) if async_api else has_sync_source627        )628        if not has_source:629            return False630631        return not self._streaming_disabled(**kwargs)632633    def _iter_v2_events(634        self,635        messages: list[BaseMessage],636        *,637        run_manager: CallbackManagerForLLMRun,638        stream: ChatModelStream,639        stop: list[str] | None = None,640        **kwargs: Any,641    ) -> Iterator[MessagesData]:642        """Drive the v2 event generator with per-event dispatch.643644        Shared between the `stream_events(version="v3")` pump and the645        invoke-time v2 branch in `_generate_with_cache`. Picks the native646        `_stream_chat_model_events` hook when the subclass provides one,647        else bridges `_stream` chunks via `chunks_to_events`. Each event648        is dispatched into `stream` and fired as `on_stream_event` on649        the run manager. Run-lifecycle callbacks650        (`on_chat_model_start` / `on_llm_end` / `on_llm_error`) and651        rate-limiter acquisition are the caller's responsibility.652653        Args:654            messages: Normalized input messages.655            run_manager: Active LLM run manager; receives656                `on_stream_event` per event.657            stream: Accumulator owned by the caller; receives each658                event via `stream.dispatch`.659            stop: Optional stop sequences.660            **kwargs: Forwarded to the event producer.661662        Yields:663            Each protocol event produced by the model.664        """665        native = cast(666            "Callable[..., Iterator[MessagesData]] | None",667            getattr(self, "_stream_chat_model_events", None),668        )669        if native is not None:670            event_iter: Iterator[MessagesData] = native(671                messages, stop=stop, run_manager=run_manager, **kwargs672            )673        else:674            event_iter = chunks_to_events(675                self._stream(messages, stop=stop, run_manager=run_manager, **kwargs),676                message_id=stream.message_id,677            )678        for event in event_iter:679            stream.dispatch(event)680            run_manager.on_stream_event(event)681            yield event682683    async def _aiter_v2_events(684        self,685        messages: list[BaseMessage],686        *,687        run_manager: AsyncCallbackManagerForLLMRun,688        stream: AsyncChatModelStream,689        stop: list[str] | None = None,690        **kwargs: Any,691    ) -> AsyncIterator[MessagesData]:692        """Async counterpart to `_iter_v2_events`.693694        See `_iter_v2_events` for the shared contract.695        """696        native = cast(697            "Callable[..., AsyncIterator[MessagesData]] | None",698            getattr(self, "_astream_chat_model_events", None),699        )700        if native is not None:701            event_iter: AsyncIterator[MessagesData] = native(702                messages, stop=stop, run_manager=run_manager, **kwargs703            )704        else:705            event_iter = achunks_to_events(706                self._astream(messages, stop=stop, run_manager=run_manager, **kwargs),707                message_id=stream.message_id,708            )709        async for event in event_iter:710            stream.dispatch(event)711            await run_manager.on_stream_event(event)712            yield event713714    @override715    def stream(716        self,717        input: LanguageModelInput,718        config: RunnableConfig | None = None,719        *,720        stop: list[str] | None = None,721        **kwargs: Any,722    ) -> Iterator[AIMessageChunk]:723        if not self._should_stream(async_api=False, **{**kwargs, "stream": True}):724            # Model doesn't implement streaming, so use default implementation725            yield cast(726                "AIMessageChunk",727                self.invoke(input, config=config, stop=stop, **kwargs),728            )729        else:730            config = ensure_config(config)731            messages = self._convert_input(input).to_messages()732            ls_structured_output_format = kwargs.pop(733                "ls_structured_output_format", None734            ) or kwargs.pop("structured_output_format", None)735            ls_structured_output_format_dict = _format_ls_structured_output(736                ls_structured_output_format737            )738739            params = self._get_invocation_params(stop=stop, **kwargs)740            options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}741            inheritable_metadata = {742                **(config.get("metadata") or {}),743                **self._get_ls_params_with_defaults(stop=stop, **kwargs),744            }745            callback_manager = CallbackManager.configure(746                config.get("callbacks"),747                self.callbacks,748                self.verbose,749                config.get("tags"),750                self.tags,751                inheritable_metadata,752                self.metadata,753                langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(754                    params755                ),756            )757            (run_manager,) = callback_manager.on_chat_model_start(758                self._serialized,759                [_format_for_tracing(messages)],760                invocation_params=params,761                options=options,762                name=config.get("run_name"),763                run_id=config.pop("run_id", None),764                batch_size=1,765            )766767            chunks: list[ChatGenerationChunk] = []768769            if self.rate_limiter:770                self.rate_limiter.acquire(blocking=True)771772            try:773                input_messages = _normalize_messages(messages)774                run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))775                yielded = False776                index = -1777                index_type = ""778                for chunk in self._stream(input_messages, stop=stop, **kwargs):779                    if chunk.message.id is None:780                        chunk.message.id = run_id781                    chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)782                    if self.output_version == "v1":783                        # Overwrite .content with .content_blocks784                        chunk.message = _update_message_content_to_blocks(785                            chunk.message, "v1"786                        )787                        for block in cast(788                            "list[types.ContentBlock]", chunk.message.content789                        ):790                            if block["type"] != index_type:791                                index_type = block["type"]792                                index += 1793                            if "index" not in block:794                                block["index"] = index795                    run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)796                    chunks.append(chunk)797                    yield cast("AIMessageChunk", chunk.message)798                    yielded = True799800                # Yield a final empty chunk with chunk_position="last" if not yet801                # yielded802                if (803                    yielded804                    and isinstance(chunk.message, AIMessageChunk)805                    and not chunk.message.chunk_position806                ):807                    empty_content: str | list[str | dict[str, Any]] = (808                        "" if isinstance(chunk.message.content, str) else []809                    )810                    msg_chunk = AIMessageChunk(811                        content=empty_content, chunk_position="last", id=run_id812                    )813                    run_manager.on_llm_new_token(814                        "", chunk=ChatGenerationChunk(message=msg_chunk)815                    )816                    yield msg_chunk817            except BaseException as e:818                generations_with_error_metadata = _generate_response_from_error(e)819                chat_generation_chunk = merge_chat_generation_chunks(chunks)820                if chat_generation_chunk:821                    generations = [822                        [chat_generation_chunk],823                        generations_with_error_metadata,824                    ]825                else:826                    generations = [generations_with_error_metadata]827                run_manager.on_llm_error(828                    e,829                    response=LLMResult(generations=generations),830                )831                raise832833            generation = merge_chat_generation_chunks(chunks)834            if generation is None:835                err = ValueError("No generation chunks were returned")836                run_manager.on_llm_error(err, response=LLMResult(generations=[]))837                raise err838839            run_manager.on_llm_end(LLMResult(generations=[[generation]]))840841    @override842    async def astream(843        self,844        input: LanguageModelInput,845        config: RunnableConfig | None = None,846        *,847        stop: list[str] | None = None,848        **kwargs: Any,849    ) -> AsyncIterator[AIMessageChunk]:850        if not self._should_stream(async_api=True, **{**kwargs, "stream": True}):851            # No async or sync stream is implemented, so fall back to ainvoke852            yield cast(853                "AIMessageChunk",854                await self.ainvoke(input, config=config, stop=stop, **kwargs),855            )856            return857858        config = ensure_config(config)859        messages = self._convert_input(input).to_messages()860861        ls_structured_output_format = kwargs.pop(862            "ls_structured_output_format", None863        ) or kwargs.pop("structured_output_format", None)864        ls_structured_output_format_dict = _format_ls_structured_output(865            ls_structured_output_format866        )867868        params = self._get_invocation_params(stop=stop, **kwargs)869        options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}870        inheritable_metadata = {871            **(config.get("metadata") or {}),872            **self._get_ls_params_with_defaults(stop=stop, **kwargs),873        }874        callback_manager = AsyncCallbackManager.configure(875            config.get("callbacks"),876            self.callbacks,877            self.verbose,878            config.get("tags"),879            self.tags,880            inheritable_metadata,881            self.metadata,882            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(883                params884            ),885        )886        (run_manager,) = await callback_manager.on_chat_model_start(887            self._serialized,888            [_format_for_tracing(messages)],889            invocation_params=params,890            options=options,891            name=config.get("run_name"),892            run_id=config.pop("run_id", None),893            batch_size=1,894        )895896        if self.rate_limiter:897            await self.rate_limiter.aacquire(blocking=True)898899        chunks: list[ChatGenerationChunk] = []900901        try:902            input_messages = _normalize_messages(messages)903            run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id)))904            yielded = False905            index = -1906            index_type = ""907            async for chunk in self._astream(908                input_messages,909                stop=stop,910                **kwargs,911            ):912                if chunk.message.id is None:913                    chunk.message.id = run_id914                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)915                if self.output_version == "v1":916                    # Overwrite .content with .content_blocks917                    chunk.message = _update_message_content_to_blocks(918                        chunk.message, "v1"919                    )920                    for block in cast(921                        "list[types.ContentBlock]", chunk.message.content922                    ):923                        if block["type"] != index_type:924                            index_type = block["type"]925                            index += 1926                        if "index" not in block:927                            block["index"] = index928                await run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)929                chunks.append(chunk)930                yield cast("AIMessageChunk", chunk.message)931                yielded = True932933            # Yield a final empty chunk with chunk_position="last" if not yet yielded934            if (935                yielded936                and isinstance(chunk.message, AIMessageChunk)937                and not chunk.message.chunk_position938            ):939                empty_content: str | list[str | dict[str, Any]] = (940                    "" if isinstance(chunk.message.content, str) else []941                )942                msg_chunk = AIMessageChunk(943                    content=empty_content, chunk_position="last", id=run_id944                )945                await run_manager.on_llm_new_token(946                    "", chunk=ChatGenerationChunk(message=msg_chunk)947                )948                yield msg_chunk949        except BaseException as e:950            generations_with_error_metadata = _generate_response_from_error(e)951            chat_generation_chunk = merge_chat_generation_chunks(chunks)952            if chat_generation_chunk:953                generations = [[chat_generation_chunk], generations_with_error_metadata]954            else:955                generations = [generations_with_error_metadata]956            await run_manager.on_llm_error(957                e,958                response=LLMResult(generations=generations),959            )960            raise961962        generation = merge_chat_generation_chunks(chunks)963        if not generation:964            err = ValueError("No generation chunks were returned")965            await run_manager.on_llm_error(err, response=LLMResult(generations=[]))966            raise err967968        await run_manager.on_llm_end(969            LLMResult(generations=[[generation]]),970        )971972    # --- stream_events v3 ---973974    @beta()975    def _chat_model_stream_v3(976        self,977        input: LanguageModelInput,978        config: RunnableConfig | None = None,979        *,980        stop: list[str] | None = None,981        **kwargs: Any,982    ) -> ChatModelStream:983        """Internal v3 sync streaming implementation.984985        Public entry point: `stream_events(version='v3')`.986        """987        config = ensure_config(config)988        messages = self._convert_input(input).to_messages()989        input_messages = _normalize_messages(messages)990991        # Strip tracing-only kwargs before forwarding to `_stream` — matches992        # `stream()` / `astream()`. Provider clients reject unknown kwargs,993        # so `.with_structured_output().stream_events(version="v3", ...)`994        # and any other binding that carries `ls_structured_output_format`995        # / `structured_output_format` would raise without this pop.996        ls_structured_output_format = kwargs.pop(997            "ls_structured_output_format", None998        ) or kwargs.pop("structured_output_format", None)999        ls_structured_output_format_dict = _format_ls_structured_output(1000            ls_structured_output_format1001        )10021003        params = self._get_invocation_params(stop=stop, **kwargs)1004        options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}1005        inheritable_metadata = {1006            **(config.get("metadata") or {}),1007            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1008        }1009        callback_manager = CallbackManager.configure(1010            config.get("callbacks"),1011            self.callbacks,1012            self.verbose,1013            config.get("tags"),1014            self.tags,1015            inheritable_metadata,1016            self.metadata,1017            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1018                params1019            ),1020        )1021        stream = ChatModelStream()1022        run_manager: CallbackManagerForLLMRun | None = None1023        event_iter_ref: Iterator[MessagesData] | None = None1024        rate_limiter_acquired = self.rate_limiter is None1025        run_name = config.get("run_name")1026        run_id = config.pop("run_id", None)10271028        def ensure_started() -> None:1029            nonlocal event_iter_ref, run_manager1030            if event_iter_ref is not None:1031                return10321033            (run_manager,) = callback_manager.on_chat_model_start(1034                self._serialized,1035                [_format_for_tracing(messages)],1036                invocation_params=params,1037                options=options,1038                name=run_name,1039                run_id=run_id,1040                batch_size=1,1041            )1042            stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))1043            event_iter_ref = iter(1044                self._iter_v2_events(1045                    input_messages,1046                    run_manager=run_manager,1047                    stream=stream,1048                    stop=stop,1049                    **kwargs,1050                )1051            )10521053        def pump_one() -> bool:1054            nonlocal rate_limiter_acquired1055            ensure_started()1056            if not rate_limiter_acquired:1057                assert self.rate_limiter is not None  # noqa: S1011058                self.rate_limiter.acquire(blocking=True)1059                rate_limiter_acquired = True1060            assert event_iter_ref is not None  # noqa: S1011061            assert run_manager is not None  # noqa: S1011062            try:1063                next(event_iter_ref)1064            except StopIteration:1065                if not stream.done:1066                    if stream.has_events:1067                        # Native event producers may omit the terminal1068                        # `message-finish`. Close the lifecycle here so1069                        # `on_llm_end` still observes the assembled1070                        # message. A truly empty stream remains an error1071                        # for parity with `stream()`.1072                        stream.dispatch(MessageFinishData(event="message-finish"))1073                    else:1074                        err = ValueError("No generation chunks were returned")1075                        stream.fail(err)1076                        run_manager.on_llm_error(1077                            err,1078                            response=LLMResult(generations=[]),1079                        )1080                        return False1081                if stream.done and stream.output_message is not None:1082                    run_manager.on_llm_end(1083                        LLMResult(1084                            generations=[1085                                [ChatGeneration(message=stream.output_message)],1086                            ],1087                        ),1088                    )1089                return False1090            except BaseException as exc:1091                stream.fail(exc)1092                run_manager.on_llm_error(1093                    exc,1094                    response=LLMResult(generations=[]),1095                )1096                return False1097            if stream.done and stream.output_message is not None:1098                run_manager.on_llm_end(1099                    LLMResult(1100                        generations=[1101                            [ChatGeneration(message=stream.output_message)],1102                        ],1103                    ),1104                )1105            return True11061107        stream.set_start(ensure_started)1108        stream.bind_pump(pump_one)1109        return stream11101111    @beta()1112    async def _achat_model_stream_v3(1113        self,1114        input: LanguageModelInput,1115        config: RunnableConfig | None = None,1116        *,1117        stop: list[str] | None = None,1118        **kwargs: Any,1119    ) -> AsyncChatModelStream:1120        """Internal v3 async streaming implementation.11211122        Public entry point: `astream_events(version='v3')`.1123        """1124        config = ensure_config(config)1125        messages = self._convert_input(input).to_messages()1126        input_messages = _normalize_messages(messages)11271128        # Strip tracing-only kwargs before forwarding — see the sync v31129        # implementation for the full rationale.1130        ls_structured_output_format = kwargs.pop(1131            "ls_structured_output_format", None1132        ) or kwargs.pop("structured_output_format", None)1133        ls_structured_output_format_dict = _format_ls_structured_output(1134            ls_structured_output_format1135        )11361137        params = self._get_invocation_params(stop=stop, **kwargs)1138        options = {"stop": stop, **kwargs, **ls_structured_output_format_dict}1139        inheritable_metadata = {1140            **(config.get("metadata") or {}),1141            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1142        }1143        callback_manager = AsyncCallbackManager.configure(1144            config.get("callbacks"),1145            self.callbacks,1146            self.verbose,1147            config.get("tags"),1148            self.tags,1149            inheritable_metadata,1150            self.metadata,1151            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1152                params1153            ),1154        )1155        stream = AsyncChatModelStream()1156        run_manager: AsyncCallbackManagerForLLMRun | None = None1157        run_name = config.get("run_name")1158        run_id = config.pop("run_id", None)1159        start_lock = asyncio.Lock()11601161        async def _produce() -> None:1162            assert run_manager is not None  # noqa: S1011163            try:1164                if self.rate_limiter:1165                    await self.rate_limiter.aacquire(blocking=True)11661167                async for _event in self._aiter_v2_events(1168                    input_messages,1169                    run_manager=run_manager,1170                    stream=stream,1171                    stop=stop,1172                    **kwargs,1173                ):1174                    pass1175                if not stream.done:1176                    if stream.has_events:1177                        # Native event producers may omit the terminal1178                        # `message-finish`. Close the lifecycle here so1179                        # `on_llm_end` sees the finalized message. A1180                        # truly empty stream remains an error for parity1181                        # with `astream()`.1182                        stream.dispatch(MessageFinishData(event="message-finish"))1183                    else:1184                        err = ValueError("No generation chunks were returned")1185                        stream.fail(err)1186                        await run_manager.on_llm_error(1187                            err,1188                            response=LLMResult(generations=[]),1189                        )1190                        return1191                if stream.done and stream.output_message is not None:1192                    await run_manager.on_llm_end(1193                        LLMResult(1194                            generations=[1195                                [ChatGeneration(message=stream.output_message)],1196                            ],1197                        ),1198                    )1199            except asyncio.CancelledError as exc:1200                stream.fail(exc)1201                # Close the callback lifecycle so tracing observes a1202                # matching end event for the earlier `on_chat_model_start`.1203                # `on_llm_error` is `@shielded`, so the callback runs to1204                # completion in the background even though the `await`1205                # here re-raises our cancellation.1206                with contextlib.suppress(Exception):1207                    await run_manager.on_llm_error(1208                        exc,1209                        response=LLMResult(generations=[]),1210                    )1211                raise1212            except BaseException as exc:1213                stream.fail(exc)1214                await run_manager.on_llm_error(1215                    exc,1216                    response=LLMResult(generations=[]),1217                )12181219        async def ensure_started() -> None:1220            nonlocal run_manager1221            if stream._producer_task is not None:  # noqa: SLF0011222                return12231224            async with start_lock:1225                if stream._producer_task is not None:  # noqa: SLF0011226                    return  # type: ignore[unreachable]12271228                (run_manager,) = await callback_manager.on_chat_model_start(1229                    self._serialized,1230                    [_format_for_tracing(messages)],1231                    invocation_params=params,1232                    options=options,1233                    name=run_name,1234                    run_id=run_id,1235                    batch_size=1,1236                )1237                stream.set_message_id("-".join((LC_ID_PREFIX, str(run_manager.run_id))))1238                stream._producer_task = asyncio.get_running_loop().create_task(  # noqa: SLF0011239                    _produce()1240                )12411242        async def _on_aclose_fail(exc: BaseException) -> None:1243            assert run_manager is not None  # noqa: S1011244            # Invoked by `stream.aclose()` only when the producer was1245            # cancelled before `_produce` ran — so `on_llm_error` from1246            # the CancelledError handler never fired. Shielded by the1247            # callback manager; runs to completion even if our caller1248            # is being cancelled.1249            await run_manager.on_llm_error(1250                exc,1251                response=LLMResult(generations=[]),1252            )12531254        stream.set_start(ensure_started)1255        stream._on_aclose_fail = _on_aclose_fail  # noqa: SLF0011256        return stream12571258    @overload  # type: ignore[override]1259    def stream_events(1260        self,1261        input: LanguageModelInput,1262        config: RunnableConfig | None = None,1263        *,1264        version: Literal["v1", "v2"] = "v2",1265        **kwargs: Any,1266    ) -> Iterator[StreamEvent]: ...12671268    @overload1269    def stream_events(1270        self,1271        input: LanguageModelInput,1272        config: RunnableConfig | None = None,1273        *,1274        version: Literal["v3"],1275        stop: list[str] | None = None,1276        **kwargs: Any,1277    ) -> ChatModelStream: ...12781279    def stream_events(1280        self,1281        input: LanguageModelInput,1282        config: RunnableConfig | None = None,1283        *,1284        version: Literal["v1", "v2", "v3"] = "v2",1285        stop: list[str] | None = None,1286        **kwargs: Any,1287    ) -> Iterator[StreamEvent] | ChatModelStream:1288        """Stream events from this chat model.12891290        For `version="v1"` / `"v2"`, yields `StreamEvent` dicts (see1291        `Runnable.stream_events`). For `version="v3"`, returns a1292        `ChatModelStream` exposing typed projections (`.text`,1293        `.reasoning`, `.tool_calls`, `.output`).12941295        !!! warning "Beta"12961297            `version="v3"` is in beta. The protocol shape, return type,1298            and surface area may change in future releases. Calling it1299            emits a `LangChainBetaWarning` at runtime.13001301        !!! note "v3 always produces v1-shaped content"13021303            `ChatModelStream.output.content` is always a list of v11304            content blocks (text / reasoning / tool_call / image / …),1305            regardless of the model's `output_version` attribute. The1306            setting only affects the legacy `stream()` / `astream()` /1307            `invoke()` paths. If you're mixing1308            `stream_events(version="v3")` with those paths in the same1309            pipeline and need a consistent output shape across them,1310            set `output_version="v1"` on the model.13111312        Args:1313            input: The model input.1314            config: Optional runnable config.1315            version: Streaming-event schema version. `"v3"` selects the1316                content-block-centric streaming protocol.1317            stop: Optional stop sequences. Only used for `version="v3"`;1318                ignored otherwise.1319            **kwargs: Additional keyword arguments. For `version="v3"`,1320                forwarded to the model.13211322        Returns:1323            For `version="v3"`, a `ChatModelStream` with typed1324            projections. Otherwise an `Iterator[StreamEvent]`.1325        """1326        if version == "v3":1327            return self._chat_model_stream_v3(input, config, stop=stop, **kwargs)1328        return super().stream_events(1329            input, config, version=version, stop=stop, **kwargs1330        )13311332    @overload1333    def astream_events(1334        self,1335        input: LanguageModelInput,1336        config: RunnableConfig | None = None,1337        *,1338        version: Literal["v1", "v2"] = "v2",1339        **kwargs: Any,1340    ) -> AsyncIterator[StreamEvent]: ...13411342    @overload1343    def astream_events(1344        self,1345        input: LanguageModelInput,1346        config: RunnableConfig | None = None,1347        *,1348        version: Literal["v3"],1349        stop: list[str] | None = None,1350        **kwargs: Any,1351    ) -> Awaitable[AsyncChatModelStream]: ...13521353    def astream_events(1354        self,1355        input: LanguageModelInput,1356        config: RunnableConfig | None = None,1357        *,1358        version: Literal["v1", "v2", "v3"] = "v2",1359        stop: list[str] | None = None,1360        **kwargs: Any,1361    ) -> AsyncIterator[StreamEvent] | Awaitable[AsyncChatModelStream]:1362        """Async variant of `stream_events`. See `stream_events` for full docs."""1363        if version == "v3":1364            return self._achat_model_stream_v3(input, config, stop=stop, **kwargs)1365        # v1/v2: forward to Runnable.astream_events (async generator).1366        return super().astream_events(1367            input, config, version=version, stop=stop, **kwargs1368        )13691370    # --- Custom methods ---13711372    def _combine_llm_outputs(1373        self, _llm_outputs: list[builtins.dict[str, Any] | None], /1374    ) -> builtins.dict[str, Any]:1375        return {}13761377    def _convert_cached_generations(1378        self, cache_val: list[Generation]1379    ) -> list[ChatGeneration]:1380        """Convert cached Generation objects to ChatGeneration objects.13811382        Handle case where cache contains Generation objects instead of1383        ChatGeneration objects. This can happen due to serialization/deserialization1384        issues or legacy cache data (see #22389).13851386        Args:1387            cache_val: List of cached generation objects.13881389        Returns:1390            List of ChatGeneration objects.13911392        """1393        converted_generations = []1394        for gen in cache_val:1395            if isinstance(gen, Generation) and not isinstance(gen, ChatGeneration):1396                # Convert Generation to ChatGeneration by creating AIMessage1397                # from the text content1398                chat_gen = ChatGeneration(1399                    message=AIMessage(content=gen.text),1400                    generation_info=gen.generation_info,1401                )1402                converted_generations.append(chat_gen)1403            else:1404                # Already a ChatGeneration or other expected type1405                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):1406                    # We zero out cost on cache hits1407                    gen.message = gen.message.model_copy(1408                        update={1409                            "usage_metadata": {1410                                **(gen.message.usage_metadata or {}),1411                                "total_cost": 0,1412                            }1413                        }1414                    )1415                converted_generations.append(gen)1416        return converted_generations14171418    def _replay_v2_events_for_cache_hit(1419        self,1420        generations: list[ChatGeneration],1421        *,1422        run_manager: CallbackManagerForLLMRun | None,1423        **kwargs: Any,1424    ) -> None:1425        """Replay cached messages as v2 events when a v2 handler is attached.14261427        A warm cache must produce the same `on_stream_event` stream as a1428        cold call so LangGraph-style consumers do not observe behavior1429        that depends on cache state. Gated by1430        `_should_use_protocol_streaming` so a `disable_streaming` config1431        that suppresses v2 on cold calls also suppresses it here.1432        """1433        if run_manager is None or not self._should_use_protocol_streaming(1434            async_api=False, run_manager=run_manager, **kwargs1435        ):1436            return1437        message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1438        for gen in generations:1439            msg = getattr(gen, "message", None)1440            if not isinstance(msg, AIMessage):1441                continue1442            for event in message_to_events(msg, message_id=message_id):1443                run_manager.on_stream_event(event)14441445    async def _areplay_v2_events_for_cache_hit(1446        self,1447        generations: list[ChatGeneration],1448        *,1449        run_manager: AsyncCallbackManagerForLLMRun | None,1450        **kwargs: Any,1451    ) -> None:1452        """Async counterpart to `_replay_v2_events_for_cache_hit`."""1453        if run_manager is None or not self._should_use_protocol_streaming(1454            async_api=True, run_manager=run_manager, **kwargs1455        ):1456            return1457        message_id = f"{LC_ID_PREFIX}-{run_manager.run_id}"1458        for gen in generations:1459            msg = getattr(gen, "message", None)1460            if not isinstance(msg, AIMessage):1461                continue1462            async for event in amessage_to_events(msg, message_id=message_id):1463                await run_manager.on_stream_event(event)14641465    def _get_invocation_params(1466        self,1467        stop: list[str] | None = None,1468        **kwargs: Any,1469    ) -> builtins.dict[str, Any]:1470        params = self._dict_for_compat()1471        params["stop"] = stop1472        return {**params, **kwargs}14731474    def _get_ls_params(1475        self,1476        stop: list[str] | None = None,1477        **kwargs: Any,1478    ) -> LangSmithParams:1479        """Get standard params for LangSmith tracing.14801481        Subclasses **should override** this method to populate `ls_provider`1482        and `ls_model_name` from provider-specific attributes (e.g. `self.model`,1483        `self.model_name`, `self.model_id`) and to honor per-call overrides1484        passed via `kwargs["model"]` so that runtime `bind`/`invoke` model1485        changes are reflected in traces.14861487        The implementation here is a best-effort fallback for subclasses that1488        do not override it. It is not part of a stable contract and the1489        derivation rules may change:14901491        - `ls_provider` is derived from the class name by stripping a leading1492            or trailing `"Chat"` and lowercasing the remainder. This produces1493            ugly values for multi-word providers (e.g. `ChatGoogleGenerativeAI`1494            would become `"googlegenerativeai"`).14951496            Override to set a stable, conventional value1497            such as `"google_genai"`.1498        - `ls_model_name` is resolved from `kwargs["model"]`, then1499            `self.model`, then `self.model_name`.15001501            Subclasses whose model attribute has a different name1502            (`model_id`, `deployment_name`, ...) must override.1503        """1504        # get default provider from class name1505        default_provider = self.__class__.__name__1506        if default_provider.startswith("Chat"):1507            default_provider = default_provider[4:].lower()1508        elif default_provider.endswith("Chat"):1509            default_provider = default_provider[:-4]1510        default_provider = default_provider.lower()15111512        ls_params = LangSmithParams(ls_provider=default_provider, ls_model_type="chat")1513        if stop:1514            ls_params["ls_stop"] = stop15151516        # model1517        if "model" in kwargs and isinstance(kwargs["model"], str):1518            ls_params["ls_model_name"] = kwargs["model"]1519        elif hasattr(self, "model") and isinstance(self.model, str):1520            ls_params["ls_model_name"] = self.model1521        elif hasattr(self, "model_name") and isinstance(self.model_name, str):1522            ls_params["ls_model_name"] = self.model_name15231524        # temperature1525        if "temperature" in kwargs and isinstance(kwargs["temperature"], (int, float)):1526            ls_params["ls_temperature"] = kwargs["temperature"]1527        elif hasattr(self, "temperature") and isinstance(1528            self.temperature, (int, float)1529        ):1530            ls_params["ls_temperature"] = self.temperature15311532        # max_tokens1533        if "max_tokens" in kwargs and isinstance(kwargs["max_tokens"], int):1534            ls_params["ls_max_tokens"] = kwargs["max_tokens"]1535        elif hasattr(self, "max_tokens") and isinstance(self.max_tokens, int):1536            ls_params["ls_max_tokens"] = self.max_tokens15371538        return ls_params15391540    def _get_ls_params_with_defaults(1541        self,1542        stop: list[str] | None = None,1543        **kwargs: Any,1544    ) -> LangSmithParams:1545        """Wrap _get_ls_params to always include ls_integration."""1546        ls_params = self._get_ls_params(stop=stop, **kwargs)1547        ls_params["ls_integration"] = "langchain_chat_model"1548        return ls_params15491550    def _get_llm_string(self, stop: list[str] | None = None, **kwargs: Any) -> str:1551        if self.is_lc_serializable():1552            params = {**kwargs, "stop": stop}1553            param_string = str(sorted(params.items()))1554            # This code is not super efficient as it goes back and forth between1555            # json and dict.1556            serialized_repr = self._serialized1557            _cleanup_llm_representation(serialized_repr, 1)1558            llm_string = json.dumps(serialized_repr, sort_keys=True)1559            return llm_string + "---" + param_string1560        params = self._get_invocation_params(stop=stop, **kwargs)1561        params = {**params, **kwargs}1562        return str(sorted(params.items()))15631564    def generate(1565        self,1566        messages: list[list[BaseMessage]],1567        stop: list[str] | None = None,1568        callbacks: Callbacks = None,1569        *,1570        tags: list[str] | None = None,1571        metadata: builtins.dict[str, Any] | None = None,1572        run_name: str | None = None,1573        run_id: uuid.UUID | None = None,1574        **kwargs: Any,1575    ) -> LLMResult:1576        """Pass a sequence of prompts to the model and return model generations.15771578        This method should make use of batched calls for models that expose a batched1579        API.15801581        Use this method when you want to:15821583        1. Take advantage of batched calls,1584        2. Need more output from the model than just the top generated value,1585        3. Are building chains that are agnostic to the underlying language model1586            type (e.g., pure text completion models vs chat models).15871588        Args:1589            messages: List of list of messages.1590            stop: Stop words to use when generating.15911592                Model output is cut off at the first occurrence of any of these1593                substrings.1594            callbacks: `Callbacks` to pass through.15951596                Used for executing additional functionality, such as logging or1597                streaming, throughout generation.1598            tags: The tags to apply.1599            metadata: The metadata to apply.1600            run_name: The name of the run.1601            run_id: The ID of the run.1602            **kwargs: Arbitrary additional keyword arguments.16031604                These are usually passed to the model provider API call.16051606        Returns:1607            An `LLMResult`, which contains a list of candidate `Generations` for each1608                input prompt and additional model provider-specific output.16091610        """1611        ls_structured_output_format = kwargs.pop(1612            "ls_structured_output_format", None1613        ) or kwargs.pop("structured_output_format", None)1614        ls_structured_output_format_dict = _format_ls_structured_output(1615            ls_structured_output_format1616        )16171618        params = self._get_invocation_params(stop=stop, **kwargs)1619        options = {"stop": stop, **ls_structured_output_format_dict}1620        inheritable_metadata = {1621            **(metadata or {}),1622            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1623        }16241625        callback_manager = CallbackManager.configure(1626            callbacks,1627            self.callbacks,1628            self.verbose,1629            tags,1630            self.tags,1631            inheritable_metadata,1632            self.metadata,1633            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1634                params1635            ),1636        )1637        messages_to_trace = [1638            _format_for_tracing(message_list) for message_list in messages1639        ]1640        run_managers = callback_manager.on_chat_model_start(1641            self._serialized,1642            messages_to_trace,1643            invocation_params=params,1644            options=options,1645            name=run_name,1646            run_id=run_id,1647            batch_size=len(messages),1648        )1649        results = []1650        input_messages = [1651            _normalize_messages(message_list) for message_list in messages1652        ]1653        for i, m in enumerate(input_messages):1654            try:1655                results.append(1656                    self._generate_with_cache(1657                        m,1658                        stop=stop,1659                        run_manager=run_managers[i] if run_managers else None,1660                        **kwargs,1661                    )1662                )1663            except BaseException as e:1664                if run_managers:1665                    generations_with_error_metadata = _generate_response_from_error(e)1666                    run_managers[i].on_llm_error(1667                        e,1668                        response=LLMResult(1669                            generations=[generations_with_error_metadata]1670                        ),1671                    )1672                raise1673        flattened_outputs = [1674            LLMResult(generations=[res.generations], llm_output=res.llm_output)1675            for res in results1676        ]1677        llm_output = self._combine_llm_outputs([res.llm_output for res in results])1678        generations = [res.generations for res in results]1679        output = LLMResult(generations=generations, llm_output=llm_output)1680        if run_managers:1681            run_infos = []1682            for manager, flattened_output in zip(1683                run_managers, flattened_outputs, strict=False1684            ):1685                manager.on_llm_end(flattened_output)1686                run_infos.append(RunInfo(run_id=manager.run_id))1687            output.run = run_infos1688        return output16891690    async def agenerate(1691        self,1692        messages: list[list[BaseMessage]],1693        stop: list[str] | None = None,1694        callbacks: Callbacks = None,1695        *,1696        tags: list[str] | None = None,1697        metadata: builtins.dict[str, Any] | None = None,1698        run_name: str | None = None,1699        run_id: uuid.UUID | None = None,1700        **kwargs: Any,1701    ) -> LLMResult:1702        """Asynchronously pass a sequence of prompts to a model and return generations.17031704        This method should make use of batched calls for models that expose a batched1705        API.17061707        Use this method when you want to:17081709        1. Take advantage of batched calls,1710        2. Need more output from the model than just the top generated value,1711        3. Are building chains that are agnostic to the underlying language model1712            type (e.g., pure text completion models vs chat models).17131714        Args:1715            messages: List of list of messages.1716            stop: Stop words to use when generating.17171718                Model output is cut off at the first occurrence of any of these1719                substrings.1720            callbacks: `Callbacks` to pass through.17211722                Used for executing additional functionality, such as logging or1723                streaming, throughout generation.1724            tags: The tags to apply.1725            metadata: The metadata to apply.1726            run_name: The name of the run.1727            run_id: The ID of the run.1728            **kwargs: Arbitrary additional keyword arguments.17291730                These are usually passed to the model provider API call.17311732        Returns:1733            An `LLMResult`, which contains a list of candidate `Generations` for each1734                input prompt and additional model provider-specific output.17351736        """1737        ls_structured_output_format = kwargs.pop(1738            "ls_structured_output_format", None1739        ) or kwargs.pop("structured_output_format", None)1740        ls_structured_output_format_dict = _format_ls_structured_output(1741            ls_structured_output_format1742        )17431744        params = self._get_invocation_params(stop=stop, **kwargs)1745        options = {"stop": stop, **ls_structured_output_format_dict}1746        inheritable_metadata = {1747            **(metadata or {}),1748            **self._get_ls_params_with_defaults(stop=stop, **kwargs),1749        }17501751        callback_manager = AsyncCallbackManager.configure(1752            callbacks,1753            self.callbacks,1754            self.verbose,1755            tags,1756            self.tags,1757            inheritable_metadata,1758            self.metadata,1759            langsmith_inheritable_metadata=_filter_invocation_params_for_tracing(1760                params1761            ),1762        )17631764        messages_to_trace = [1765            _format_for_tracing(message_list) for message_list in messages1766        ]1767        run_managers = await callback_manager.on_chat_model_start(1768            self._serialized,1769            messages_to_trace,1770            invocation_params=params,1771            options=options,1772            name=run_name,1773            batch_size=len(messages),1774            run_id=run_id,1775        )17761777        input_messages = [1778            _normalize_messages(message_list) for message_list in messages1779        ]1780        results = await asyncio.gather(1781            *[1782                self._agenerate_with_cache(1783                    m,1784                    stop=stop,1785                    run_manager=run_managers[i] if run_managers else None,1786                    **kwargs,1787                )1788                for i, m in enumerate(input_messages)1789            ],1790            return_exceptions=True,1791        )1792        exceptions = []1793        for i, res in enumerate(results):1794            if isinstance(res, BaseException):1795                if run_managers:1796                    generations_with_error_metadata = _generate_response_from_error(res)1797                    await run_managers[i].on_llm_error(1798                        res,1799                        response=LLMResult(1800                            generations=[generations_with_error_metadata]1801                        ),1802                    )1803                exceptions.append(res)1804        if exceptions:1805            if run_managers:1806                await asyncio.gather(1807                    *[1808                        run_manager.on_llm_end(1809                            LLMResult(1810                                generations=[res.generations],  # type: ignore[union-attr]1811                                llm_output=res.llm_output,  # type: ignore[union-attr]1812                            )1813                        )1814                        for run_manager, res in zip(run_managers, results, strict=False)1815                        if not isinstance(res, Exception)1816                    ]1817                )1818            raise exceptions[0]1819        flattened_outputs = [1820            LLMResult(generations=[res.generations], llm_output=res.llm_output)  # type: ignore[union-attr]1821            for res in results1822        ]1823        llm_output = self._combine_llm_outputs([res.llm_output for res in results])  # type: ignore[union-attr]1824        generations = [res.generations for res in results]  # type: ignore[union-attr]1825        output = LLMResult(generations=generations, llm_output=llm_output)1826        await asyncio.gather(1827            *[1828                run_manager.on_llm_end(flattened_output)1829                for run_manager, flattened_output in zip(1830                    run_managers, flattened_outputs, strict=False1831                )1832            ]1833        )1834        if run_managers:1835            output.run = [1836                RunInfo(run_id=run_manager.run_id) for run_manager in run_managers1837            ]1838        return output18391840    @override1841    def generate_prompt(1842        self,1843        prompts: list[PromptValue],1844        stop: list[str] | None = None,1845        callbacks: Callbacks = None,1846        **kwargs: Any,1847    ) -> LLMResult:1848        prompt_messages = [p.to_messages() for p in prompts]1849        return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)18501851    @override1852    async def agenerate_prompt(1853        self,1854        prompts: list[PromptValue],1855        stop: list[str] | None = None,1856        callbacks: Callbacks = None,1857        **kwargs: Any,1858    ) -> LLMResult:1859        prompt_messages = [p.to_messages() for p in prompts]1860        return await self.agenerate(1861            prompt_messages, stop=stop, callbacks=callbacks, **kwargs1862        )18631864    def _generate_with_cache(1865        self,1866        messages: list[BaseMessage],1867        stop: list[str] | None = None,1868        run_manager: CallbackManagerForLLMRun | None = None,1869        **kwargs: Any,1870    ) -> ChatResult:1871        llm_cache = self.cache if isinstance(self.cache, BaseCache) else get_llm_cache()1872        # We should check the cache unless it's explicitly set to False1873        # A None cache means we should use the default global cache1874        # if it's configured.1875        check_cache = self.cache or self.cache is None1876        if check_cache:1877            if llm_cache:1878                llm_string = self._get_llm_string(stop=stop, **kwargs)1879                normalized_messages = [1880                    (1881                        msg.model_copy(update={"id": None})1882                        if getattr(msg, "id", None) is not None1883                        else msg1884                    )1885                    for msg in messages1886                ]1887                prompt = dumps(normalized_messages)1888                cache_val = llm_cache.lookup(prompt, llm_string)1889                if isinstance(cache_val, list):1890                    converted_generations = self._convert_cached_generations(cache_val)1891                    self._replay_v2_events_for_cache_hit(1892                        converted_generations,1893                        run_manager=run_manager,1894                        **kwargs,1895                    )1896                    return ChatResult(generations=converted_generations)1897            elif self.cache is None:1898                pass1899            else:1900                msg = "Asked to cache, but no cache found at `langchain.cache`."1901                raise ValueError(msg)19021903        # Apply the rate limiter after checking the cache, since1904        # we usually don't want to rate limit cache lookups, but1905        # we do want to rate limit API requests.1906        if self.rate_limiter:1907            self.rate_limiter.acquire(blocking=True)19081909        # v2 streaming: preferred over v1 when any attached handler opts in via1910        # `_V2StreamingCallbackHandler`. Drives the protocol event generator1911        # (native or `_stream` compat bridge) through the shared helper so1912        # `on_stream_event` fires per event, then returns a normal `ChatResult`1913        # so caching / `on_llm_end` stay on the existing generate path.1914        if self._should_use_protocol_streaming(1915            async_api=False,1916            run_manager=run_manager,1917            **kwargs,1918        ):1919            stream_accum = ChatModelStream(1920                message_id=(1921                    f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1922                )1923            )1924            assert run_manager is not None  # noqa: S1011925            for _event in self._iter_v2_events(1926                messages,1927                run_manager=run_manager,1928                stream=stream_accum,1929                stop=stop,1930                **kwargs,1931            ):1932                pass1933            if stream_accum.output_message is None:1934                msg = "v2 stream finished without producing a message"1935                raise RuntimeError(msg)1936            result = ChatResult(1937                generations=[ChatGeneration(message=stream_accum.output_message)]1938            )1939        # If stream is not explicitly set, check if implicitly requested by1940        # astream_events() or astream_log(). Bail out if _stream not implemented1941        elif self._should_stream(1942            async_api=False,1943            run_manager=run_manager,1944            **kwargs,1945        ):1946            chunks: list[ChatGenerationChunk] = []1947            run_id: str | None = (1948                f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None1949            )1950            yielded = False1951            index = -11952            index_type = ""1953            for chunk in self._stream(messages, stop=stop, **kwargs):1954                chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk)1955                if self.output_version == "v1":1956                    # Overwrite .content with .content_blocks1957                    chunk.message = _update_message_content_to_blocks(1958                        chunk.message, "v1"1959                    )1960                    for block in cast(1961                        "list[types.ContentBlock]", chunk.message.content1962                    ):1963                        if block["type"] != index_type:1964                            index_type = block["type"]1965                            index += 11966                        if "index" not in block:1967                            block["index"] = index1968                if run_manager:1969                    if chunk.message.id is None:1970                        chunk.message.id = run_id1971                    run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)1972                chunks.append(chunk)1973                yielded = True19741975            # Yield a final empty chunk with chunk_position="last" if not yet yielded1976            if (1977                yielded1978                and isinstance(chunk.message, AIMessageChunk)1979                and not chunk.message.chunk_position1980            ):1981                empty_content: str | list[str | dict[str, Any]] = (1982                    "" if isinstance(chunk.message.content, str) else []1983                )1984                chunk = ChatGenerationChunk(1985                    message=AIMessageChunk(1986                        content=empty_content, chunk_position="last", id=run_id1987                    )1988                )1989                if run_manager:1990                    run_manager.on_llm_new_token("", chunk=chunk)1991                chunks.append(chunk)1992            result = generate_from_stream(iter(chunks))1993        elif inspect.signature(self._generate).parameters.get("run_manager"):1994            result = self._generate(1995                messages, stop=stop, run_manager=run_manager, **kwargs1996            )1997        else:1998            result = self._generate(messages, stop=stop, **kwargs)19992000        if self.output_version == "v1":
Findings

✓ No findings reported for this file.
Findings

Get this view in your editor