# libs/core/langchain_core/language_models/chat_model_stream.py · langchain-ai/langchain

"""Per-message streaming objects for content-block protocol events.

`ChatModelStream` is the synchronous variant returned by
`BaseChatModel.stream_events(version="v3")`.  `AsyncChatModelStream` is the
asynchronous variant returned by `BaseChatModel.astream_events(version="v3")`.

Both expose typed projection properties (`.text`, `.reasoning`,
`.tool_calls`, `.usage`, `.output`) that accumulate protocol
events as they arrive.  Projections can be iterated for deltas or
drained for the final accumulated value.

Raw protocol events are also available via direct iteration on the
stream object (replay-buffer semantics — multiple independent
consumers supported).
"""

from __future__ import annotations

import asyncio
import contextlib
from typing import TYPE_CHECKING, Any, cast

from langchain_core.language_models._compat_bridge import finalize_tool_call_chunk
from langchain_core.messages import AIMessage

if TYPE_CHECKING:
    from collections.abc import Awaitable, Callable, Generator, Iterator, Mapping

    from langchain_protocol.protocol import (
        ContentBlockDeltaData,
        ContentBlockFinishData,
        FinalizedContentBlock,
        InvalidToolCall,
        MessageFinishData,
        MessageMetadata,
        MessagesData,
        MessageStartData,
        ReasoningContentBlock,
        ServerToolCallChunk,
        TextContentBlock,
        ToolCall,
        ToolCallChunk,
        UsageInfo,
    )
    from typing_extensions import Self


# ---------------------------------------------------------------------------
# Tool-call chunk helpers (shared by tool_call_chunk and server_tool_call_chunk)
# ---------------------------------------------------------------------------


def _merge_chunk_into_store(
    store: dict[int, dict[str, Any]],
    idx: int,
    block: dict[str, Any],
) -> None:
    """Merge a tool-call-chunk delta: sticky id/name, concat args.

    The first truthy `id` / `name` recorded for `idx` is kept; later values
    never overwrite it. `args` fragments are concatenated in arrival order,
    with a missing or `None` fragment treated as the empty string.

    Args:
        store: Chunk store keyed by index; updated in place.
        idx: Index of the chunk being merged.
        block: Incoming chunk delta.
    """
    existing = store.get(idx, {})
    if block.get("id") and "id" not in existing:
        existing["id"] = block["id"]
    if block.get("name") and "name" not in existing:
        existing["name"] = block["name"]
    existing["args"] = existing.get("args", "") + (block.get("args") or "")
    store[idx] = existing


def _merge_block_delta_into_store(
    store: dict[int, dict[str, Any]],
    idx: int,
    fields: dict[str, Any],
) -> None:
    """Shallow-merge a block-delta snapshot into an indexed chunk store.

    Every non-`None` value in `fields` overwrites the stored value for the
    same key; `None` values leave the stored value untouched.

    Args:
        store: Chunk store keyed by index; updated in place.
        idx: Index of the chunk being merged.
        fields: Snapshot of block fields carried by the delta.
    """
    existing = store.get(idx, {})
    for key, value in fields.items():
        if value is not None:
            existing[key] = value
    store[idx] = existing


def _event_content_block(data: Mapping[str, Any]) -> dict[str, Any] | None:
    """Return start/finish content, tolerating the pre-delta field name.

    Args:
        data: Event payload.

    Returns:
        The value under `content` (or, when that is falsy, `content_block`)
        if it is a dict, otherwise `None`.
    """
    block = data.get("content") or data.get("content_block")
    return block if isinstance(block, dict) else None


def _legacy_block_to_delta(block: Mapping[str, Any]) -> dict[str, Any]:
    """Convert the old content-block delta shape to an explicit delta.

    Args:
        block: Legacy content block.

    Returns:
        A `text-delta`, `reasoning-delta` or `data-delta` dict when the block
        matches one of those shapes; otherwise a `legacy-block-delta` that
        wraps the original block under `fields`.
    """
    btype = block.get("type")
    if btype == "text":
        return {"type": "text-delta", "text": block.get("text", "")}
    if btype == "reasoning":
        return {
            "type": "reasoning-delta",
            "reasoning": block.get("reasoning", ""),
        }
    if "data" in block:
        delta = {"type": "data-delta", "data": block.get("data", "")}
        if block.get("encoding") == "base64":
            delta["encoding"] = "base64"
        return delta
    return {"type": "legacy-block-delta", "fields": block}


def _event_delta(data: Mapping[str, Any]) -> dict[str, Any] | None:
    """Return an explicit delta, converting legacy content-block deltas.

    Args:
        data: Event payload.

    Returns:
        `data["delta"]` when it is a dict; otherwise the conversion of a dict
        `data["content_block"]`; otherwise `None`.
    """
    delta = data.get("delta")
    if isinstance(delta, dict):
        return delta
    block = data.get("content_block")
    if isinstance(block, dict):
        return _legacy_block_to_delta(block)
    return None


def _sweep_chunk_store(
    store: dict[int, dict[str, Any]],
    *,
    finalized_type: str,
    finalized_blocks: dict[int, FinalizedContentBlock],
    tool_calls_acc: list[ToolCall] | None,
    invalid_acc: list[InvalidToolCall],
) -> None:
    """Parse each unswept chunk's `args`; record as `finalized_type` or invalid.

    `tool_calls_acc` is only populated when `finalized_type == "tool_call"`
    (server-side calls don't surface through `.tool_calls`).

    Deliberately does not backfill `index` onto finalized tool-call blocks:
    matches v1 (`AIMessage.init_tool_calls` drops `index` when substituting
    `tool_call_chunk` → `tool_call`) and prevents `merge_lists` from
    re-merging further chunks into an already-parsed args dict.

    Args:
        store: Unswept chunks keyed by index; emptied before returning.
        finalized_type: Block type passed to `finalize_tool_call_chunk`.
        finalized_blocks: Receives every finalized block under its index.
        tool_calls_acc: Receives valid calls when `finalized_type` is
            `"tool_call"`; may be `None`.
        invalid_acc: Receives blocks finalized as `invalid_tool_call`.
    """
    for idx in sorted(store):
        chunk = store[idx]
        # Carry over any non-finalize-rewritten fields the chunk collected
        # (e.g., `extras`). `_merge_chunk_into_store` only populates
        # `id` / `name` / `args`, so this is empty in practice today;
        # future provider-specific fields would flow through here.
        # NOTE(review): `_merge_block_delta_into_store` keeps every non-None
        # field it is given, so chunks merged through it may contribute
        # additional keys here — confirm that is intended.
        extras = {
            k: v
            for k, v in chunk.items()
            if k not in {"type", "id", "name", "args"} and v is not None
        }
        final_block = finalize_tool_call_chunk(
            raw_args=chunk.get("args"),
            id_=chunk.get("id"),
            name=chunk.get("name"),
            extras=extras,
            finalized_type=finalized_type,
        )
        if final_block["type"] == "invalid_tool_call":
            invalid_acc.append(final_block)
        elif tool_calls_acc is not None and finalized_type == "tool_call":
            tool_calls_acc.append(cast("ToolCall", final_block))
        finalized_blocks[idx] = final_block
    store.clear()


# ---------------------------------------------------------------------------
# Projection base — shared producer API
# ---------------------------------------------------------------------------


class _ProjectionBase:
    """Shared state and producer API for sync and async projections.

    The `push` / `complete` / `fail` methods are the producer-side
    API — called by the stream as events arrive. Subclasses add the
    consumer protocol (sync iteration or async iteration + await).

    `done` and `error` are safe read-only views of the terminal state
    for iterators and other siblings that need to observe lifecycle
    without reaching into the underlying fields.
    """

    __slots__ = ("_deltas", "_done", "_error", "_final_set", "_final_value")

    def __init__(self) -> None:
        """Initialize empty projection state."""
        self._deltas: list[Any] = []
        self._final_value: Any = None
        # Tracked separately from `_final_value` so that a final value of
        # `None` can be told apart from "no final value yet".
        self._final_set: bool = False
        self._done: bool = False
        self._error: BaseException | None = None

    @property
    def done(self) -> bool:
        """Whether the projection has finished (successfully or via error)."""
        return self._done

    @property
    def error(self) -> BaseException | None:
        """The terminal error, if any."""
        return self._error

    def push(self, delta: Any) -> None:
        """Append a delta value. Producer-side API."""
        self._deltas.append(delta)

    def complete(self, final_value: Any) -> None:
        """Set the final accumulated value and mark as done. Producer-side API."""
        self._final_value = final_value
        self._final_set = True
        self._done = True

    def fail(self, error: BaseException) -> None:
        """Mark as errored. Producer-side API."""
        self._error = error
        self._done = True


# ---------------------------------------------------------------------------
# Sync projections
# ---------------------------------------------------------------------------


class SyncProjection(_ProjectionBase):
    """Sync iterable of deltas with pull-based backpressure.

    Follows the same `_request_more` convention as langgraph's
    `EventLog`: when the cursor catches up to the buffer and the
    projection is not done, it calls `_request_more()` to pull more
    events from the producer.

    Each call to `__iter__` creates a new cursor at position 0.
    Multiple iterators replay all deltas from the start.
    """

    __slots__ = ("_ensure_started", "_request_more")

    def __init__(self) -> None:
        """Initialize with no pull callback."""
        super().__init__()
        self._ensure_started: Callable[[], None] | None = None
        self._request_more: Callable[[], bool] | None = None

    def set_start(self, cb: Callable[[], None] | None) -> None:
        """Install a lazy-start callback invoked on first consumption."""
        self._ensure_started = cb

    def set_request_more(self, cb: Callable[[], bool] | None) -> None:
        """Install the pull callback the iterator uses to drain the source."""
        self._request_more = cb

    def __iter__(self) -> Iterator[Any]:
        """Yield deltas, pulling via `_request_more` when caught up.

        Raises:
            BaseException: The error recorded by `fail`, raised once all
                buffered deltas have been yielded.
        """
        if self._ensure_started is not None:
            self._ensure_started()
        cursor = 0
        while True:
            if cursor < len(self._deltas):
                yield self._deltas[cursor]
                cursor += 1
            elif self._error is not None:
                raise self._error
            elif self._done:
                return
            elif self._request_more is not None:
                # A pull may land deltas on a sibling projection only, so
                # keep pulling until this cursor can advance, the projection
                # terminates, or the source reports exhaustion.
                while cursor >= len(self._deltas) and not self._done:
                    if not self._request_more():
                        break
                if cursor >= len(self._deltas):
                    if self._error is not None:
                        raise self._error
                    return
            else:
                return

    def get(self) -> Any:
        """Drain via `_request_more` and return the final value.

        Returns:
            The value passed to `complete`, or `None` if the source was
            exhausted before the projection completed.

        Raises:
            BaseException: The error recorded by `fail`, if any.
        """
        if self._ensure_started is not None:
            self._ensure_started()
        if not self._done and self._request_more is not None:
            while not self._done:
                if not self._request_more():
                    break
        if self._error is not None:
            raise self._error
        return self._final_value


class SyncTextProjection(SyncProjection):
    """String-specialized sync projection.

    Adds `__str__`, `__bool__`, `__repr__` for ergonomic use with
    `.text` and `.reasoning` projections.
    """

    __slots__ = ()

    def __str__(self) -> str:
        """Drain and return the full accumulated string."""
        val = self.get()
        return val if val is not None else ""

    def __bool__(self) -> bool:
        """Return whether any deltas have been pushed."""
        return len(self._deltas) > 0

    def __repr__(self) -> str:
        """Return repr of the accumulated text so far."""
        if self._final_set:
            return repr(self._final_value)
        return repr("".join(self._deltas))


# ---------------------------------------------------------------------------
# Async projection
# ---------------------------------------------------------------------------


class AsyncProjection(_ProjectionBase):
    """Async iterable of deltas that is also awaitable for the final value.

    Uses an `asyncio.Event` to notify consumers of state changes. Each
    waiter — the awaitable (`__await__`) and each async iterator cursor
    — shares the event and re-checks its own condition on wake. The event
    is cleared before a waiter awaits, so stale "something happened"
    signals don't cause spin loops.

    This is single-loop only — producers and consumers must share an
    event loop. If cross-thread wake is ever required, revert to a
    list-of-futures pattern with `call_soon_threadsafe`.
    """

    __slots__ = ("_arequest_more", "_ensure_started", "_event")

    def __init__(self) -> None:
        """Initialize with an un-set event and no pump callback."""
        super().__init__()
        self._event = asyncio.Event()
        self._arequest_more: Callable[[], Awaitable[bool]] | None = None
        self._ensure_started: Callable[[], Awaitable[None]] | None = None

    def set_start(self, cb: Callable[[], Awaitable[None]] | None) -> None:
        """Install a lazy-start callback invoked on first consumption."""
        self._ensure_started = cb

    def set_arequest_more(self, cb: Callable[[], Awaitable[bool]] | None) -> None:
        """Wire the async pull callback iterators use to drive the source.

        Mirrors `SyncProjection.set_request_more`. Under caller-driven
        streaming, consumers call this callback when their buffer is
        empty so that the owning graph advances one step.

        Args:
            cb: Async no-arg callable returning `True` when a new event
                was produced, `False` when the source is exhausted. Pass
                `None` to unwire.
        """
        self._arequest_more = cb

    def push(self, delta: Any) -> None:
        """Append a delta and notify waiters."""
        super().push(delta)
        self._event.set()

    def complete(self, final_value: Any) -> None:
        """Set the final value, mark done, and notify waiters."""
        super().complete(final_value)
        self._event.set()

    def fail(self, error: BaseException) -> None:
        """Mark errored and notify waiters."""
        super().fail(error)
        self._event.set()

    # -- Async iterable (yields deltas) ------------------------------------

    def __aiter__(self) -> _AsyncProjectionIterator:
        """Return an async iterator over deltas."""
        return _AsyncProjectionIterator(self)

    # -- Awaitable (returns final value) -----------------------------------

    def __await__(self) -> Generator[Any, None, Any]:
        """Await the final accumulated value."""
        return self._await_impl().__await__()

    async def _await_impl(self) -> Any:
        """Wait until the final value is set and return it.

        When a caller-driven pump is wired via `set_arequest_more`, drive
        it instead of blocking on `self._event`; otherwise fall back to
        the event (used by tests that dispatch manually).

        Returns:
            The value passed to `complete`, or `None` if the pump was
            exhausted before the projection completed.

        Raises:
            BaseException: The error recorded by `fail`, if any.
        """
        if self._ensure_started is not None:
            await self._ensure_started()
        while not self._final_set:
            if self._error is not None:
                raise self._error
            if self._arequest_more is not None:
                if not await self._arequest_more() and not self._final_set:
                    # Pump exhausted without completing this projection —
                    # nothing more will arrive. Return current state and
                    # let callers observe the missing final via the
                    # returned None / unset error.
                    break
            else:
                self._event.clear()
                await self._event.wait()
        if self._error is not None:
            raise self._error
        return self._final_value


class _AsyncProjectionIterator:
    """Async iterator over an `AsyncProjection`'s deltas."""

    __slots__ = ("_offset", "_proj")

    def __init__(self, proj: AsyncProjection) -> None:
        """Initialize cursor at position 0."""
        self._proj = proj
        self._offset = 0

    def __aiter__(self) -> _AsyncProjectionIterator:
        """Return self for the async iteration protocol."""
        return self

    async def __anext__(self) -> Any:
        """Return the next delta, awaiting if necessary.

        When the projection has an `_arequest_more` pump wired, drain it
        in an inner loop (mirrors `SyncProjection.__iter__`) until this
        cursor advances or the pump reports exhaustion. Without a pump,
        fall back to waiting on the shared event.

        Raises:
            StopAsyncIteration: When the projection is done, or the pump is
                exhausted, and no further delta is buffered for this cursor.
            BaseException: The error recorded on the projection, if any.
        """
        proj = self._proj
        if proj._ensure_started is not None:  # noqa: SLF001
            await proj._ensure_started()  # noqa: SLF001
        while True:
            # Direct access to the projection's internal list/event is
            # intentional — the iterator is the projection's sidekick and
            # depends on reading the shared buffer by cursor.
            if self._offset < len(proj._deltas):  # noqa: SLF001
                item = proj._deltas[self._offset]  # noqa: SLF001
                self._offset += 1
                return item
            if proj.error is not None:
                raise proj.error
            if proj.done:
                raise StopAsyncIteration
            if proj._arequest_more is not None:  # noqa: SLF001
                # Caller-driven: drive the producer. Pump may land new
                # deltas for a sibling projection — loop until our cursor
                # advances, the projection terminates, or the pump is
                # exhausted.
                while (
                    self._offset >= len(proj._deltas)  # noqa: SLF001
                    and not proj.done
                ):
                    if not await proj._arequest_more():  # noqa: SLF001
                        break
                if (
                    self._offset >= len(proj._deltas)  # noqa: SLF001
                    and not proj.done
                ):
                    if proj.error is not None:
                        raise proj.error
                    raise StopAsyncIteration
            else:
                proj._event.clear()  # noqa: SLF001
                await proj._event.wait()  # noqa: SLF001


# ---------------------------------------------------------------------------
# Sync stream
#
---------------------------------------------------------------------------471472473class _ChatModelStreamBase:474    """Shared state and event dispatch for chat-model streams.475476    Holds accumulated protocol state (text, reasoning, tool calls,477    usage, metadata) and the event-dispatch machinery that drives the478    typed projections. `ChatModelStream` (sync) and479    `AsyncChatModelStream` (async) inherit from this base and add the480    projection types and consumer APIs for their flavor.481    """482483    # Projection instances — concrete subclasses create them as sync or484    # async variants in their own __init__ after calling super().485    _text_proj: _ProjectionBase486    _reasoning_proj: _ProjectionBase487    _tool_calls_proj: _ProjectionBase488489    def __init__(490        self,491        *,492        namespace: list[str] | None = None,493        node: str | None = None,494        message_id: str | None = None,495    ) -> None:496        self._namespace = namespace or []497        self._node = node498        self._message_id = message_id499500        # Accumulated state501        self._text_acc: str = ""502        self._reasoning_acc: str = ""503        # Per-block text / reasoning storage keyed by wire index. Used to504        # populate the finalized block payload without cross-contaminating505        # other blocks of the same type in the same message. 
Without506        # per-block storage the message-wide accumulator would bleed507        # earlier block text into later finalized blocks.508        self._text_per_block: dict[int, str] = {}509        self._reasoning_per_block: dict[int, str] = {}510        self._tool_call_chunks: dict[int, dict[str, Any]] = {}511        self._tool_calls_acc: list[ToolCall] = []512        self._invalid_tool_calls_acc: list[InvalidToolCall] = []513        self._server_tool_call_chunks: dict[int, dict[str, Any]] = {}514        # Ordered snapshot of every finalized block, keyed by event index.515        # Single source of truth for .output.content. Typed accumulators516        # (text/reasoning/tool_calls/invalid_tool_calls) continue to serve517        # the public projections.518        self._blocks: dict[int, FinalizedContentBlock] = {}519        self._usage_value: UsageInfo | None = None520        self._start_metadata: MessageMetadata | None = None521        self._finish_metadata: dict[str, Any] | None = None522        self._additional_kwargs: dict[str, Any] | None = None523        self._done: bool = False524        self._error: BaseException | None = None525        self._output_message: AIMessage | None = None526527        # Raw event replay buffer528        self._events: list[MessagesData] = []529530    # -- Common properties ------------------------------------------------531532    @property533    def namespace(self) -> list[str]:534        """Graph namespace path for this message."""535        return self._namespace536537    @property538    def node(self) -> str | None:539        """Graph node that produced this message."""540        return self._node541542    @property543    def message_id(self) -> str | None:544        """Stable message identifier."""545        return self._message_id546547    def set_message_id(self, message_id: str) -> None:548        """Assign the stable message identifier once the run starts.549550        Called by the stream driver 
(`stream_events(version="v3")` /551        `astream_events(version="v3")`) after `on_chat_model_start` produces a run552        id. Not intended for end-user code.553        """554        self._message_id = message_id555556    @property557    def done(self) -> bool:558        """Whether the stream has finished."""559        return self._done560561    @property562    def has_events(self) -> bool:563        """Whether any protocol events have been recorded."""564        return bool(self._events)565566    @property567    def output_message(self) -> AIMessage | None:568        """The assembled message if the stream has finished, else `None`.569570        Unlike `ChatModelStream.output` (which blocks until the stream571        finishes), this never pumps, blocks, or raises. Intended for the572        stream driver (`stream_events(version="v3")` and its async573        equivalent) to check whether the stream produced a message before574        firing `on_llm_end` callbacks.575        """576        return self._output_message577578    # -- Event ingestion (public) ------------------------------------------579580    def dispatch(self, event: Mapping[str, Any]) -> None:581        """Route a protocol event to the appropriate internal handler.582583        Public entry point for feeding events into the stream. 
Called by584        the stream driver (the `stream_events(version="v3")` pump and its585        async equivalent) and by any observer or test that needs to586        inject protocol events.587        """588        self._record_event(event)589        event_type = event.get("event")590        if event_type == "message-start":591            self._push_message_start(cast("MessageStartData", event))592        elif event_type == "content-block-delta":593            self._push_content_block_delta(cast("ContentBlockDeltaData", event))594        elif event_type == "content-block-finish":595            self._push_content_block_finish(cast("ContentBlockFinishData", event))596        elif event_type == "message-finish":597            self._finish(cast("MessageFinishData", event))598        elif event_type == "error":599            self.fail(RuntimeError(event.get("message", "Unknown error")))600        # content-block-start is informational — no accumulation needed601602    # -- Internal push API (called by dispatch) ----------------------------603604    def _record_event(self, event: Mapping[str, Any]) -> None:605        """Append a raw event to the replay buffer."""606        self._events.append(cast("MessagesData", event))607608    def _push_message_start(self, data: MessageStartData) -> None:609        """Process a `message-start` event."""610        self._start_metadata = data.get("metadata")611        message_id = data.get("id")612        if message_id:613            self._message_id = message_id614615    def _push_content_block_delta(self, data: ContentBlockDeltaData) -> None:616        """Process a `content-block-delta` event."""617        delta = _event_delta(data)618        if delta is None:619            return620        event_idx = data.get("index")621        dtype = delta.get("type", "")622623        if dtype == "text-delta":624            delta_text = delta.get("text", "")625            if delta_text:626                self._text_acc += delta_text627              
  if event_idx is not None:628                    self._text_per_block[event_idx] = (629                        self._text_per_block.get(event_idx, "") + delta_text630                    )631                self._text_proj.push(delta_text)632        elif dtype == "reasoning-delta":633            delta_r = delta.get("reasoning", "")634            if delta_r:635                self._reasoning_acc += delta_r636                if event_idx is not None:637                    self._reasoning_per_block[event_idx] = (638                        self._reasoning_per_block.get(event_idx, "") + delta_r639                    )640                self._reasoning_proj.push(delta_r)641        elif dtype == "block-delta":642            fields = delta.get("fields")643            if not isinstance(fields, dict):644                return645            btype = fields.get("type", "")646            if btype == "tool_call_chunk":647                tcc = cast("ToolCallChunk", fields)648                idx = data.get("index")649                if idx is None:650                    idx = tcc.get("index", len(self._tool_call_chunks))  # type: ignore[unreachable]651                _merge_block_delta_into_store(self._tool_call_chunks, idx, dict(tcc))652                chunk_block: ToolCallChunk = {653                    "type": "tool_call_chunk",654                    "id": tcc.get("id"),655                    "name": tcc.get("name"),656                    "args": tcc.get("args"),657                }658                if "index" in tcc:659                    chunk_block["index"] = tcc["index"]660                self._tool_calls_proj.push(chunk_block)661            elif btype == "server_tool_call_chunk":662                stcc = cast("ServerToolCallChunk", fields)663                idx = data.get("index")664                if idx is None:665                    idx = len(self._server_tool_call_chunks)  # type: ignore[unreachable]666                _merge_block_delta_into_store(667                   
 self._server_tool_call_chunks,668                    idx,669                    dict(stcc),670                )671        elif dtype == "legacy-block-delta":672            fields = delta.get("fields")673            if not isinstance(fields, dict):674                return675            btype = fields.get("type", "")676            if btype == "tool_call_chunk":677                tcc = cast("ToolCallChunk", fields)678                idx = data.get("index")679                if idx is None:680                    idx = tcc.get("index", len(self._tool_call_chunks))  # type: ignore[unreachable]681                _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))682                legacy_chunk_block: ToolCallChunk = {683                    "type": "tool_call_chunk",684                    "id": tcc.get("id"),685                    "name": tcc.get("name"),686                    "args": tcc.get("args"),687                }688                if "index" in tcc:689                    legacy_chunk_block["index"] = tcc["index"]690                self._tool_calls_proj.push(legacy_chunk_block)691            elif btype == "server_tool_call_chunk":692                stcc = cast("ServerToolCallChunk", fields)693                idx = data.get("index")694                if idx is None:695                    idx = len(self._server_tool_call_chunks)  # type: ignore[unreachable]696                _merge_chunk_into_store(697                    self._server_tool_call_chunks,698                    idx,699                    dict(stcc),700                )701        elif dtype == "data-delta":702            # Binary/modal payload deltas are reflected in the final703            # content-block finish event; there is no dedicated projection.704            return705        else:706            # Transitional legacy path for old `content_block` deltas that707            # should not be reachable after `_event_delta` conversion, kept708            # here for custom in-tree test 
fixtures or third-party emitters.709            block = data.get("content_block")710            if not isinstance(block, dict):711                return712            btype = block.get("type", "")713            if btype != "tool_call_chunk":714                return715            tcc = cast("ToolCallChunk", block)716            idx = data.get("index")717            if idx is None:718                idx = tcc.get("index", len(self._tool_call_chunks))  # type: ignore[unreachable]719            _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))720            fallback_chunk_block: ToolCallChunk = {721                "type": "tool_call_chunk",722                "id": tcc.get("id"),723                "name": tcc.get("name"),724                "args": tcc.get("args"),725            }726            if "index" in tcc:727                fallback_chunk_block["index"] = tcc["index"]728            self._tool_calls_proj.push(fallback_chunk_block)729730    def _resolve_block_text(self, idx: int | None, full_text: str) -> str:731        """Return authoritative text for a single text block at `idx`.732733        Prefers per-block delta accumulation; reconciles with the finish734        event's `full_text` when the provider emits authoritative text735        that differs from what the deltas built up.736737        Does not mutate `self._text_acc` (the delta-sum accumulator) —738        the message-wide projection value is derived from per-block739        storage at `_finish` time, so reconciliation remains correct740        regardless of finish ordering across blocks.741        """742        if idx is None:743            # No wire index — legacy behavior: use the message-wide744            # accumulator. 
Preserved for pre-index semantics; not745            # exercised by the compat bridge or any in-tree provider.746            if full_text and full_text != self._text_acc:747                self._text_acc = full_text748            return self._text_acc749        existing = self._text_per_block.get(idx, "")750        if full_text and full_text != existing:751            if not existing:752                # No deltas arrived for this block — surface the full753                # text as a single delta so the stream projection754                # reflects it.755                self._text_acc += full_text756                self._text_proj.push(full_text)757            elif full_text.startswith(existing):758                # Authoritative text extends the partial deltas — emit759                # the tail so delta consumers see the completion.760                tail = full_text[len(existing) :]761                self._text_acc += tail762                self._text_proj.push(tail)763            # else: authoritative text replaces the partial deltas764            # entirely. No corrective delta is emitted (semantics765            # would be ambiguous mid-stream). 
`_text_acc` is not766            # spliced — the final value is computed from per-block767            # storage at `_finish`, so this remains correct even when768            # other blocks have added to `_text_acc` in between.769            self._text_per_block[idx] = full_text770        return self._text_per_block.get(idx, "")771772    def _resolve_block_reasoning(self, idx: int | None, full_r: str) -> str:773        """Return authoritative reasoning text for a single block at `idx`.774775        Mirrors `_resolve_block_text` for the reasoning projection.776        """777        if idx is None:778            if full_r and full_r != self._reasoning_acc:779                self._reasoning_acc = full_r780            return self._reasoning_acc781        existing = self._reasoning_per_block.get(idx, "")782        if full_r and full_r != existing:783            if not existing:784                self._reasoning_acc += full_r785                self._reasoning_proj.push(full_r)786            elif full_r.startswith(existing):787                tail = full_r[len(existing) :]788                self._reasoning_acc += tail789                self._reasoning_proj.push(tail)790            self._reasoning_per_block[idx] = full_r791        return self._reasoning_per_block.get(idx, "")792793    def _push_content_block_finish(self, data: ContentBlockFinishData) -> None:794        """Process a `content-block-finish` event."""795        block = _event_content_block(data)796        if block is None:797            return798        btype = block.get("type", "")799        idx = data.get("index")800        finalized: FinalizedContentBlock | None = None801802        if btype == "text":803            text_block = cast("TextContentBlock", block)804            full_text = text_block.get("text", "")805            block_text = self._resolve_block_text(idx, full_text)806            finalized = cast(807                "FinalizedContentBlock",808                {809                    
**text_block,810                    "type": "text",811                    "text": block_text,812                },813            )814        elif btype == "reasoning":815            reasoning_block = cast("ReasoningContentBlock", block)816            full_r = reasoning_block.get("reasoning", "")817            block_reasoning = self._resolve_block_reasoning(idx, full_r)818            # Keep provider-specific fields alongside the accumulated819            # reasoning text. Anthropic's `signature` arrives under820            # `extras` and is required on follow-up turns. Only overwrite821            # `reasoning` when we have accumulated content; OpenAI can822            # emit a reasoning block with no text deltas, and writing an823            # empty string there makes downstream serializers synthesize824            # an empty summary entry.825            finalized_dict: dict[str, Any] = {**reasoning_block, "type": "reasoning"}826            if block_reasoning:827                finalized_dict["reasoning"] = block_reasoning828            finalized = cast("FinalizedContentBlock", finalized_dict)829        elif btype == "tool_call":830            tcb = cast("ToolCall", block)831            # Preserve provider-specific fields (extras, etc.) on the832            # content block. `_assemble_message` separately projects the833            # minimal {id, name, args, type} shape onto834            # `AIMessage.tool_calls`. 
Strip `index` to match v1835            # (`AIMessage.init_tool_calls` rebuilds the block without836            # `index`); see `_finalize_block` in `_compat_bridge.py`.837            tc = cast(838                "ToolCall",839                {840                    **{k: v for k, v in tcb.items() if k != "index"},841                    "type": "tool_call",842                    "id": tcb.get("id", ""),843                    "name": tcb.get("name", ""),844                    "args": tcb.get("args", {}),845                },846            )847            self._tool_calls_acc.append(tc)848            if idx is not None and idx in self._tool_call_chunks:849                del self._tool_call_chunks[idx]850            finalized = tc851        elif btype == "invalid_tool_call":852            itc = cast("InvalidToolCall", block)853            # Strip `index` on the stored block to stay symmetric with854            # the `tool_call` path.855            itc = cast(856                "InvalidToolCall",857                {k: v for k, v in itc.items() if k != "index"},858            )859            self._invalid_tool_calls_acc.append(itc)860            # Critical: drop the stale chunk so _finish's sweep doesn't revive861            # it as an empty-args ToolCall.862            if idx is not None and idx in self._tool_call_chunks:863                del self._tool_call_chunks[idx]864            if idx is not None and idx in self._server_tool_call_chunks:865                del self._server_tool_call_chunks[idx]866            finalized = itc867        elif btype in {868            "server_tool_call",869            "server_tool_result",870            "image",871            "audio",872            "video",873            "file",874            "non_standard",875        }:876            if btype == "server_tool_call" and idx is not None:877                self._server_tool_call_chunks.pop(idx, None)878            finalized = cast("FinalizedContentBlock", block)879880        if 
finalized is not None and idx is not None:881            # Backfill the wire index onto the finalized block when the882            # source didn't supply one. `langchain_core.utils._merge`'s883            # block-merger (used by `AIMessageChunk.__add__` /884            # `add_ai_message_chunks`) keys on `block["index"]` to group885            # deltas into the same output block — without it, a v2-886            # assembled `AIMessage` that later re-enters the chunk887            # aggregation path won't merge cleanly. Client-side888            # `tool_call` / `invalid_tool_call` blocks are excluded: v1889            # finalization drops `index` on them so further deltas890            # cannot clobber already-parsed args, and v2 mirrors that.891            if btype not in {"tool_call", "invalid_tool_call"}:892                finalized.setdefault("index", idx)893            self._blocks[idx] = finalized894895    def _finish(self, data: MessageFinishData) -> None:896        """Process a `message-finish` event."""897        self._done = True898        self._usage_value = data.get("usage")899        self._finish_metadata = cast("dict[str, Any] | None", data.get("metadata"))900        # Off-spec extension carrying provider-side `additional_kwargs`901        # that don't map onto a typed protocol field (e.g. Gemini's902        # `__gemini_function_call_thought_signatures__`). 
The compat903        # bridge emits this on `message-finish` so the assembled message904        # carries the same data `ainvoke` would have preserved.905        self._additional_kwargs = cast(906            "dict[str, Any] | None",907            cast("dict[str, Any]", data).get("additional_kwargs"),908        )909910        # Finalize any unswept chunks — both client- and server-side.911        _sweep_chunk_store(912            self._tool_call_chunks,913            finalized_type="tool_call",914            finalized_blocks=self._blocks,915            tool_calls_acc=self._tool_calls_acc,916            invalid_acc=self._invalid_tool_calls_acc,917        )918        _sweep_chunk_store(919            self._server_tool_call_chunks,920            finalized_type="server_tool_call",921            finalized_blocks=self._blocks,922            tool_calls_acc=None,923            invalid_acc=self._invalid_tool_calls_acc,924        )925926        # Prefer the per-block sum when any indexed text / reasoning927        # arrived — it stays correct regardless of finish ordering and928        # of whether finish events carried authoritative text that929        # differed from the deltas. 
Fall back to the delta-sum930        # accumulator only for the legacy no-index path.931        if self._text_per_block:932            text_final = "".join(933                self._text_per_block[i] for i in sorted(self._text_per_block)934            )935        else:936            text_final = self._text_acc937        if self._reasoning_per_block:938            reasoning_final = "".join(939                self._reasoning_per_block[i] for i in sorted(self._reasoning_per_block)940            )941        else:942            reasoning_final = self._reasoning_acc943944        self._text_proj.complete(text_final)945        self._reasoning_proj.complete(reasoning_final)946        self._tool_calls_proj.complete(self._tool_calls_acc)947        self._output_message = self._assemble_message()948949    def fail(self, error: BaseException) -> None:950        """Mark the stream as errored and propagate to all projections.951952        Public API — called by the stream driver (`stream_events(version="v3")` /953        `astream_events(version="v3")`) when the underlying producer raises, by954        `dispatch` when an `error` protocol event arrives, and by955        cancellation paths.956        """957        self._done = True958        self._error = error959        self._text_proj.fail(error)960        self._reasoning_proj.fail(error)961        self._tool_calls_proj.fail(error)962963    def _assemble_message(self) -> AIMessage:964        """Build an `AIMessage` from accumulated state.965966        Content is built from `self._blocks`, an index-ordered snapshot of967        finalized protocol blocks. The bare-string fast path is used when968        the message has exactly one `text` block (the common chat case);969        otherwise content is a list of protocol-shape block dicts.970        """971        content: Any972        if not self._blocks:973            # No protocol blocks ever arrived. 
Fall back to the accumulated974            # text (possibly empty) as bare-string content.975            content = self._text_acc976        else:977            # `ChatModelStream` is the v1 content-block surface: content978            # is always a list of protocol blocks when any block arrived.979            # Do not collapse a single text block down to a bare string —980            # that would drop block-level fields (`id`, `index`,981            # annotations, extras) that downstream serializers need to982            # round-trip the message on a follow-up turn.983            ordered_blocks = [self._blocks[idx] for idx in sorted(self._blocks)]984            content = [dict(b) for b in ordered_blocks]985986        response_metadata: dict[str, Any] = {}987        if self._start_metadata:988            if "provider" in self._start_metadata:989                response_metadata["model_provider"] = self._start_metadata["provider"]990            if "model" in self._start_metadata:991                response_metadata["model_name"] = self._start_metadata["model"]992        if self._finish_metadata:993            response_metadata.update(self._finish_metadata)994        # Pin `output_version` last: `stream_events(version="v3")` always995        # assembles content as v1 protocol blocks, regardless of the996        # provider's configured output format.997        # A provider-supplied `output_version` in finish metadata (e.g.998        # `"responses/v1"` from `ChatOpenAI(use_responses_api=True, ...)`) would999        # otherwise cause `AIMessage.content_blocks` to re-run the wrong1000        # translator on already-v1 content.1001        response_metadata["output_version"] = "v1"10021003        tool_calls = [1004            {1005                "id": tc.get("id", ""),1006                "name": tc.get("name", ""),1007                "args": tc.get("args", {}),1008                "type": "tool_call",1009            }1010            for tc in self._tool_calls_acc1011        
]10121013        invalid_tool_calls = [1014            {1015                "type": "invalid_tool_call",1016                "id": itc.get("id") or None,1017                "name": itc.get("name") or None,1018                "args": itc.get("args") or None,1019                "error": itc.get("error"),1020            }1021            for itc in self._invalid_tool_calls_acc1022        ]10231024        message_kwargs: dict[str, Any] = {1025            "content": content,1026            "id": self._message_id,1027            "tool_calls": tool_calls,1028            "invalid_tool_calls": invalid_tool_calls,1029            "usage_metadata": self._usage_value,1030            "response_metadata": response_metadata,1031        }1032        if self._additional_kwargs:1033            message_kwargs["additional_kwargs"] = dict(self._additional_kwargs)1034        return AIMessage(**message_kwargs)103510361037# ---------------------------------------------------------------------------1038# Sync stream1039# ---------------------------------------------------------------------------104010411042class ChatModelStream(_ChatModelStreamBase):1043    """Synchronous per-message streaming object for a single LLM response.10441045    Returned by `BaseChatModel.stream_events(version="v3")`.  Content-block protocol1046    events are fed into this object and accumulated into typed projections.10471048    Projections (always return the same cached object):10491050    - `.text` — iterable of `str` deltas; `str()` for full text1051    - `.reasoning` — same as `.text` for reasoning content1052    - `.tool_calls` — iterable of `ToolCallChunk` deltas;1053      `.get()` returns `list[ToolCall]`1054    - `.output` — blocking property, returns assembled `AIMessage`10551056    Usage info is available on `.output.usage_metadata` once the stream1057    has finished.10581059    !!! 
note "Output shape is always v1 content blocks"10601061        `.output.content` is always a list of v1 protocol blocks1062        (text, reasoning, tool_call, image, …), regardless of the1063        underlying model's `output_version` setting. That attribute1064        only controls the legacy `stream()` / `astream()` / `invoke()`1065        paths; `ChatModelStream` is built on the content-block1066        protocol and emits v1 shapes by construction.10671068    Raw event iteration::10691070        for event in stream:1071            print(event)  # MessagesData dicts1072    """10731074    _text_proj: SyncTextProjection1075    _reasoning_proj: SyncTextProjection1076    _tool_calls_proj: SyncProjection10771078    def __init__(  # noqa: D1071079        self,1080        *,1081        namespace: list[str] | None = None,1082        node: str | None = None,1083        message_id: str | None = None,1084    ) -> None:1085        super().__init__(namespace=namespace, node=node, message_id=message_id)1086        # Projections — created eagerly1087        self._text_proj = SyncTextProjection()1088        self._reasoning_proj = SyncTextProjection()1089        self._tool_calls_proj = SyncProjection()1090        # Pull callback (set by bind_pump or set_request_more)1091        self._ensure_started: Callable[[], None] | None = None1092        self._request_more: Callable[[], bool] | None = None10931094    # -- Pump/pull wiring --------------------------------------------------10951096    def bind_pump(self, pump_one: Callable[[], bool]) -> None:1097        """Bind a pump for standalone streaming.10981099        Delegates to `set_request_more`.  
Used by1100        `BaseChatModel.stream_events(version="v3")`.1101        """1102        self.set_request_more(pump_one)11031104    def set_start(self, cb: Callable[[], None] | None) -> None:1105        """Install a lazy-start callback on this stream and its projections."""1106        self._ensure_started = cb1107        self._text_proj.set_start(cb)1108        self._reasoning_proj.set_start(cb)1109        self._tool_calls_proj.set_start(cb)11101111    def set_request_more(self, cb: Callable[[], bool]) -> None:1112        """Set the pull callback on this stream and all its projections.11131114        Used by langgraph's `GraphRunStream._wire_request_more` to1115        connect the shared graph pump.1116        """1117        self._request_more = cb1118        self._text_proj.set_request_more(cb)1119        self._reasoning_proj.set_request_more(cb)1120        self._tool_calls_proj.set_request_more(cb)11211122    # -- Public projections ------------------------------------------------11231124    @property1125    def text(self) -> SyncTextProjection:1126        """Text content — iterable of `str` deltas, `str()` for full."""1127        return self._text_proj11281129    @property1130    def reasoning(self) -> SyncTextProjection:1131        """Reasoning content — same interface as :attr:`text`."""1132        return self._reasoning_proj11331134    @property1135    def tool_calls(self) -> SyncProjection:1136        """Tool calls — iterable of `ToolCallChunk` deltas.11371138        `.get()` returns finalized `list[ToolCall]`.1139        """1140        return self._tool_calls_proj11411142    @property1143    def output(self) -> AIMessage:1144        """Assembled `AIMessage` — blocks until the stream finishes."""1145        self._drain()1146        if self._error is not None:1147            raise self._error1148        if self._output_message is None:1149            msg = "Stream finished without producing a message"1150            raise RuntimeError(msg)1151        return 
self._output_message11521153    # -- Raw event iteration (replay buffer) -------------------------------11541155    def __iter__(self) -> Iterator[MessagesData]:1156        """Iterate raw protocol events with replay-buffer semantics."""1157        if self._ensure_started is not None:1158            self._ensure_started()1159        cursor = 01160        while True:1161            if cursor < len(self._events):1162                yield self._events[cursor]1163                cursor += 11164            elif self._error is not None:1165                raise self._error1166            elif self._done:1167                return1168            elif self._request_more is not None:1169                while cursor >= len(self._events) and not self._done:1170                    if not self._request_more():1171                        break1172                if cursor >= len(self._events):1173                    if self._error is not None:1174                        raise self._error1175                    return1176            else:1177                return11781179    # -- Internal helpers --------------------------------------------------11801181    def _drain(self) -> None:1182        """Pull all remaining events until done."""1183        if self._done:1184            return1185        if self._ensure_started is not None:1186            self._ensure_started()1187        if self._request_more is not None:1188            while not self._done:1189                if not self._request_more():1190                    break119111921193# ---------------------------------------------------------------------------1194# Async stream1195# ---------------------------------------------------------------------------119611971198class AsyncChatModelStream(_ChatModelStreamBase):1199    """Asynchronous per-message streaming object for a single LLM response.12001201    Returned by `BaseChatModel.astream_events(version="v3")`.  
Content-block events1202    are fed into this object by a background producer task.12031204    Projections:12051206    - `.text` — async iterable of text deltas; awaitable for full text1207    - `.reasoning` — async iterable of reasoning deltas; awaitable1208    - `.tool_calls` — async iterable of `ToolCallChunk` deltas;1209      awaitable for `list[ToolCall]`1210    - `.output` — awaitable for assembled `AIMessage`12111212    Usage info is available on `.output.usage_metadata` once the stream1213    has finished.12141215    !!! note "Output shape is always v1 content blocks"12161217        The assembled message's content is always a list of v11218        protocol blocks, regardless of the model's `output_version`1219        setting — see `ChatModelStream` for the full rationale.12201221    The stream itself is awaitable (`msg = await stream`) and1222    async-iterable (`async for event in stream`).1223    """12241225    _text_proj: AsyncProjection1226    _reasoning_proj: AsyncProjection1227    _tool_calls_proj: AsyncProjection12281229    def __init__(  # noqa: D1071230        self,1231        *,1232        namespace: list[str] | None = None,1233        node: str | None = None,1234        message_id: str | None = None,1235    ) -> None:1236        super().__init__(namespace=namespace, node=node, message_id=message_id)1237        self._text_proj = AsyncProjection()1238        self._reasoning_proj = AsyncProjection()1239        self._tool_calls_proj = AsyncProjection()1240        self._output_proj = AsyncProjection()1241        self._events_proj = AsyncProjection()1242        self._ensure_started: Callable[[], Awaitable[None]] | None = None1243        self._producer_task: asyncio.Task[None] | None = None1244        # Teardown callback invoked by `aclose()` only when the producer1245        # task was cancelled before its body ran (so the normal1246        # `_produce` CancelledError handler — which fires1247        # `on_llm_error` — never executed). 
Set by `astream_events(version="v3")`.1248        self._on_aclose_fail: Callable[[BaseException], Awaitable[None]] | None = None12491250    # -- Pump/pull wiring (async) ------------------------------------------12511252    def set_arequest_more(self, cb: Callable[[], Awaitable[bool]] | None) -> None:1253        """Fan the async pump callback out to every projection.12541255        Used by langgraph's `AsyncGraphRunStream._wire_arequest_more` so1256        cursors on `stream.text`, `stream.reasoning`, etc. can drive the1257        shared graph pump when their buffer is empty.12581259        Args:1260            cb: Async no-arg callable returning `True` when a new event1261                was produced, `False` when the source is exhausted. Pass1262                `None` to unwire.1263        """1264        for proj in (1265            self._text_proj,1266            self._reasoning_proj,1267            self._tool_calls_proj,1268            self._output_proj,1269            self._events_proj,1270        ):1271            proj.set_arequest_more(cb)12721273    def set_start(self, cb: Callable[[], Awaitable[None]] | None) -> None:1274        """Install a lazy-start callback on this stream and its projections."""1275        self._ensure_started = cb1276        for proj in (1277            self._text_proj,1278            self._reasoning_proj,1279            self._tool_calls_proj,1280            self._output_proj,1281            self._events_proj,1282        ):1283            proj.set_start(cb)12841285    # -- Public projections ------------------------------------------------12861287    @property1288    def text(self) -> AsyncProjection:1289        """Text content — async iterable of deltas, awaitable for full."""1290        return self._text_proj12911292    @property1293    def reasoning(self) -> AsyncProjection:1294        """Reasoning content — same interface as :attr:`text`."""1295        return self._reasoning_proj12961297    @property1298    def tool_calls(self) -> 
AsyncProjection:1299        """Tool calls — async iterable, awaitable for finalized list."""1300        return self._tool_calls_proj13011302    @property1303    def output(self) -> AsyncProjection:1304        """Assembled `AIMessage` — awaitable."""1305        return self._output_proj13061307    def __await__(self) -> Generator[Any, None, AIMessage]:1308        """Await the assembled `AIMessage` and full producer lifecycle.13091310        The producer task is awaited after the output projection resolves so1311        that post-stream work (notably `on_llm_end` callbacks) has run by1312        the time the caller's `await` returns.1313        """1314        return self._await_full().__await__()13151316    async def _await_full(self) -> AIMessage:1317        if self._ensure_started is not None:1318            await self._ensure_started()1319        message: AIMessage = await self._output_proj1320        if self._producer_task is not None:1321            await self._producer_task1322        return message13231324    def __aiter__(self) -> _AsyncProjectionIterator:1325        """Iterate raw protocol events asynchronously."""1326        return _AsyncProjectionIterator(self._events_proj)13271328    # -- Cleanup -----------------------------------------------------------13291330    async def aclose(self) -> None:1331        """Cancel the background producer task and release resources.13321333        If a consumer cancels mid-stream or decides to stop iterating1334        early, the producer task keeps pumping the provider HTTP call to1335        completion because `asyncio.Task` has no implicit link to its1336        awaiter. Call this method to cancel the producer explicitly; the1337        stream transitions to an errored state with `CancelledError`.13381339        If the stream has already produced a message successfully (for1340        example, after `await stream.output`), the producer may still be1341        running post-stream work such as `on_llm_end` callbacks. 
In that1342        case `aclose()` awaits the task rather than cancelling it —1343        turning a successful run into a cancelled one would drop the1344        end callback and corrupt tracing.13451346        Idempotent: safe to call multiple times, including after the1347        stream has finished normally. Also invoked by the async context1348        manager protocol on `__aexit__`.1349        """1350        if self._ensure_started is not None and self._producer_task is None:1351            await self._ensure_started()13521353        task = self._producer_task1354        if task is None:1355            return1356        if task.done() and self._done:1357            return13581359        we_cancelled = not (self._output_message is not None and self._error is None)1360        if we_cancelled and not task.done():1361            task.cancel()13621363        # Wait for the task via a linked `Future`, not by awaiting the1364        # task directly. Awaiting the task would raise `CancelledError`1365        # in two indistinguishable cases: (1) the task we just cancelled1366        # completed, (2) our caller cancelled us. `asyncio.Task.cancelling()`1367        # disambiguates on 3.11+ but doesn't exist on 3.10.1368        #1369        # The `done_future` resolves with `None` whenever the task1370        # finishes (any reason). 
It is not a `Task` itself, so its1371        # `await` only raises when our caller is cancelled — giving us1372        # a portable, unambiguous signal to propagate.1373        if not task.done():1374            loop = asyncio.get_running_loop()1375            done_future: asyncio.Future[None] = loop.create_future()13761377            def _link(_: asyncio.Task[None]) -> None:1378                if not done_future.done():1379                    done_future.set_result(None)13801381            task.add_done_callback(_link)1382            try:1383                await done_future1384            finally:1385                task.remove_done_callback(_link)13861387        # If the task was cancelled before `_produce` ran (e.g.1388        # `astream_events(version="v3")` immediately followed by `aclose()`), the stream1389        # never reached `_produce`'s CancelledError handler — its1390        # projections are still pending and no end-of-lifecycle callback1391        # has fired. Resolve both here so callers of `await stream.output`1392        # don't hang and tracing sees a matching end event.1393        if we_cancelled and not self._done:1394            cancel_exc = asyncio.CancelledError()1395            self.fail(cancel_exc)1396            teardown = self._on_aclose_fail1397            if teardown is not None:1398                with contextlib.suppress(Exception):1399                    await teardown(cancel_exc)14001401    async def __aenter__(self) -> Self:1402        """Enter the async context — returns self."""1403        return self14041405    async def __aexit__(1406        self,1407        exc_type: type[BaseException] | None,1408        exc: BaseException | None,1409        tb: object,1410    ) -> None:1411        """Exit the async context — cancels the producer via `aclose()`."""1412        del exc_type, exc, tb1413        await self.aclose()14141415    # -- Internal API (extend base to drive async projections) -------------14161417    def 
_record_event(self, event: Mapping[str, Any]) -> None:1418        """Record event and push to async event replay projection."""1419        super()._record_event(event)1420        self._events_proj.push(cast("MessagesData", event))14211422    def _finish(self, data: MessageFinishData) -> None:1423        """Finish base projections and async-only projections."""1424        super()._finish(data)1425        self._output_proj.complete(self._output_message)1426        self._events_proj.complete(self._events)14271428    def fail(self, error: BaseException) -> None:1429        """Fail base projections and async-only projections."""1430        super().fail(error)1431        self._output_proj.fail(error)1432        self._events_proj.fail(error)143314341435__all__ = [1436    "AsyncChatModelStream",1437    "AsyncProjection",1438    "ChatModelStream",1439    "SyncProjection",1440    "SyncTextProjection",1441]