Overuse may indicate design issues; consider polymorphism
return block if isinstance(block, dict) else None
1"""Per-message streaming objects for content-block protocol events.23`ChatModelStream` is the synchronous variant returned by4`BaseChatModel.stream_events(version="v3")`. `AsyncChatModelStream` is the5asynchronous variant returned by `BaseChatModel.astream_events(version="v3")`.67Both expose typed projection properties (`.text`, `.reasoning`,8`.tool_calls`, `.usage`, `.output`) that accumulate protocol9events as they arrive. Projections can be iterated for deltas or10drained for the final accumulated value.1112Raw protocol events are also available via direct iteration on the13stream object (replay-buffer semantics — multiple independent14consumers supported).15"""1617from __future__ import annotations1819import asyncio20import contextlib21from typing import TYPE_CHECKING, Any, cast2223from langchain_core.language_models._compat_bridge import finalize_tool_call_chunk24from langchain_core.messages import AIMessage2526if TYPE_CHECKING:27 from collections.abc import Awaitable, Callable, Generator, Iterator, Mapping2829 from langchain_protocol.protocol import (30 ContentBlockDeltaData,31 ContentBlockFinishData,32 FinalizedContentBlock,33 InvalidToolCall,34 MessageFinishData,35 MessageMetadata,36 MessagesData,37 MessageStartData,38 ReasoningContentBlock,39 ServerToolCallChunk,40 TextContentBlock,41 ToolCall,42 ToolCallChunk,43 UsageInfo,44 )45 from typing_extensions import Self464748# ---------------------------------------------------------------------------49# Tool-call chunk helpers (shared by tool_call_chunk and server_tool_call_chunk)50# ---------------------------------------------------------------------------515253def _merge_chunk_into_store(54 store: dict[int, dict[str, Any]],55 idx: int,56 block: dict[str, Any],57) -> None:58 """Merge a tool-call-chunk delta: sticky id/name, concat args."""59 existing = store.get(idx, {})60 if block.get("id") and "id" not in existing:61 existing["id"] = block["id"]62 if block.get("name") and "name" not in existing:63 existing["name"] = block["name"]64 existing["args"] = existing.get("args", "") + (block.get("args") or "")65 store[idx] = existing666768def _merge_block_delta_into_store(69 store: dict[int, dict[str, Any]],70 idx: int,71 fields: dict[str, Any],72) -> None:73 """Shallow-merge a block-delta snapshot into an indexed chunk store."""74 existing = store.get(idx, {})75 for key, value in fields.items():76 if value is not None:77 existing[key] = value78 store[idx] = existing798081def _event_content_block(data: Mapping[str, Any]) -> dict[str, Any] | None:82 """Return start/finish content, tolerating the pre-delta field name."""83 block = data.get("content") or data.get("content_block")84 return block if isinstance(block, dict) else None858687def _legacy_block_to_delta(block: Mapping[str, Any]) -> dict[str, Any]:88 """Convert the old content-block delta shape to an explicit delta."""89 btype = block.get("type")90 if btype == "text":91 return {"type": "text-delta", "text": block.get("text", "")}92 if btype == "reasoning":93 return {94 "type": "reasoning-delta",95 "reasoning": block.get("reasoning", ""),96 }97 if "data" in block:98 delta = {"type": "data-delta", "data": block.get("data", "")}99 if block.get("encoding") == "base64":100 delta["encoding"] = "base64"101 return delta102 return {"type": "legacy-block-delta", "fields": block}103104105def _event_delta(data: Mapping[str, Any]) -> dict[str, Any] | None:106 """Return an explicit delta, converting legacy content-block deltas."""107 delta = data.get("delta")108 if isinstance(delta, dict):109 return delta110 block = data.get("content_block")111 if isinstance(block, dict):112 return _legacy_block_to_delta(block)113 return None114115116def _sweep_chunk_store(117 store: dict[int, dict[str, Any]],118 *,119 finalized_type: str,120 finalized_blocks: dict[int, FinalizedContentBlock],121 tool_calls_acc: list[ToolCall] | None,122 invalid_acc: list[InvalidToolCall],123) -> None:124 """Parse each unswept chunk's `args`; record as `finalized_type` or invalid.125126 `tool_calls_acc` is only populated when `finalized_type == "tool_call"`127 (server-side calls don't surface through `.tool_calls`).128129 Deliberately does not backfill `index` onto finalized tool-call blocks:130 matches v1 (`AIMessage.init_tool_calls` drops `index` when substituting131 `tool_call_chunk` → `tool_call`) and prevents `merge_lists` from132 re-merging further chunks into an already-parsed args dict.133 """134 for idx in sorted(store):135 chunk = store[idx]136 # Carry over any non-finalize-rewritten fields the chunk collected137 # (e.g., `extras`). `_merge_chunk_into_store` only populates138 # `id` / `name` / `args`, so this is empty in practice today;139 # future provider-specific fields would flow through here.140 extras = {141 k: v142 for k, v in chunk.items()143 if k not in ("type", "id", "name", "args") and v is not None144 }145 final_block = finalize_tool_call_chunk(146 raw_args=chunk.get("args"),147 id_=chunk.get("id"),148 name=chunk.get("name"),149 extras=extras,150 finalized_type=finalized_type,151 )152 if final_block["type"] == "invalid_tool_call":153 invalid_acc.append(final_block)154 elif tool_calls_acc is not None and finalized_type == "tool_call":155 tool_calls_acc.append(cast("ToolCall", final_block))156 finalized_blocks[idx] = final_block157 store.clear()158159160# ---------------------------------------------------------------------------161# Projection base — shared producer API162# ---------------------------------------------------------------------------163164165class _ProjectionBase:166 """Shared state and producer API for sync and async projections.167168 The `push` / `complete` / `fail` methods are the producer-side169 API — called by the stream as events arrive. Subclasses add the170 consumer protocol (sync iteration or async iteration + await).171172 `done` and `error` are safe read-only views of the terminal state173 for iterators and other siblings that need to observe lifecycle174 without reaching into the underlying fields.175 """176177 __slots__ = ("_deltas", "_done", "_error", "_final_set", "_final_value")178179 def __init__(self) -> None:180 """Initialize empty projection state."""181 self._deltas: list[Any] = []182 self._final_value: Any = None183 self._final_set: bool = False184 self._done: bool = False185 self._error: BaseException | None = None186187 @property188 def done(self) -> bool:189 """Whether the projection has finished (successfully or via error)."""190 return self._done191192 @property193 def error(self) -> BaseException | None:194 """The terminal error, if any."""195 return self._error196197 def push(self, delta: Any) -> None:198 """Append a delta value. Producer-side API."""199 self._deltas.append(delta)200201 def complete(self, final_value: Any) -> None:202 """Set the final accumulated value and mark as done. Producer-side API."""203 self._final_value = final_value204 self._final_set = True205 self._done = True206207 def fail(self, error: BaseException) -> None:208 """Mark as errored. Producer-side API."""209 self._error = error210 self._done = True211212213# ---------------------------------------------------------------------------214# Sync projections215# ---------------------------------------------------------------------------216217218class SyncProjection(_ProjectionBase):219 """Sync iterable of deltas with pull-based backpressure.220221 Follows the same `_request_more` convention as langgraph's222 `EventLog`: when the cursor catches up to the buffer and the223 projection is not done, it calls `_request_more()` to pull more224 events from the producer.225226 Each call to `__iter__` creates a new cursor at position 0.227 Multiple iterators replay all deltas from the start.228 """229230 __slots__ = ("_ensure_started", "_request_more")231232 def __init__(self) -> None:233 """Initialize with no pull callback."""234 super().__init__()235 self._ensure_started: Callable[[], None] | None = None236 self._request_more: Callable[[], bool] | None = None237238 def set_start(self, cb: Callable[[], None] | None) -> None:239 """Install a lazy-start callback invoked on first consumption."""240 self._ensure_started = cb241242 def set_request_more(self, cb: Callable[[], bool] | None) -> None:243 """Install the pull callback the iterator uses to drain the source."""244 self._request_more = cb245246 def __iter__(self) -> Iterator[Any]:247 """Yield deltas, pulling via `_request_more` when caught up."""248 if self._ensure_started is not None:249 self._ensure_started()250 cursor = 0251 while True:252 if cursor < len(self._deltas):253 yield self._deltas[cursor]254 cursor += 1255 elif self._error is not None:256 raise self._error257 elif self._done:258 return259 elif self._request_more is not None:260 while cursor >= len(self._deltas) and not self._done:261 if not self._request_more():262 break263 if cursor >= len(self._deltas):264 if self._error is not None:265 raise self._error266 return267 else:268 return269270 def get(self) -> Any:271 """Drain via `_request_more` and return the final value."""272 if self._ensure_started is not None:273 self._ensure_started()274 if not self._done and self._request_more is not None:275 while not self._done:276 if not self._request_more():277 break278 if self._error is not None:279 raise self._error280 return self._final_value281282283class SyncTextProjection(SyncProjection):284 """String-specialized sync projection.285286 Adds `__str__`, `__bool__`, `__repr__` for ergonomic use with287 `.text` and `.reasoning` projections.288 """289290 __slots__ = ()291292 def __str__(self) -> str:293 """Drain and return the full accumulated string."""294 val = self.get()295 return val if val is not None else ""296297 def __bool__(self) -> bool:298 """Return whether any deltas have been pushed."""299 return len(self._deltas) > 0300301 def __repr__(self) -> str:302 """Return repr of the accumulated text so far."""303 if self._final_set:304 return repr(self._final_value)305 return repr("".join(self._deltas))306307308# ---------------------------------------------------------------------------309# Async projection310# ---------------------------------------------------------------------------311312313class AsyncProjection(_ProjectionBase):314 """Async iterable of deltas that is also awaitable for the final value.315316 Uses an `asyncio.Event` to notify consumers of state changes. Each317 waiter — the awaitable (`__await__`) and each async iterator cursor318 — shares the event and re-checks its own condition on wake. The event319 is cleared before a waiter awaits, so stale "something happened"320 signals don't cause spin loops.321322 This is single-loop only — producers and consumers must share an323 event loop. If cross-thread wake is ever required, revert to a324 list-of-futures pattern with `call_soon_threadsafe`.325 """326327 __slots__ = ("_arequest_more", "_ensure_started", "_event")328329 def __init__(self) -> None:330 """Initialize with an un-set event and no pump callback."""331 super().__init__()332 self._event = asyncio.Event()333 self._arequest_more: Callable[[], Awaitable[bool]] | None = None334 self._ensure_started: Callable[[], Awaitable[None]] | None = None335336 def set_start(self, cb: Callable[[], Awaitable[None]] | None) -> None:337 """Install a lazy-start callback invoked on first consumption."""338 self._ensure_started = cb339340 def set_arequest_more(self, cb: Callable[[], Awaitable[bool]] | None) -> None:341 """Wire the async pull callback iterators use to drive the source.342343 Mirrors `SyncProjection.set_request_more`. Under caller-driven344 streaming, consumers call this callback when their buffer is345 empty so that the owning graph advances one step.346347 Args:348 cb: Async no-arg callable returning `True` when a new event349 was produced, `False` when the source is exhausted. Pass350 `None` to unwire.351 """352 self._arequest_more = cb353354 def push(self, delta: Any) -> None:355 """Append a delta and notify waiters."""356 super().push(delta)357 self._event.set()358359 def complete(self, final_value: Any) -> None:360 """Set the final value, mark done, and notify waiters."""361 super().complete(final_value)362 self._event.set()363364 def fail(self, error: BaseException) -> None:365 """Mark errored and notify waiters."""366 super().fail(error)367 self._event.set()368369 # -- Async iterable (yields deltas) ------------------------------------370371 def __aiter__(self) -> _AsyncProjectionIterator:372 """Return an async iterator over deltas."""373 return _AsyncProjectionIterator(self)374375 # -- Awaitable (returns final value) -----------------------------------376377 def __await__(self) -> Generator[Any, None, Any]:378 """Await the final accumulated value."""379 return self._await_impl().__await__()380381 async def _await_impl(self) -> Any:382 """Wait until the final value is set and return it.383384 When a caller-driven pump is wired via `set_arequest_more`, drive385 it instead of blocking on `self._event`; otherwise fall back to386 the event (used by tests that dispatch manually).387 """388 if self._ensure_started is not None:389 await self._ensure_started()390 while not self._final_set:391 if self._error is not None:392 raise self._error393 if self._arequest_more is not None:394 if not await self._arequest_more() and not self._final_set:395 # Pump exhausted without completing this projection —396 # nothing more will arrive. Return current state and397 # let callers observe the missing final via the398 # returned None / unset error.399 break400 else:401 self._event.clear()402 await self._event.wait()403 if self._error is not None:404 raise self._error405 return self._final_value406407408class _AsyncProjectionIterator:409 """Async iterator over an `AsyncProjection`'s deltas."""410411 __slots__ = ("_offset", "_proj")412413 def __init__(self, proj: AsyncProjection) -> None:414 """Initialize cursor at position 0."""415 self._proj = proj416 self._offset = 0417418 def __aiter__(self) -> _AsyncProjectionIterator:419 """Return self for the async iteration protocol."""420 return self421422 async def __anext__(self) -> Any:423 """Return the next delta, awaiting if necessary.424425 When the projection has an `_arequest_more` pump wired, drain it426 in an inner loop (mirrors `SyncProjection.__iter__`) until this427 cursor advances or the pump reports exhaustion. Without a pump,428 fall back to waiting on the shared event.429 """430 proj = self._proj431 if proj._ensure_started is not None: # noqa: SLF001432 await proj._ensure_started() # noqa: SLF001433 while True:434 # Direct access to the projection's internal list/event is435 # intentional — the iterator is the projection's sidekick and436 # depends on reading the shared buffer by cursor.437 if self._offset < len(proj._deltas): # noqa: SLF001438 item = proj._deltas[self._offset] # noqa: SLF001439 self._offset += 1440 return item441 if proj.error is not None:442 raise proj.error443 if proj.done:444 raise StopAsyncIteration445 if proj._arequest_more is not None: # noqa: SLF001446 # Caller-driven: drive the producer. Pump may land new447 # deltas for a sibling projection — loop until our cursor448 # advances, the projection terminates, or the pump is449 # exhausted.450 while (451 self._offset >= len(proj._deltas) # noqa: SLF001452 and not proj.done453 ):454 if not await proj._arequest_more(): # noqa: SLF001455 break456 if (457 self._offset >= len(proj._deltas) # noqa: SLF001458 and not proj.done459 ):460 if proj.error is not None:461 raise proj.error462 raise StopAsyncIteration463 else:464 proj._event.clear() # noqa: SLF001465 await proj._event.wait() # noqa: SLF001466467468# ---------------------------------------------------------------------------469# Sync stream470# ---------------------------------------------------------------------------471472473class _ChatModelStreamBase:474 """Shared state and event dispatch for chat-model streams.475476 Holds accumulated protocol state (text, reasoning, tool calls,477 usage, metadata) and the event-dispatch machinery that drives the478 typed projections. `ChatModelStream` (sync) and479 `AsyncChatModelStream` (async) inherit from this base and add the480 projection types and consumer APIs for their flavor.481 """482483 # Projection instances — concrete subclasses create them as sync or484 # async variants in their own __init__ after calling super().485 _text_proj: _ProjectionBase486 _reasoning_proj: _ProjectionBase487 _tool_calls_proj: _ProjectionBase488489 def __init__(490 self,491 *,492 namespace: list[str] | None = None,493 node: str | None = None,494 message_id: str | None = None,495 ) -> None:496 self._namespace = namespace or []497 self._node = node498 self._message_id = message_id499500 # Accumulated state501 self._text_acc: str = ""502 self._reasoning_acc: str = ""503 # Per-block text / reasoning storage keyed by wire index. Used to504 # populate the finalized block payload without cross-contaminating505 # other blocks of the same type in the same message. Without506 # per-block storage the message-wide accumulator would bleed507 # earlier block text into later finalized blocks.508 self._text_per_block: dict[int, str] = {}509 self._reasoning_per_block: dict[int, str] = {}510 self._tool_call_chunks: dict[int, dict[str, Any]] = {}511 self._tool_calls_acc: list[ToolCall] = []512 self._invalid_tool_calls_acc: list[InvalidToolCall] = []513 self._server_tool_call_chunks: dict[int, dict[str, Any]] = {}514 # Ordered snapshot of every finalized block, keyed by event index.515 # Single source of truth for .output.content. Typed accumulators516 # (text/reasoning/tool_calls/invalid_tool_calls) continue to serve517 # the public projections.518 self._blocks: dict[int, FinalizedContentBlock] = {}519 self._usage_value: UsageInfo | None = None520 self._start_metadata: MessageMetadata | None = None521 self._finish_metadata: dict[str, Any] | None = None522 self._additional_kwargs: dict[str, Any] | None = None523 self._done: bool = False524 self._error: BaseException | None = None525 self._output_message: AIMessage | None = None526527 # Raw event replay buffer528 self._events: list[MessagesData] = []529530 # -- Common properties ------------------------------------------------531532 @property533 def namespace(self) -> list[str]:534 """Graph namespace path for this message."""535 return self._namespace536537 @property538 def node(self) -> str | None:539 """Graph node that produced this message."""540 return self._node541542 @property543 def message_id(self) -> str | None:544 """Stable message identifier."""545 return self._message_id546547 def set_message_id(self, message_id: str) -> None:548 """Assign the stable message identifier once the run starts.549550 Called by the stream driver (`stream_events(version="v3")` /551 `astream_events(version="v3")`) after `on_chat_model_start` produces a run552 id. Not intended for end-user code.553 """554 self._message_id = message_id555556 @property557 def done(self) -> bool:558 """Whether the stream has finished."""559 return self._done560561 @property562 def has_events(self) -> bool:563 """Whether any protocol events have been recorded."""564 return bool(self._events)565566 @property567 def output_message(self) -> AIMessage | None:568 """The assembled message if the stream has finished, else `None`.569570 Unlike `ChatModelStream.output` (which blocks until the stream571 finishes), this never pumps, blocks, or raises. Intended for the572 stream driver (`stream_events(version="v3")` and its async573 equivalent) to check whether the stream produced a message before574 firing `on_llm_end` callbacks.575 """576 return self._output_message577578 # -- Event ingestion (public) ------------------------------------------579580 def dispatch(self, event: Mapping[str, Any]) -> None:581 """Route a protocol event to the appropriate internal handler.582583 Public entry point for feeding events into the stream. Called by584 the stream driver (the `stream_events(version="v3")` pump and its585 async equivalent) and by any observer or test that needs to586 inject protocol events.587 """588 self._record_event(event)589 event_type = event.get("event")590 if event_type == "message-start":591 self._push_message_start(cast("MessageStartData", event))592 elif event_type == "content-block-delta":593 self._push_content_block_delta(cast("ContentBlockDeltaData", event))594 elif event_type == "content-block-finish":595 self._push_content_block_finish(cast("ContentBlockFinishData", event))596 elif event_type == "message-finish":597 self._finish(cast("MessageFinishData", event))598 elif event_type == "error":599 self.fail(RuntimeError(event.get("message", "Unknown error")))600 # content-block-start is informational — no accumulation needed601602 # -- Internal push API (called by dispatch) ----------------------------603604 def _record_event(self, event: Mapping[str, Any]) -> None:605 """Append a raw event to the replay buffer."""606 self._events.append(cast("MessagesData", event))607608 def _push_message_start(self, data: MessageStartData) -> None:609 """Process a `message-start` event."""610 self._start_metadata = data.get("metadata")611 message_id = data.get("id")612 if message_id:613 self._message_id = message_id614615 def _push_content_block_delta(self, data: ContentBlockDeltaData) -> None:616 """Process a `content-block-delta` event."""617 delta = _event_delta(data)618 if delta is None:619 return620 event_idx = data.get("index")621 dtype = delta.get("type", "")622623 if dtype == "text-delta":624 delta_text = delta.get("text", "")625 if delta_text:626 self._text_acc += delta_text627 if event_idx is not None:628 self._text_per_block[event_idx] = (629 self._text_per_block.get(event_idx, "") + delta_text630 )631 self._text_proj.push(delta_text)632 elif dtype == "reasoning-delta":633 delta_r = delta.get("reasoning", "")634 if delta_r:635 self._reasoning_acc += delta_r636 if event_idx is not None:637 self._reasoning_per_block[event_idx] = (638 self._reasoning_per_block.get(event_idx, "") + delta_r639 )640 self._reasoning_proj.push(delta_r)641 elif dtype == "block-delta":642 fields = delta.get("fields")643 if not isinstance(fields, dict):644 return645 btype = fields.get("type", "")646 if btype == "tool_call_chunk":647 tcc = cast("ToolCallChunk", fields)648 idx = data.get("index")649 if idx is None:650 idx = tcc.get("index", len(self._tool_call_chunks))651 _merge_block_delta_into_store(self._tool_call_chunks, idx, dict(tcc))652 chunk_block: ToolCallChunk = {653 "type": "tool_call_chunk",654 "id": tcc.get("id"),655 "name": tcc.get("name"),656 "args": tcc.get("args"),657 }658 if "index" in tcc:659 chunk_block["index"] = tcc["index"]660 self._tool_calls_proj.push(chunk_block)661 elif btype == "server_tool_call_chunk":662 stcc = cast("ServerToolCallChunk", fields)663 idx = data.get("index")664 if idx is None:665 idx = len(self._server_tool_call_chunks)666 _merge_block_delta_into_store(667 self._server_tool_call_chunks,668 idx,669 dict(stcc),670 )671 elif dtype == "legacy-block-delta":672 fields = delta.get("fields")673 if not isinstance(fields, dict):674 return675 btype = fields.get("type", "")676 if btype == "tool_call_chunk":677 tcc = cast("ToolCallChunk", fields)678 idx = data.get("index")679 if idx is None:680 idx = tcc.get("index", len(self._tool_call_chunks))681 _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))682 legacy_chunk_block: ToolCallChunk = {683 "type": "tool_call_chunk",684 "id": tcc.get("id"),685 "name": tcc.get("name"),686 "args": tcc.get("args"),687 }688 if "index" in tcc:689 legacy_chunk_block["index"] = tcc["index"]690 self._tool_calls_proj.push(legacy_chunk_block)691 elif btype == "server_tool_call_chunk":692 stcc = cast("ServerToolCallChunk", fields)693 idx = data.get("index")694 if idx is None:695 idx = len(self._server_tool_call_chunks)696 _merge_chunk_into_store(697 self._server_tool_call_chunks,698 idx,699 dict(stcc),700 )701 elif dtype == "data-delta":702 # Binary/modal payload deltas are reflected in the final703 # content-block finish event; there is no dedicated projection.704 return705 else:706 # Transitional legacy path for old `content_block` deltas that707 # should not be reachable after `_event_delta` conversion, kept708 # here for custom in-tree test fixtures or third-party emitters.709 block = data.get("content_block")710 if not isinstance(block, dict):711 return712 btype = block.get("type", "")713 if btype != "tool_call_chunk":714 return715 tcc = cast("ToolCallChunk", block)716 idx = data.get("index")717 if idx is None:718 idx = tcc.get("index", len(self._tool_call_chunks))719 _merge_chunk_into_store(self._tool_call_chunks, idx, dict(tcc))720 fallback_chunk_block: ToolCallChunk = {721 "type": "tool_call_chunk",722 "id": tcc.get("id"),723 "name": tcc.get("name"),724 "args": tcc.get("args"),725 }726 if "index" in tcc:727 fallback_chunk_block["index"] = tcc["index"]728 self._tool_calls_proj.push(fallback_chunk_block)729730 def _resolve_block_text(self, idx: int | None, full_text: str) -> str:731 """Return authoritative text for a single text block at `idx`.732733 Prefers per-block delta accumulation; reconciles with the finish734 event's `full_text` when the provider emits authoritative text735 that differs from what the deltas built up.736737 Does not mutate `self._text_acc` (the delta-sum accumulator) —738 the message-wide projection value is derived from per-block739 storage at `_finish` time, so reconciliation remains correct740 regardless of finish ordering across blocks.741 """742 if idx is None:743 # No wire index — legacy behavior: use the message-wide744 # accumulator. Preserved for pre-index semantics; not745 # exercised by the compat bridge or any in-tree provider.746 if full_text and full_text != self._text_acc:747 self._text_acc = full_text748 return self._text_acc749 existing = self._text_per_block.get(idx, "")750 if full_text and full_text != existing:751 if not existing:752 # No deltas arrived for this block — surface the full753 # text as a single delta so the stream projection754 # reflects it.755 self._text_acc += full_text756 self._text_proj.push(full_text)757 elif full_text.startswith(existing):758 # Authoritative text extends the partial deltas — emit759 # the tail so delta consumers see the completion.760 tail = full_text[len(existing) :]761 self._text_acc += tail762 self._text_proj.push(tail)763 # else: authoritative text replaces the partial deltas764 # entirely. No corrective delta is emitted (semantics765 # would be ambiguous mid-stream). `_text_acc` is not766 # spliced — the final value is computed from per-block767 # storage at `_finish`, so this remains correct even when768 # other blocks have added to `_text_acc` in between.769 self._text_per_block[idx] = full_text770 return self._text_per_block.get(idx, "")771772 def _resolve_block_reasoning(self, idx: int | None, full_r: str) -> str:773 """Return authoritative reasoning text for a single block at `idx`.774775 Mirrors `_resolve_block_text` for the reasoning projection.776 """777 if idx is None:778 if full_r and full_r != self._reasoning_acc:779 self._reasoning_acc = full_r780 return self._reasoning_acc781 existing = self._reasoning_per_block.get(idx, "")782 if full_r and full_r != existing:783 if not existing:784 self._reasoning_acc += full_r785 self._reasoning_proj.push(full_r)786 elif full_r.startswith(existing):787 tail = full_r[len(existing) :]788 self._reasoning_acc += tail789 self._reasoning_proj.push(tail)790 self._reasoning_per_block[idx] = full_r791 return self._reasoning_per_block.get(idx, "")792793 def _push_content_block_finish(self, data: ContentBlockFinishData) -> None:794 """Process a `content-block-finish` event."""795 block = _event_content_block(data)796 if block is None:797 return798 btype = block.get("type", "")799 idx = data.get("index")800 finalized: FinalizedContentBlock | None = None801802 if btype == "text":803 text_block = cast("TextContentBlock", block)804 full_text = text_block.get("text", "")805 block_text = self._resolve_block_text(idx, full_text)806 finalized = cast(807 "FinalizedContentBlock",808 {809 **text_block,810 "type": "text",811 "text": block_text,812 },813 )814 elif btype == "reasoning":815 reasoning_block = cast("ReasoningContentBlock", block)816 full_r = reasoning_block.get("reasoning", "")817 block_reasoning = self._resolve_block_reasoning(idx, full_r)818 # Keep provider-specific fields alongside the accumulated819 # reasoning text. Anthropic's `signature` arrives under820 # `extras` and is required on follow-up turns. Only overwrite821 # `reasoning` when we have accumulated content; OpenAI can822 # emit a reasoning block with no text deltas, and writing an823 # empty string there makes downstream serializers synthesize824 # an empty summary entry.825 finalized_dict: dict[str, Any] = {**reasoning_block, "type": "reasoning"}826 if block_reasoning:827 finalized_dict["reasoning"] = block_reasoning828 finalized = cast("FinalizedContentBlock", finalized_dict)829 elif btype == "tool_call":830 tcb = cast("ToolCall", block)831 # Preserve provider-specific fields (extras, etc.) on the832 # content block. `_assemble_message` separately projects the833 # minimal {id, name, args, type} shape onto834 # `AIMessage.tool_calls`. Strip `index` to match v1835 # (`AIMessage.init_tool_calls` rebuilds the block without836 # `index`); see `_finalize_block` in `_compat_bridge.py`.837 tc = cast(838 "ToolCall",839 {840 **{k: v for k, v in tcb.items() if k != "index"},841 "type": "tool_call",842 "id": tcb.get("id", ""),843 "name": tcb.get("name", ""),844 "args": tcb.get("args", {}),845 },846 )847 self._tool_calls_acc.append(tc)848 if idx is not None and idx in self._tool_call_chunks:849 del self._tool_call_chunks[idx]850 finalized = tc851 elif btype == "invalid_tool_call":852 itc = cast("InvalidToolCall", block)853 # Strip `index` on the stored block to stay symmetric with854 # the `tool_call` path.855 itc = cast(856 "InvalidToolCall",857 {k: v for k, v in itc.items() if k != "index"},858 )859 self._invalid_tool_calls_acc.append(itc)860 # Critical: drop the stale chunk so _finish's sweep doesn't revive861 # it as an empty-args ToolCall.862 if idx is not None and idx in self._tool_call_chunks:863 del self._tool_call_chunks[idx]864 if idx is not None and idx in self._server_tool_call_chunks:865 del self._server_tool_call_chunks[idx]866 finalized = itc867 elif btype in (868 "server_tool_call",869 "server_tool_result",870 "image",871 "audio",872 "video",873 "file",874 "non_standard",875 ):876 if btype == "server_tool_call" and idx is not None:877 self._server_tool_call_chunks.pop(idx, None)878 finalized = cast("FinalizedContentBlock", block)879880 if finalized is not None and idx is not None:881 # Backfill the wire index onto the finalized block when the882 # source didn't supply one. `langchain_core.utils._merge`'s883 # block-merger (used by `AIMessageChunk.__add__` /884 # `add_ai_message_chunks`) keys on `block["index"]` to group885 # deltas into the same output block — without it, a v2-886 # assembled `AIMessage` that later re-enters the chunk887 # aggregation path won't merge cleanly. Client-side888 # `tool_call` / `invalid_tool_call` blocks are excluded: v1889 # finalization drops `index` on them so further deltas890 # cannot clobber already-parsed args, and v2 mirrors that.891 if btype not in ("tool_call", "invalid_tool_call"):892 finalized.setdefault("index", idx)893 self._blocks[idx] = finalized894895 def _finish(self, data: MessageFinishData) -> None:896 """Process a `message-finish` event."""897 self._done = True898 self._usage_value = data.get("usage")899 self._finish_metadata = cast("dict[str, Any] | None", data.get("metadata"))900 # Off-spec extension carrying provider-side `additional_kwargs`901 # that don't map onto a typed protocol field (e.g. Gemini's902 # `__gemini_function_call_thought_signatures__`). The compat903 # bridge emits this on `message-finish` so the assembled message904 # carries the same data `ainvoke` would have preserved.905 self._additional_kwargs = cast(906 "dict[str, Any] | None",907 cast("dict[str, Any]", data).get("additional_kwargs"),908 )909910 # Finalize any unswept chunks — both client- and server-side.911 _sweep_chunk_store(912 self._tool_call_chunks,913 finalized_type="tool_call",914 finalized_blocks=self._blocks,915 tool_calls_acc=self._tool_calls_acc,916 invalid_acc=self._invalid_tool_calls_acc,917 )918 _sweep_chunk_store(919 self._server_tool_call_chunks,920 finalized_type="server_tool_call",921 finalized_blocks=self._blocks,922 tool_calls_acc=None,923 invalid_acc=self._invalid_tool_calls_acc,924 )925926 # Prefer the per-block sum when any indexed text / reasoning927 # arrived — it stays correct regardless of finish ordering and928 # of whether finish events carried authoritative text that929 # differed from the deltas. Fall back to the delta-sum930 # accumulator only for the legacy no-index path.931 if self._text_per_block:932 text_final = "".join(933 self._text_per_block[i] for i in sorted(self._text_per_block)934 )935 else:936 text_final = self._text_acc937 if self._reasoning_per_block:938 reasoning_final = "".join(939 self._reasoning_per_block[i] for i in sorted(self._reasoning_per_block)940 )941 else:942 reasoning_final = self._reasoning_acc943944 self._text_proj.complete(text_final)945 self._reasoning_proj.complete(reasoning_final)946 self._tool_calls_proj.complete(self._tool_calls_acc)947 self._output_message = self._assemble_message()948949 def fail(self, error: BaseException) -> None:950 """Mark the stream as errored and propagate to all projections.951952 Public API — called by the stream driver (`stream_events(version="v3")` /953 `astream_events(version="v3")`) when the underlying producer raises, by954 `dispatch` when an `error` protocol event arrives, and by955 cancellation paths.956 """957 self._done = True958 self._error = error959 self._text_proj.fail(error)960 self._reasoning_proj.fail(error)961 self._tool_calls_proj.fail(error)962963 def _assemble_message(self) -> AIMessage:964 """Build an `AIMessage` from accumulated state.965966 Content is built from `self._blocks`, an index-ordered snapshot of967 finalized protocol blocks. The bare-string fast path is used when968 the message has exactly one `text` block (the common chat case);969 otherwise content is a list of protocol-shape block dicts.970 """971 content: Any972 if not self._blocks:973 # No protocol blocks ever arrived. Fall back to the accumulated974 # text (possibly empty) as bare-string content.975 content = self._text_acc976 else:977 # `ChatModelStream` is the v1 content-block surface: content978 # is always a list of protocol blocks when any block arrived.979 # Do not collapse a single text block down to a bare string —980 # that would drop block-level fields (`id`, `index`,981 # annotations, extras) that downstream serializers need to982 # round-trip the message on a follow-up turn.983 ordered_blocks = [self._blocks[idx] for idx in sorted(self._blocks)]984 content = [dict(b) for b in ordered_blocks]985986 response_metadata: dict[str, Any] = {}987 if self._start_metadata:988 if "provider" in self._start_metadata:989 response_metadata["model_provider"] = self._start_metadata["provider"]990 if "model" in self._start_metadata:991 response_metadata["model_name"] = self._start_metadata["model"]992 if self._finish_metadata:993 response_metadata.update(self._finish_metadata)994 # Pin `output_version` last: `stream_events(version="v3")` always995 # assembles content as v1 protocol blocks, regardless of the996 # provider's configured output format.997 # A provider-supplied `output_version` in finish metadata (e.g.998 # `"responses/v1"` from `ChatOpenAI(use_responses_api=True, ...)`) would999 # otherwise cause `AIMessage.content_blocks` to re-run the wrong1000 # translator on already-v1 content.1001 response_metadata["output_version"] = "v1"10021003 tool_calls = [1004 {1005 "id": tc.get("id", ""),1006 "name": tc.get("name", ""),1007 "args": tc.get("args", {}),1008 "type": "tool_call",1009 }1010 for tc in self._tool_calls_acc1011 ]10121013 invalid_tool_calls = [1014 {1015 "type": "invalid_tool_call",1016 "id": itc.get("id") or None,1017 "name": itc.get("name") or None,1018 "args": itc.get("args") or None,1019 "error": itc.get("error"),1020 }1021 for itc in self._invalid_tool_calls_acc1022 ]10231024 message_kwargs: dict[str, Any] = {1025 "content": content,1026 "id": self._message_id,1027 "tool_calls": tool_calls,1028 "invalid_tool_calls": invalid_tool_calls,1029 "usage_metadata": self._usage_value,1030 "response_metadata": response_metadata,1031 }1032 if self._additional_kwargs:1033 message_kwargs["additional_kwargs"] = dict(self._additional_kwargs)1034 return AIMessage(**message_kwargs)103510361037# ---------------------------------------------------------------------------1038# Sync stream1039# ---------------------------------------------------------------------------104010411042class ChatModelStream(_ChatModelStreamBase):1043 """Synchronous per-message streaming object for a single LLM response.10441045 Returned by `BaseChatModel.stream_events(version="v3")`. Content-block protocol1046 events are fed into this object and accumulated into typed projections.10471048 Projections (always return the same cached object):10491050 - `.text` — iterable of `str` deltas; `str()` for full text1051 - `.reasoning` — same as `.text` for reasoning content1052 - `.tool_calls` — iterable of `ToolCallChunk` deltas;1053 `.get()` returns `list[ToolCall]`1054 - `.output` — blocking property, returns assembled `AIMessage`10551056 Usage info is available on `.output.usage_metadata` once the stream1057 has finished.10581059 !!! note "Output shape is always v1 content blocks"10601061 `.output.content` is always a list of v1 protocol blocks1062 (text, reasoning, tool_call, image, …), regardless of the1063 underlying model's `output_version` setting. That attribute1064 only controls the legacy `stream()` / `astream()` / `invoke()`1065 paths; `ChatModelStream` is built on the content-block1066 protocol and emits v1 shapes by construction.10671068 Raw event iteration::10691070 for event in stream:1071 print(event) # MessagesData dicts1072 """10731074 _text_proj: SyncTextProjection1075 _reasoning_proj: SyncTextProjection1076 _tool_calls_proj: SyncProjection10771078 def __init__( # noqa: D1071079 self,1080 *,1081 namespace: list[str] | None = None,1082 node: str | None = None,1083 message_id: str | None = None,1084 ) -> None:1085 super().__init__(namespace=namespace, node=node, message_id=message_id)1086 # Projections — created eagerly1087 self._text_proj = SyncTextProjection()1088 self._reasoning_proj = SyncTextProjection()1089 self._tool_calls_proj = SyncProjection()1090 # Pull callback (set by bind_pump or set_request_more)1091 self._ensure_started: Callable[[], None] | None = None1092 self._request_more: Callable[[], bool] | None = None10931094 # -- Pump/pull wiring --------------------------------------------------10951096 def bind_pump(self, pump_one: Callable[[], bool]) -> None:1097 """Bind a pump for standalone streaming.10981099 Delegates to `set_request_more`. Used by1100 `BaseChatModel.stream_events(version="v3")`.1101 """1102 self.set_request_more(pump_one)11031104 def set_start(self, cb: Callable[[], None] | None) -> None:1105 """Install a lazy-start callback on this stream and its projections."""1106 self._ensure_started = cb1107 self._text_proj.set_start(cb)1108 self._reasoning_proj.set_start(cb)1109 self._tool_calls_proj.set_start(cb)11101111 def set_request_more(self, cb: Callable[[], bool]) -> None:1112 """Set the pull callback on this stream and all its projections.11131114 Used by langgraph's `GraphRunStream._wire_request_more` to1115 connect the shared graph pump.1116 """1117 self._request_more = cb1118 self._text_proj.set_request_more(cb)1119 self._reasoning_proj.set_request_more(cb)1120 self._tool_calls_proj.set_request_more(cb)11211122 # -- Public projections ------------------------------------------------11231124 @property1125 def text(self) -> SyncTextProjection:1126 """Text content — iterable of `str` deltas, `str()` for full."""1127 return self._text_proj11281129 @property1130 def reasoning(self) -> SyncTextProjection:1131 """Reasoning content — same interface as :attr:`text`."""1132 return self._reasoning_proj11331134 @property1135 def tool_calls(self) -> SyncProjection:1136 """Tool calls — iterable of `ToolCallChunk` deltas.11371138 `.get()` returns finalized `list[ToolCall]`.1139 """1140 return self._tool_calls_proj11411142 @property1143 def output(self) -> AIMessage:1144 """Assembled `AIMessage` — blocks until the stream finishes."""1145 self._drain()1146 if self._error is not None:1147 raise self._error1148 if self._output_message is None:1149 msg = "Stream finished without producing a message"1150 raise RuntimeError(msg)1151 return self._output_message11521153 # -- Raw event iteration (replay buffer) -------------------------------11541155 def __iter__(self) -> Iterator[MessagesData]:1156 """Iterate raw protocol events with replay-buffer semantics."""1157 if self._ensure_started is not None:1158 self._ensure_started()1159 cursor = 01160 while True:1161 if cursor < len(self._events):1162 yield self._events[cursor]1163 cursor += 11164 elif self._error is not None:1165 raise self._error1166 elif self._done:1167 return1168 elif self._request_more is not None:1169 while cursor >= len(self._events) and not self._done:1170 if not self._request_more():1171 break1172 if cursor >= len(self._events):1173 if self._error is not None:1174 raise self._error1175 return1176 else:1177 return11781179 # -- Internal helpers --------------------------------------------------11801181 def _drain(self) -> None:1182 """Pull all remaining events until done."""1183 if self._done:1184 return1185 if self._ensure_started is not None:1186 self._ensure_started()1187 if self._request_more is not None:1188 while not self._done:1189 if not self._request_more():1190 break119111921193# ---------------------------------------------------------------------------1194# Async stream1195# ---------------------------------------------------------------------------119611971198class AsyncChatModelStream(_ChatModelStreamBase):1199 """Asynchronous per-message streaming object for a single LLM response.12001201 Returned by `BaseChatModel.astream_events(version="v3")`. Content-block events1202 are fed into this object by a background producer task.12031204 Projections:12051206 - `.text` — async iterable of text deltas; awaitable for full text1207 - `.reasoning` — async iterable of reasoning deltas; awaitable1208 - `.tool_calls` — async iterable of `ToolCallChunk` deltas;1209 awaitable for `list[ToolCall]`1210 - `.output` — awaitable for assembled `AIMessage`12111212 Usage info is available on `.output.usage_metadata` once the stream1213 has finished.12141215 !!! note "Output shape is always v1 content blocks"12161217 The assembled message's content is always a list of v11218 protocol blocks, regardless of the model's `output_version`1219 setting — see `ChatModelStream` for the full rationale.12201221 The stream itself is awaitable (`msg = await stream`) and1222 async-iterable (`async for event in stream`).1223 """12241225 _text_proj: AsyncProjection1226 _reasoning_proj: AsyncProjection1227 _tool_calls_proj: AsyncProjection12281229 def __init__( # noqa: D1071230 self,1231 *,1232 namespace: list[str] | None = None,1233 node: str | None = None,1234 message_id: str | None = None,1235 ) -> None:1236 super().__init__(namespace=namespace, node=node, message_id=message_id)1237 self._text_proj = AsyncProjection()1238 self._reasoning_proj = AsyncProjection()1239 self._tool_calls_proj = AsyncProjection()1240 self._output_proj = AsyncProjection()1241 self._events_proj = AsyncProjection()1242 self._ensure_started: Callable[[], Awaitable[None]] | None = None1243 self._producer_task: asyncio.Task[None] | None = None1244 # Teardown callback invoked by `aclose()` only when the producer1245 # task was cancelled before its body ran (so the normal1246 # `_produce` CancelledError handler — which fires1247 # `on_llm_error` — never executed). Set by `astream_events(version="v3")`.1248 self._on_aclose_fail: Callable[[BaseException], Awaitable[None]] | None = None12491250 # -- Pump/pull wiring (async) ------------------------------------------12511252 def set_arequest_more(self, cb: Callable[[], Awaitable[bool]] | None) -> None:1253 """Fan the async pump callback out to every projection.12541255 Used by langgraph's `AsyncGraphRunStream._wire_arequest_more` so1256 cursors on `stream.text`, `stream.reasoning`, etc. can drive the1257 shared graph pump when their buffer is empty.12581259 Args:1260 cb: Async no-arg callable returning `True` when a new event1261 was produced, `False` when the source is exhausted. Pass1262 `None` to unwire.1263 """1264 for proj in (1265 self._text_proj,1266 self._reasoning_proj,1267 self._tool_calls_proj,1268 self._output_proj,1269 self._events_proj,1270 ):1271 proj.set_arequest_more(cb)12721273 def set_start(self, cb: Callable[[], Awaitable[None]] | None) -> None:1274 """Install a lazy-start callback on this stream and its projections."""1275 self._ensure_started = cb1276 for proj in (1277 self._text_proj,1278 self._reasoning_proj,1279 self._tool_calls_proj,1280 self._output_proj,1281 self._events_proj,1282 ):1283 proj.set_start(cb)12841285 # -- Public projections ------------------------------------------------12861287 @property1288 def text(self) -> AsyncProjection:1289 """Text content — async iterable of deltas, awaitable for full."""1290 return self._text_proj12911292 @property1293 def reasoning(self) -> AsyncProjection:1294 """Reasoning content — same interface as :attr:`text`."""1295 return self._reasoning_proj12961297 @property1298 def tool_calls(self) -> AsyncProjection:1299 """Tool calls — async iterable, awaitable for finalized list."""1300 return self._tool_calls_proj13011302 @property1303 def output(self) -> AsyncProjection:1304 """Assembled `AIMessage` — awaitable."""1305 return self._output_proj13061307 def __await__(self) -> Generator[Any, None, AIMessage]:1308 """Await the assembled `AIMessage` and full producer lifecycle.13091310 The producer task is awaited after the output projection resolves so1311 that post-stream work (notably `on_llm_end` callbacks) has run by1312 the time the caller's `await` returns.1313 """1314 return self._await_full().__await__()13151316 async def _await_full(self) -> AIMessage:1317 if self._ensure_started is not None:1318 await self._ensure_started()1319 message: AIMessage = await self._output_proj1320 if self._producer_task is not None:1321 await self._producer_task1322 return message13231324 def __aiter__(self) -> _AsyncProjectionIterator:1325 """Iterate raw protocol events asynchronously."""1326 return _AsyncProjectionIterator(self._events_proj)13271328 # -- Cleanup -----------------------------------------------------------13291330 async def aclose(self) -> None:1331 """Cancel the background producer task and release resources.13321333 If a consumer cancels mid-stream or decides to stop iterating1334 early, the producer task keeps pumping the provider HTTP call to1335 completion because `asyncio.Task` has no implicit link to its1336 awaiter. Call this method to cancel the producer explicitly; the1337 stream transitions to an errored state with `CancelledError`.13381339 If the stream has already produced a message successfully (for1340 example, after `await stream.output`), the producer may still be1341 running post-stream work such as `on_llm_end` callbacks. In that1342 case `aclose()` awaits the task rather than cancelling it —1343 turning a successful run into a cancelled one would drop the1344 end callback and corrupt tracing.13451346 Idempotent: safe to call multiple times, including after the1347 stream has finished normally. Also invoked by the async context1348 manager protocol on `__aexit__`.1349 """1350 if self._ensure_started is not None and self._producer_task is None:1351 await self._ensure_started()13521353 task = self._producer_task1354 if task is None:1355 return1356 if task.done() and self._done:1357 return13581359 we_cancelled = not (self._output_message is not None and self._error is None)1360 if we_cancelled and not task.done():1361 task.cancel()13621363 # Wait for the task via a linked `Future`, not by awaiting the1364 # task directly. Awaiting the task would raise `CancelledError`1365 # in two indistinguishable cases: (1) the task we just cancelled1366 # completed, (2) our caller cancelled us. `asyncio.Task.cancelling()`1367 # disambiguates on 3.11+ but doesn't exist on 3.10.1368 #1369 # The `done_future` resolves with `None` whenever the task1370 # finishes (any reason). It is not a `Task` itself, so its1371 # `await` only raises when our caller is cancelled — giving us1372 # a portable, unambiguous signal to propagate.1373 if not task.done():1374 loop = asyncio.get_running_loop()1375 done_future: asyncio.Future[None] = loop.create_future()13761377 def _link(_: asyncio.Task[None]) -> None:1378 if not done_future.done():1379 done_future.set_result(None)13801381 task.add_done_callback(_link)1382 try:1383 await done_future1384 finally:1385 task.remove_done_callback(_link)13861387 # If the task was cancelled before `_produce` ran (e.g.1388 # `astream_events(version="v3")` immediately followed by `aclose()`), the stream1389 # never reached `_produce`'s CancelledError handler — its1390 # projections are still pending and no end-of-lifecycle callback1391 # has fired. Resolve both here so callers of `await stream.output`1392 # don't hang and tracing sees a matching end event.1393 if we_cancelled and not self._done:1394 cancel_exc = asyncio.CancelledError()1395 self.fail(cancel_exc)1396 teardown = self._on_aclose_fail1397 if teardown is not None:1398 with contextlib.suppress(Exception):1399 await teardown(cancel_exc)14001401 async def __aenter__(self) -> Self:1402 """Enter the async context — returns self."""1403 return self14041405 async def __aexit__(1406 self,1407 exc_type: type[BaseException] | None,1408 exc: BaseException | None,1409 tb: object,1410 ) -> None:1411 """Exit the async context — cancels the producer via `aclose()`."""1412 del exc_type, exc, tb1413 await self.aclose()14141415 # -- Internal API (extend base to drive async projections) -------------14161417 def _record_event(self, event: Mapping[str, Any]) -> None:1418 """Record event and push to async event replay projection."""1419 super()._record_event(event)1420 self._events_proj.push(cast("MessagesData", event))14211422 def _finish(self, data: MessageFinishData) -> None:1423 """Finish base projections and async-only projections."""1424 super()._finish(data)1425 self._output_proj.complete(self._output_message)1426 self._events_proj.complete(self._events)14271428 def fail(self, error: BaseException) -> None:1429 """Fail base projections and async-only projections."""1430 super().fail(error)1431 self._output_proj.fail(error)1432 self._events_proj.fail(error)143314341435__all__ = [1436 "AsyncChatModelStream",1437 "AsyncProjection",1438 "ChatModelStream",1439 "SyncProjection",1440 "SyncTextProjection",1441]
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.