# Review note: overuse of `isinstance` may indicate design issues; consider polymorphism.
# Flagged expression: `if isinstance(block, dict) and block.get("type") == "tool_call"`
"""AI message."""

# NOTE(review): this module was reconstructed from an extraction in which viewer
# line numbers were fused into the text and all indentation was collapsed.
# Block nesting was re-derived from syntax; confirm against the canonical file.

import itertools
import json
import logging
import operator
from collections.abc import Sequence
from typing import Any, Literal, cast, overload

from pydantic import Field, model_validator
from typing_extensions import NotRequired, Self, TypedDict, override

from langchain_core.messages import content as types
from langchain_core.messages.base import (
    BaseMessage,
    BaseMessageChunk,
    _extract_reasoning_from_additional_kwargs,
    merge_content,
)
from langchain_core.messages.content import InvalidToolCall
from langchain_core.messages.tool import (
    ToolCall,
    ToolCallChunk,
    default_tool_chunk_parser,
    default_tool_parser,
)
from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.usage import _dict_int_op
from langchain_core.utils.utils import LC_AUTO_PREFIX, LC_ID_PREFIX

logger = logging.getLogger(__name__)


class InputTokenDetails(TypedDict, total=False):
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.

    Example:
        ```python
        {
            "audio": 10,
            "cache_creation": 200,
            "cache_read": 100,
        }
        ```

    May also hold extra provider-specific keys.

    !!! version-added "Added in `langchain-core` 0.3.9"
    """

    audio: int
    """Audio input tokens."""

    cache_creation: int
    """Input tokens that were cached and there was a cache miss.

    Since there was a cache miss, the cache was created from these tokens.
    """

    cache_read: int
    """Input tokens that were cached and there was a cache hit.

    Since there was a cache hit, the tokens were read from the cache. More precisely,
    the model state given these tokens was read from the cache.
    """


class OutputTokenDetails(TypedDict, total=False):
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.

    Example:
        ```python
        {
            "audio": 10,
            "reasoning": 200,
        }
        ```

    May also hold extra provider-specific keys.

    !!! version-added "Added in `langchain-core` 0.3.9"

    """

    audio: int
    """Audio output tokens."""

    reasoning: int
    """Reasoning output tokens.

    Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
    models) that are not returned as part of model output.
    """


class UsageMetadata(TypedDict):
    """Usage metadata for a message, such as token counts.

    This is a standard representation of token usage that is consistent across models.

    Example:
        ```python
        {
            "input_tokens": 350,
            "output_tokens": 240,
            "total_tokens": 590,
            "input_token_details": {
                "audio": 10,
                "cache_creation": 200,
                "cache_read": 100,
            },
            "output_token_details": {
                "audio": 10,
                "reasoning": 200,
            },
        }
        ```

    !!! warning "Behavior changed in `langchain-core` 0.3.9"

        Added `input_token_details` and `output_token_details`.

    !!! note "LangSmith SDK"

        The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
        LangSmith's `UsageMetadata` has additional fields to capture cost information
        used by the LangSmith platform.
    """

    input_tokens: int
    """Count of input (or prompt) tokens. Sum of all input token types."""

    output_tokens: int
    """Count of output (or completion) tokens. Sum of all output token types."""

    total_tokens: int
    """Total token count. Sum of `input_tokens` + `output_tokens`."""

    input_token_details: NotRequired[InputTokenDetails]
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.
    """

    output_token_details: NotRequired[OutputTokenDetails]
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.
    """


class AIMessage(BaseMessage):
    """Message from an AI.

    An `AIMessage` is returned from a chat model as a response to a prompt.

    This message represents the output of the model and consists of both
    the raw output as returned by the model and standardized fields
    (e.g., tool calls, usage metadata) added by the LangChain framework.
    """

    tool_calls: list[ToolCall] = Field(default_factory=list)
    """If present, tool calls associated with the message."""

    invalid_tool_calls: list[InvalidToolCall] = Field(default_factory=list)
    """If present, tool calls with parsing errors associated with the message."""

    usage_metadata: UsageMetadata | None = None
    """If present, usage metadata for a message, such as token counts.

    This is a standard representation of token usage that is consistent across models.
    """

    type: Literal["ai"] = "ai"
    """The type of the message (used for deserialization)."""

    @overload
    def __init__(
        self,
        content: str | list[str | dict],
        **kwargs: Any,
    ) -> None: ...

    @overload
    def __init__(
        self,
        content: str | list[str | dict] | None = None,
        content_blocks: list[types.ContentBlock] | None = None,
        **kwargs: Any,
    ) -> None: ...

    def __init__(
        self,
        content: str | list[str | dict] | None = None,
        content_blocks: list[types.ContentBlock] | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize an `AIMessage`.

        Specify `content` as positional arg or `content_blocks` for typing.

        Args:
            content: The content of the message.
            content_blocks: Typed standard content.
            **kwargs: Additional arguments to pass to the parent class.
        """
        if content_blocks is not None:
            # If there are tool calls in content_blocks, but not in tool_calls, add them
            content_tool_calls = [
                block for block in content_blocks if block.get("type") == "tool_call"
            ]
            if content_tool_calls and "tool_calls" not in kwargs:
                kwargs["tool_calls"] = content_tool_calls

            # When `content_blocks` is given it takes precedence over `content`.
            super().__init__(
                content=cast("str | list[str | dict]", content_blocks),
                **kwargs,
            )
        else:
            super().__init__(content=content, **kwargs)

    @property
    def lc_attributes(self) -> dict:
        """Attributes to be serialized.

        Includes all attributes, even if they are derived from other initialization
        arguments.
        """
        return {
            "tool_calls": self.tool_calls,
            "invalid_tool_calls": self.invalid_tool_calls,
        }

    @property
    def content_blocks(self) -> list[types.ContentBlock]:
        """Return standard, typed `ContentBlock` dicts from the message.

        If the message has a known model provider, use the provider-specific translator
        first before falling back to best-effort parsing. For details, see the property
        on `BaseMessage`.
        """
        if self.response_metadata.get("output_version") == "v1":
            return cast("list[types.ContentBlock]", self.content)

        model_provider = self.response_metadata.get("model_provider")
        if model_provider:
            from langchain_core.messages.block_translators import (  # noqa: PLC0415
                get_translator,
            )

            translator = get_translator(model_provider)
            if translator:
                try:
                    return translator["translate_content"](self)
                except NotImplementedError:
                    # Deliberate fallthrough to the best-effort parsing below.
                    pass

        # Otherwise, use best-effort parsing
        blocks = super().content_blocks

        if self.tool_calls:
            # Add from tool_calls if missing from content
            content_tool_call_ids = {
                block.get("id")
                for block in self.content
                if isinstance(block, dict) and block.get("type") == "tool_call"
            }
            for tool_call in self.tool_calls:
                if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids:
                    tool_call_block: types.ToolCall = {
                        "type": "tool_call",
                        "id": id_,
                        "name": tool_call["name"],
                        "args": tool_call["args"],
                    }
                    if "index" in tool_call:
                        tool_call_block["index"] = tool_call["index"]  # type: ignore[typeddict-item]
                    if "extras" in tool_call:
                        tool_call_block["extras"] = tool_call["extras"]  # type: ignore[typeddict-item]
                    blocks.append(tool_call_block)

        # Best-effort reasoning extraction from additional_kwargs
        # Only add reasoning if not already present
        # Insert before all other blocks to keep reasoning at the start
        has_reasoning = any(block.get("type") == "reasoning" for block in blocks)
        if not has_reasoning and (
            reasoning_block := _extract_reasoning_from_additional_kwargs(self)
        ):
            blocks.insert(0, reasoning_block)

        return blocks

    # TODO: remove this logic if possible, reducing breaking nature of changes
    @model_validator(mode="before")
    @classmethod
    def _backwards_compat_tool_calls(cls, values: dict) -> Any:
        """Fill tool-call fields from `additional_kwargs` and normalize them.

        When none of `tool_calls`, `invalid_tool_calls` or `tool_call_chunks` is
        set, raw tool calls under `additional_kwargs["tool_calls"]` are parsed into
        the matching field(s). Every tool-call-like dict is then rebuilt through its
        factory function so that its `type` key is set.

        Args:
            values: The raw field values passed to the model.

        Returns:
            The (mutated) `values` mapping.
        """
        check_additional_kwargs = not any(
            values.get(k)
            for k in ("tool_calls", "invalid_tool_calls", "tool_call_chunks")
        )
        if check_additional_kwargs and (
            raw_tool_calls := values.get("additional_kwargs", {}).get("tool_calls")
        ):
            try:
                if issubclass(cls, AIMessageChunk):
                    values["tool_call_chunks"] = default_tool_chunk_parser(
                        raw_tool_calls
                    )
                else:
                    parsed_tool_calls, parsed_invalid_tool_calls = default_tool_parser(
                        raw_tool_calls
                    )
                    values["tool_calls"] = parsed_tool_calls
                    values["invalid_tool_calls"] = parsed_invalid_tool_calls
            except Exception:
                # Best effort: unparseable provider payloads must not block
                # message construction.
                logger.debug("Failed to parse tool calls", exc_info=True)

        # Ensure "type" is properly set on all tool call-like dicts.
        if tool_calls := values.get("tool_calls"):
            values["tool_calls"] = [
                create_tool_call(
                    **{k: v for k, v in tc.items() if k not in {"type", "extras"}}
                )
                for tc in tool_calls
            ]
        if invalid_tool_calls := values.get("invalid_tool_calls"):
            values["invalid_tool_calls"] = [
                create_invalid_tool_call(**{k: v for k, v in tc.items() if k != "type"})
                for tc in invalid_tool_calls
            ]

        if tool_call_chunks := values.get("tool_call_chunks"):
            values["tool_call_chunks"] = [
                create_tool_call_chunk(**{k: v for k, v in tc.items() if k != "type"})
                for tc in tool_call_chunks
            ]

        return values

    @override
    def pretty_repr(self, html: bool = False) -> str:
        """Return a pretty representation of the message for display.

        Args:
            html: Whether to return an HTML-formatted string.

        Returns:
            A pretty representation of the message.

        Example:
            ```python
            from langchain_core.messages import AIMessage

            msg = AIMessage(
                content="Let me check the weather.",
                tool_calls=[
                    {"name": "get_weather", "args": {"city": "Paris"}, "id": "1"}
                ],
            )
            ```

            Results in:
            ```python
            >>> print(msg.pretty_repr())
            ================================== Ai Message ==================================

            Let me check the weather.
            Tool Calls:
              get_weather (1)
             Call ID: 1
              Args:
                city: Paris
            ```
        """  # noqa: E501
        base = super().pretty_repr(html=html)
        lines = []

        # NOTE(review): the leading-space widths inside the string literals below
        # were collapsed by the extraction and have been restored from the
        # upstream layout -- confirm against the canonical file.
        def _format_tool_args(tc: ToolCall | InvalidToolCall) -> list[str]:
            """Render one (possibly invalid) tool call as display lines."""
            formatted = [
                f"  {tc.get('name', 'Tool')} ({tc.get('id')})",
                f" Call ID: {tc.get('id')}",
            ]
            if tc.get("error"):
                formatted.append(f"  Error: {tc.get('error')}")
            formatted.append("  Args:")
            args = tc.get("args")
            if isinstance(args, str):
                # Invalid tool calls keep their raw, unparsed argument string.
                formatted.append(f"    {args}")
            elif isinstance(args, dict):
                formatted.extend(f"    {arg}: {value}" for arg, value in args.items())
            return formatted

        if self.tool_calls:
            lines.append("Tool Calls:")
            for tc in self.tool_calls:
                lines.extend(_format_tool_args(tc))
        if self.invalid_tool_calls:
            lines.append("Invalid Tool Calls:")
            for itc in self.invalid_tool_calls:
                lines.extend(_format_tool_args(itc))
        return (base.strip() + "\n" + "\n".join(lines)).strip()


class AIMessageChunk(AIMessage, BaseMessageChunk):
    """Message chunk from an AI (yielded when streaming)."""

    # Ignoring mypy re-assignment here since we're overriding the value
    # to make sure that the chunk variant can be discriminated from the
    # non-chunk variant.
    type: Literal["AIMessageChunk"] = "AIMessageChunk"  # type: ignore[assignment]
    """The type of the message (used for deserialization)."""

    tool_call_chunks: list[ToolCallChunk] = Field(default_factory=list)
    """If provided, tool call chunks associated with the message."""

    chunk_position: Literal["last"] | None = None
    """Optional span represented by an aggregated `AIMessageChunk`.

    If a chunk with `chunk_position="last"` is aggregated into a stream,
    `tool_call_chunks` in message content will be parsed into `tool_calls`.
    """

    @property
    @override
    def lc_attributes(self) -> dict:
        """Attributes to be serialized: `tool_calls` and `invalid_tool_calls`."""
        return {
            "tool_calls": self.tool_calls,
            "invalid_tool_calls": self.invalid_tool_calls,
        }

    @property
    def content_blocks(self) -> list[types.ContentBlock]:
        """Return standard, typed `ContentBlock` dicts from the message."""
        if self.response_metadata.get("output_version") == "v1":
            return cast("list[types.ContentBlock]", self.content)

        model_provider = self.response_metadata.get("model_provider")
        if model_provider:
            from langchain_core.messages.block_translators import (  # noqa: PLC0415
                get_translator,
            )

            translator = get_translator(model_provider)
            if translator:
                try:
                    return translator["translate_content_chunk"](self)
                except NotImplementedError:
                    # Deliberate fallthrough to the best-effort parsing below.
                    pass

        # Otherwise, use best-effort parsing
        blocks = super().content_blocks

        if (
            self.tool_call_chunks
            and not self.content
            and self.chunk_position != "last"  # keep tool_calls if aggregated
        ):
            # Mid-stream: expose partial chunks instead of parsed tool calls.
            blocks = [
                block
                for block in blocks
                if block["type"] not in {"tool_call", "invalid_tool_call"}
            ]
            for tool_call_chunk in self.tool_call_chunks:
                tc: types.ToolCallChunk = {
                    "type": "tool_call_chunk",
                    "id": tool_call_chunk.get("id"),
                    "name": tool_call_chunk.get("name"),
                    "args": tool_call_chunk.get("args"),
                }
                if (idx := tool_call_chunk.get("index")) is not None:
                    tc["index"] = idx
                blocks.append(tc)

        # Best-effort reasoning extraction from additional_kwargs
        # Only add reasoning if not already present
        # Insert before all other blocks to keep reasoning at the start
        has_reasoning = any(block.get("type") == "reasoning" for block in blocks)
        if not has_reasoning and (
            reasoning_block := _extract_reasoning_from_additional_kwargs(self)
        ):
            blocks.insert(0, reasoning_block)

        return blocks

    @model_validator(mode="after")
    def init_tool_calls(self) -> Self:
        """Initialize tool calls from tool call chunks.

        When no chunks are present, chunks are synthesized from any existing
        `tool_calls` / `invalid_tool_calls`. Otherwise each chunk's `args` is
        parsed as partial JSON: chunks that parse to a dict become `tool_calls`;
        chunks that fail to parse, or parse to a non-dict, are recorded in
        `invalid_tool_calls`.

        Returns:
            The values with tool calls initialized.
        """
        if not self.tool_call_chunks:
            if self.tool_calls:
                self.tool_call_chunks = [
                    create_tool_call_chunk(
                        name=tc["name"],
                        args=json.dumps(tc["args"]),
                        id=tc["id"],
                        index=None,
                    )
                    for tc in self.tool_calls
                ]
            if self.invalid_tool_calls:
                tool_call_chunks = self.tool_call_chunks
                tool_call_chunks.extend(
                    [
                        create_tool_call_chunk(
                            name=tc["name"], args=tc["args"], id=tc["id"], index=None
                        )
                        for tc in self.invalid_tool_calls
                    ]
                )
                self.tool_call_chunks = tool_call_chunks

            return self
        tool_calls = []
        invalid_tool_calls = []

        def add_chunk_to_invalid_tool_calls(chunk: ToolCallChunk) -> None:
            invalid_tool_calls.append(
                create_invalid_tool_call(
                    name=chunk["name"],
                    args=chunk["args"],
                    id=chunk["id"],
                    error=None,
                )
            )

        for chunk in self.tool_call_chunks:
            try:
                args_ = parse_partial_json(chunk["args"]) if chunk["args"] else {}
                if isinstance(args_, dict):
                    tool_calls.append(
                        create_tool_call(
                            name=chunk["name"] or "",
                            args=args_,
                            id=chunk["id"],
                        )
                    )
                else:
                    add_chunk_to_invalid_tool_calls(chunk)
            except Exception:
                # Any parsing failure downgrades the chunk to an invalid tool call.
                add_chunk_to_invalid_tool_calls(chunk)
        self.tool_calls = tool_calls
        self.invalid_tool_calls = invalid_tool_calls

        if (
            self.chunk_position == "last"
            and self.tool_call_chunks
            and self.response_metadata.get("output_version") == "v1"
            and isinstance(self.content, list)
        ):
            # Final aggregated v1 chunk: swap `tool_call_chunk` content blocks for
            # their fully parsed `tool_call` counterparts, matched by id.
            id_to_tc: dict[str, types.ToolCall] = {
                cast("str", tc.get("id")): {
                    "type": "tool_call",
                    "name": tc["name"],
                    "args": tc["args"],
                    "id": tc.get("id"),
                }
                for tc in self.tool_calls
                if "id" in tc
            }
            for idx, block in enumerate(self.content):
                if (
                    isinstance(block, dict)
                    and block.get("type") == "tool_call_chunk"
                    and (call_id := block.get("id"))
                    and call_id in id_to_tc
                ):
                    self.content[idx] = cast("dict[str, Any]", id_to_tc[call_id])
                    if "extras" in block:
                        # mypy does not account for instance check for dict above
                        self.content[idx]["extras"] = block["extras"]  # type: ignore[index]

        return self

    @model_validator(mode="after")
    def init_server_tool_calls(self) -> Self:
        """Initialize server tool calls.

        Parse `server_tool_call_chunks` from
        [`ServerToolCallChunk`][langchain.messages.ServerToolCallChunk] objects.
        """
        if (
            self.chunk_position == "last"
            and self.response_metadata.get("output_version") == "v1"
            and isinstance(self.content, list)
        ):
            for idx, block in enumerate(self.content):
                if (
                    isinstance(block, dict)
                    and block.get("type")
                    in {"server_tool_call", "server_tool_call_chunk"}
                    and (args_str := block.get("args"))
                    and isinstance(args_str, str)
                ):
                    try:
                        args = json.loads(args_str)
                        if isinstance(args, dict):
                            self.content[idx]["type"] = "server_tool_call"  # type: ignore[index]
                            self.content[idx]["args"] = args  # type: ignore[index]
                    except json.JSONDecodeError:
                        # Best effort: leave the block untouched when its args
                        # are not valid JSON.
                        pass
        return self

    @overload  # type: ignore[override]  # summing BaseMessages gives ChatPromptTemplate
    def __add__(self, other: "AIMessageChunk") -> "AIMessageChunk": ...

    @overload
    def __add__(self, other: Sequence["AIMessageChunk"]) -> "AIMessageChunk": ...

    @overload
    def __add__(self, other: Any) -> BaseMessageChunk: ...

    @override
    def __add__(self, other: Any) -> BaseMessageChunk:
        """Merge this chunk with another chunk or a list/tuple of chunks.

        Anything that is not an `AIMessageChunk` (or a list/tuple made up only of
        them) is delegated to the parent implementation.
        """
        if isinstance(other, AIMessageChunk):
            return add_ai_message_chunks(self, other)
        if isinstance(other, (list, tuple)) and all(
            isinstance(o, AIMessageChunk) for o in other
        ):
            return add_ai_message_chunks(self, *other)
        return super().__add__(other)


def add_ai_message_chunks(
    left: AIMessageChunk, *others: AIMessageChunk
) -> AIMessageChunk:
    """Add multiple `AIMessageChunk`s together.

    Args:
        left: The first `AIMessageChunk`.
        *others: Other `AIMessageChunk`s to add.

    Returns:
        The resulting `AIMessageChunk`.

    """
    content = merge_content(left.content, *(o.content for o in others))
    additional_kwargs = merge_dicts(
        left.additional_kwargs, *(o.additional_kwargs for o in others)
    )
    response_metadata = merge_dicts(
        left.response_metadata, *(o.response_metadata for o in others)
    )

    # Merge tool call chunks
    if raw_tool_calls := merge_lists(
        left.tool_call_chunks, *(o.tool_call_chunks for o in others)
    ):
        tool_call_chunks = [
            create_tool_call_chunk(
                name=rtc.get("name"),
                args=rtc.get("args"),
                index=rtc.get("index"),
                id=rtc.get("id"),
            )
            for rtc in raw_tool_calls
        ]
    else:
        tool_call_chunks = []

    # Token usage
    if left.usage_metadata or any(o.usage_metadata is not None for o in others):
        usage_metadata: UsageMetadata | None = left.usage_metadata
        for other in others:
            usage_metadata = add_usage(usage_metadata, other.usage_metadata)
    else:
        usage_metadata = None

    # Ranks are defined by the order of preference. Higher is better:
    # 2. Provider-assigned IDs (non lc_* and non lc_run-*)
    # 1. lc_run-* IDs
    # 0. lc_* and other remaining IDs
    best_rank = -1
    chunk_id = None
    candidates = itertools.chain([left.id], (o.id for o in others))

    for id_ in candidates:
        if not id_:
            continue

        if not id_.startswith((LC_ID_PREFIX, LC_AUTO_PREFIX)):
            chunk_id = id_
            # Highest rank, return instantly
            break

        rank = 1 if id_.startswith(LC_ID_PREFIX) else 0

        # Strict `>` keeps the earliest chunk's ID among equally ranked ones.
        if rank > best_rank:
            best_rank = rank
            chunk_id = id_

    chunk_position: Literal["last"] | None = (
        "last" if any(x.chunk_position == "last" for x in [left, *others]) else None
    )

    return left.__class__(
        content=content,
        additional_kwargs=additional_kwargs,
        tool_call_chunks=tool_call_chunks,
        response_metadata=response_metadata,
        usage_metadata=usage_metadata,
        id=chunk_id,
        chunk_position=chunk_position,
    )


def add_usage(left: UsageMetadata | None, right: UsageMetadata | None) -> UsageMetadata:
    """Recursively add two UsageMetadata objects.

    Example:
        ```python
        from langchain_core.messages.ai import add_usage

        left = UsageMetadata(
            input_tokens=5,
            output_tokens=0,
            total_tokens=5,
            input_token_details=InputTokenDetails(cache_read=3),
        )
        right = UsageMetadata(
            input_tokens=0,
            output_tokens=10,
            total_tokens=10,
            output_token_details=OutputTokenDetails(reasoning=4),
        )

        add_usage(left, right)
        ```

        results in

        ```python
        UsageMetadata(
            input_tokens=5,
            output_tokens=10,
            total_tokens=15,
            input_token_details=InputTokenDetails(cache_read=3),
            output_token_details=OutputTokenDetails(reasoning=4),
        )
        ```

    Args:
        left: The first `UsageMetadata` object.
        right: The second `UsageMetadata` object.

    Returns:
        The sum of the two `UsageMetadata` objects.

    """
    if not (left or right):
        return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)
    if not (left and right):
        # Only one side carries data; adding nothing leaves it unchanged.
        return cast("UsageMetadata", left or right)

    return UsageMetadata(
        **cast(
            "UsageMetadata",
            _dict_int_op(
                cast("dict", left),
                cast("dict", right),
                operator.add,
            ),
        )
    )


def subtract_usage(
    left: UsageMetadata | None, right: UsageMetadata | None
) -> UsageMetadata:
    """Recursively subtract two `UsageMetadata` objects.

    Token counts cannot be negative so the actual operation is `max(left - right, 0)`.

    A missing (`None` or empty) `left` is treated as all-zero usage, so the result
    is clamped to zero for every key present in `right`.

    Example:
        ```python
        from langchain_core.messages.ai import subtract_usage

        left = UsageMetadata(
            input_tokens=5,
            output_tokens=10,
            total_tokens=15,
            input_token_details=InputTokenDetails(cache_read=4),
        )
        right = UsageMetadata(
            input_tokens=3,
            output_tokens=8,
            total_tokens=11,
            output_token_details=OutputTokenDetails(reasoning=4),
        )

        subtract_usage(left, right)
        ```

        results in

        ```python
        UsageMetadata(
            input_tokens=2,
            output_tokens=2,
            total_tokens=4,
            input_token_details=InputTokenDetails(cache_read=4),
            output_token_details=OutputTokenDetails(reasoning=0),
        )
        ```

    Args:
        left: The first `UsageMetadata` object.
        right: The second `UsageMetadata` object.

    Returns:
        The resulting `UsageMetadata` after subtraction.

    """
    if not (left or right):
        return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)
    if not right:
        # Nothing to subtract.
        return cast("UsageMetadata", left)
    if not left:
        # Previously `right` was returned unchanged here, i.e. `None - right`
        # evaluated to `right`. Start from zero so the clamp below applies.
        left = UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)

    return UsageMetadata(
        **cast(
            "UsageMetadata",
            _dict_int_op(
                cast("dict", left),
                cast("dict", right),
                (lambda le, ri: max(le - ri, 0)),
            ),
        )
    )
# Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.