Overuse may indicate design issues; consider polymorphism
if isinstance(v, dict) and "type" in v:
1"""Module contains utility functions for working with messages.23Some examples of what you can do with these functions include:45* Convert messages to strings (serialization)6* Convert messages from dicts to Message objects (deserialization)7* Filter messages from a list of messages based on name, type or id etc.8"""910from __future__ import annotations1112import base6413import inspect14import json15import logging16import math17from collections.abc import Callable, Iterable, Sequence18from functools import partial, wraps19from typing import (20 TYPE_CHECKING,21 Annotated,22 Any,23 Concatenate,24 Literal,25 ParamSpec,26 Protocol,27 TypeVar,28 cast,29 overload,30)31from xml.sax.saxutils import escape, quoteattr3233from pydantic import Discriminator, Field, Tag3435from langchain_core.exceptions import ErrorCode, create_message36from langchain_core.messages.ai import AIMessage, AIMessageChunk37from langchain_core.messages.base import BaseMessage, BaseMessageChunk38from langchain_core.messages.block_translators.openai import (39 convert_to_openai_data_block,40)41from langchain_core.messages.chat import ChatMessage, ChatMessageChunk42from langchain_core.messages.content import (43 is_data_content_block,44)45from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk46from langchain_core.messages.human import HumanMessage, HumanMessageChunk47from langchain_core.messages.modifier import RemoveMessage48from langchain_core.messages.system import SystemMessage, SystemMessageChunk49from langchain_core.messages.tool import ToolCall, ToolMessage, ToolMessageChunk50from langchain_core.utils.function_calling import convert_to_openai_tool5152if TYPE_CHECKING:53 from langchain_core.language_models import BaseLanguageModel54 from langchain_core.prompt_values import PromptValue55 from langchain_core.runnables.base import Runnable56 from langchain_core.tools import BaseTool5758try:59 from langchain_text_splitters import TextSplitter6061 
def _get_type(v: Any) -> str:
    """Return the `type` discriminator of a message-like value.

    Args:
        v: Either a dictionary holding a `'type'` key, or an object exposing a
            `type` attribute.

    Returns:
        The discriminator string found on `v`.

    Raises:
        TypeError: If `v` provides neither a `'type'` key nor a `type`
            attribute, or if the value found is not a `str`.
    """
    # A dict only counts when it actually carries the key; otherwise fall back
    # to attribute access, exactly like any other object.
    has_type_key = isinstance(v, dict) and "type" in v
    if not has_type_key and not hasattr(v, "type"):
        msg = (
            f"Expected either a dictionary with a 'type' key or an object "
            f"with a 'type' attribute. Instead got type {type(v)}."
        )
        raise TypeError(msg)

    discriminator = v["type"] if has_type_key else v.type
    if isinstance(discriminator, str):
        return discriminator

    msg = f"Expected 'type' to be a str, got {type(discriminator).__name__}"
    raise TypeError(msg)


# Discriminated union over every concrete message / chunk class; the tag for
# each member is whatever `_get_type` returns for the incoming value.
AnyMessage = Annotated[
    Annotated[AIMessage, Tag(tag="ai")]
    | Annotated[HumanMessage, Tag(tag="human")]
    | Annotated[ChatMessage, Tag(tag="chat")]
    | Annotated[SystemMessage, Tag(tag="system")]
    | Annotated[FunctionMessage, Tag(tag="function")]
    | Annotated[ToolMessage, Tag(tag="tool")]
    | Annotated[AIMessageChunk, Tag(tag="AIMessageChunk")]
    | Annotated[HumanMessageChunk, Tag(tag="HumanMessageChunk")]
    | Annotated[ChatMessageChunk, Tag(tag="ChatMessageChunk")]
    | Annotated[SystemMessageChunk, Tag(tag="SystemMessageChunk")]
    | Annotated[FunctionMessageChunk, Tag(tag="FunctionMessageChunk")]
    | Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
    Field(discriminator=Discriminator(_get_type)),
]
"""A type representing any defined `Message` or `MessageChunk` type."""
URL123 image_url = block.get("image_url", {})124 if isinstance(image_url, dict):125 url = image_url.get("url", "")126 if isinstance(url, str) and url.startswith("data:"):127 return True128129 return False130131132_XML_CONTENT_BLOCK_MAX_LEN = 500133134135def _truncate(text: str, max_len: int = _XML_CONTENT_BLOCK_MAX_LEN) -> str:136 """Truncate text to `max_len` characters, adding ellipsis if truncated."""137 if len(text) <= max_len:138 return text139 return text[:max_len] + "..."140141142def _format_content_block_xml(block: dict) -> str | None:143 """Format a content block as XML.144145 Args:146 block: A LangChain content block.147148 Returns:149 XML string representation of the block, or `None` if the block should be150 skipped.151152 Note:153 Plain text document content, server tool call arguments, and server tool154 result outputs are truncated to 500 characters.155 """156 block_type = block.get("type", "")157158 # Skip blocks with base64 encoded data159 if _has_base64_data(block):160 return None161162 # Text blocks163 if block_type == "text":164 text = block.get("text", "")165 return escape(text) if text else None166167 # Reasoning blocks168 if block_type == "reasoning":169 reasoning = block.get("reasoning", "")170 if reasoning:171 return f"<reasoning>{escape(reasoning)}</reasoning>"172 return None173174 # Image blocks (URL only, base64 already filtered)175 if block_type == "image":176 url = block.get("url")177 file_id = block.get("file_id")178 if url:179 return f"<image url={quoteattr(url)} />"180 if file_id:181 return f"<image file_id={quoteattr(file_id)} />"182 return None183184 # OpenAI-style image_url blocks185 if block_type == "image_url":186 image_url = block.get("image_url", {})187 if isinstance(image_url, dict):188 url = image_url.get("url", "")189 if url and not url.startswith("data:"):190 return f"<image url={quoteattr(url)} />"191 return None192193 # Audio blocks (URL only)194 if block_type == "audio":195 url = block.get("url")196 file_id = 
block.get("file_id")197 if url:198 return f"<audio url={quoteattr(url)} />"199 if file_id:200 return f"<audio file_id={quoteattr(file_id)} />"201 return None202203 # Video blocks (URL only)204 if block_type == "video":205 url = block.get("url")206 file_id = block.get("file_id")207 if url:208 return f"<video url={quoteattr(url)} />"209 if file_id:210 return f"<video file_id={quoteattr(file_id)} />"211 return None212213 # Plain text document blocks214 if block_type == "text-plain":215 text = block.get("text", "")216 return escape(_truncate(text)) if text else None217218 # Server tool call blocks (from AI messages)219 if block_type == "server_tool_call":220 tc_id = quoteattr(str(block.get("id") or ""))221 tc_name = quoteattr(str(block.get("name") or ""))222 tc_args_json = json.dumps(block.get("args", {}), ensure_ascii=False)223 tc_args = escape(_truncate(tc_args_json))224 return (225 f"<server_tool_call id={tc_id} name={tc_name}>{tc_args}</server_tool_call>"226 )227228 # Server tool result blocks229 if block_type == "server_tool_result":230 tool_call_id = quoteattr(str(block.get("tool_call_id") or ""))231 status = quoteattr(str(block.get("status") or ""))232 output = block.get("output")233 if output:234 output_json = json.dumps(output, ensure_ascii=False)235 output_str = escape(_truncate(output_json))236 else:237 output_str = ""238 return (239 f"<server_tool_result tool_call_id={tool_call_id} status={status}>"240 f"{output_str}</server_tool_result>"241 )242243 # Unknown block type - skip silently244 return None245246247def _get_message_type_str(248 m: BaseMessage,249 human_prefix: str,250 ai_prefix: str,251 system_prefix: str,252 function_prefix: str,253 tool_prefix: str,254) -> str:255 """Get the type string for XML message element.256257 Args:258 m: The message to get the type string for.259 human_prefix: The prefix to use for `HumanMessage`.260 ai_prefix: The prefix to use for `AIMessage`.261 system_prefix: The prefix to use for `SystemMessage`.262 function_prefix: 
def _resolve_message_role(
    m: BaseMessage,
    human_prefix: str,
    ai_prefix: str,
    system_prefix: str,
    function_prefix: str,
    tool_prefix: str,
) -> tuple[str, bool]:
    """Pick the role label for a message.

    Returns:
        A `(role, is_custom)` pair. `is_custom` is `True` only for
        `ChatMessage`, whose `role` attribute is used as-is.

    Raises:
        ValueError: If an unsupported message type is encountered.
    """
    if isinstance(m, HumanMessage):
        return human_prefix, False
    if isinstance(m, AIMessage):
        return ai_prefix, False
    if isinstance(m, SystemMessage):
        return system_prefix, False
    if isinstance(m, FunctionMessage):
        return function_prefix, False
    if isinstance(m, ToolMessage):
        return tool_prefix, False
    if isinstance(m, ChatMessage):
        return m.role, True
    msg = f"Got unsupported message type: {m}"
    raise ValueError(msg)  # noqa: TRY004


def _get_message_type_str(
    m: BaseMessage,
    human_prefix: str,
    ai_prefix: str,
    system_prefix: str,
    function_prefix: str,
    tool_prefix: str,
) -> str:
    """Get the type string for XML message element.

    Args:
        m: The message to get the type string for.
        human_prefix: The prefix to use for `HumanMessage`.
        ai_prefix: The prefix to use for `AIMessage`.
        system_prefix: The prefix to use for `SystemMessage`.
        function_prefix: The prefix to use for `FunctionMessage`.
        tool_prefix: The prefix to use for `ToolMessage`.

    Returns:
        The matching prefix lowercased, or the unchanged `role` of a
        `ChatMessage`.

    Raises:
        ValueError: If an unsupported message type is encountered.
    """
    role, is_custom = _resolve_message_role(
        m, human_prefix, ai_prefix, system_prefix, function_prefix, tool_prefix
    )
    return role if is_custom else role.lower()


def _format_message_xml(m: BaseMessage, msg_type: str) -> str:
    """Render one message as a `<message type=...>` element.

    String content and string blocks are escaped; dict blocks go through
    `_format_content_block_xml` and are dropped when it returns nothing.
    `AIMessage`s with tool calls (or, failing that, a legacy `function_call`
    in `additional_kwargs`) use a nested multi-line layout.
    """
    content_parts: list[str] = []
    if isinstance(m.content, str):
        if m.content:
            content_parts.append(escape(m.content))
    else:
        # List of content blocks.
        for block in m.content:
            if isinstance(block, str):
                formatted = escape(block) if block else None
            else:
                formatted = _format_content_block_xml(block)
            if formatted:
                content_parts.append(formatted)

    joined_content = " ".join(content_parts)
    open_tag = f"<message type={quoteattr(msg_type)}>"

    is_ai = isinstance(m, AIMessage)
    has_tool_calls = is_ai and bool(m.tool_calls)
    # Tool calls win over a legacy function call when both are present.
    has_function_call = (
        is_ai and not m.tool_calls and "function_call" in m.additional_kwargs
    )
    if not (has_tool_calls or has_function_call):
        # Simple single-line structure for messages without tool calls.
        return f"{open_tag}{joined_content}</message>"

    # Type narrowing: at this point m is an AIMessage (verified above).
    ai_msg = cast("AIMessage", m)
    # NOTE(review): the leading whitespace inside the nested-element literals
    # below is reproduced as it appears in the reviewed text — confirm against
    # the canonical file.
    parts = [open_tag]
    if content_parts:
        parts.append(f" <content>{joined_content}</content>")

    if has_tool_calls:
        for tc in ai_msg.tool_calls:
            tc_id = quoteattr(str(tc.get("id") or ""))
            tc_name = quoteattr(str(tc.get("name") or ""))
            tc_args = escape(json.dumps(tc.get("args", {}), ensure_ascii=False))
            parts.append(
                f" <tool_call id={tc_id} name={tc_name}>"
                f"{tc_args}</tool_call>"
            )
    else:
        fc = ai_msg.additional_kwargs["function_call"]
        fc_name = quoteattr(str(fc.get("name") or ""))
        fc_args = escape(str(fc.get("arguments") or "{}"))
        parts.append(f" <function_call name={fc_name}>{fc_args}</function_call>")

    parts.append("</message>")
    return "\n".join(parts)


def _format_message_prefix(m: BaseMessage, role: str) -> str:
    """Render one message as `Role: text`, appending raw tool-call info for AI."""
    message = f"{role}: {m.text}"
    if isinstance(m, AIMessage):
        if m.tool_calls:
            message += str(m.tool_calls)
        elif "function_call" in m.additional_kwargs:
            # Legacy behavior assumes only one function call per message.
            message += str(m.additional_kwargs["function_call"])
    return message


def get_buffer_string(
    messages: Sequence[BaseMessage],
    human_prefix: str = "Human",
    ai_prefix: str = "AI",
    *,
    system_prefix: str = "System",
    function_prefix: str = "Function",
    tool_prefix: str = "Tool",
    message_separator: str = "\n",
    format: Literal["prefix", "xml"] = "prefix",  # noqa: A002
) -> str:
    r"""Convert a sequence of messages to strings and concatenate them into one string.

    Args:
        messages: Messages to be converted to strings.
        human_prefix: The prefix to prepend to contents of `HumanMessage`s.
        ai_prefix: The prefix to prepend to contents of `AIMessage`.
        system_prefix: The prefix to prepend to contents of `SystemMessage`s.
        function_prefix: The prefix to prepend to contents of `FunctionMessage`s.
        tool_prefix: The prefix to prepend to contents of `ToolMessage`s.
        message_separator: The separator to use between messages.
        format: The output format. `'prefix'` uses `Role: content` format (default).

            `'xml'` uses XML-style `<message type='role'>` format with proper character
            escaping, which is useful when message content may contain role-like
            prefixes that could cause ambiguity.

    Returns:
        A single string concatenation of all input messages.

    Raises:
        ValueError: If `format` is not `'prefix'` or `'xml'`, or if an
            unsupported message type is encountered.

    !!! warning

        If a message is an `AIMessage` and contains both tool calls under `tool_calls`
        and a function call under `additional_kwargs["function_call"]`, only the tool
        calls will be appended to the string representation.

    !!! note "XML format"

        When using `format='xml'`:

        - All messages use uniform `<message type="role">content</message>` format.
        - The `type` attribute uses the lowercased prefix of the message class
            (`human_prefix`, `ai_prefix`, `system_prefix`, `function_prefix`,
            `tool_prefix`) and the original role (unchanged) for `ChatMessage`.
        - Message content is escaped using `xml.sax.saxutils.escape()`.
        - Attribute values are escaped using `xml.sax.saxutils.quoteattr()`.
        - AI messages with tool calls use nested structure with `<content>` and
            `<tool_call>` elements.
        - For multi-modal content (list of content blocks), supported block types
            are: `text`, `reasoning`, `image` (URL/file_id only), `image_url`
            (OpenAI-style, URL only), `audio` (URL/file_id only), `video` (URL/file_id
            only), `text-plain`, `server_tool_call`, and `server_tool_result`.
        - Content blocks with base64-encoded data are skipped (including blocks
            with `base64` field or `data:` URLs).
        - Unknown block types are skipped.
        - Plain text document content (`text-plain`), server tool call arguments,
            and server tool result outputs are truncated.

    Example:
        Default prefix format:

        ```python
        from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string

        messages = [
            HumanMessage(content="Hi, how are you?"),
            AIMessage(content="Good, how are you?"),
        ]
        get_buffer_string(messages)
        # -> "Human: Hi, how are you?\nAI: Good, how are you?"
        ```

        XML format (useful when content contains role-like prefixes):

        ```python
        messages = [
            HumanMessage(content="Example: Human: some text"),
            AIMessage(content="I see the example."),
        ]
        get_buffer_string(messages, format="xml")
        # -> '<message type="human">Example: Human: some text</message>\n'
        # -> '<message type="ai">I see the example.</message>'
        ```

        XML format with special characters (automatically escaped):

        ```python
        messages = [
            HumanMessage(content="Is 5 < 10 & 10 > 5?"),
        ]
        get_buffer_string(messages, format="xml")
        # -> '<message type="human">Is 5 &lt; 10 &amp; 10 &gt; 5?</message>'
        ```

        XML format with tool calls produces one `<message>` element that wraps a
        `<content>` child followed by one `<tool_call id=... name=...>` child per
        call, each on its own line, with the JSON-encoded arguments as text.
    """
    if format not in {"prefix", "xml"}:
        msg = (
            f"Unrecognized format={format!r}. Supported formats are 'prefix' and 'xml'."
        )
        raise ValueError(msg)

    string_messages = []
    for m in messages:
        # Resolve the role once; both output formats derive their label from it.
        role, is_custom = _resolve_message_role(
            m, human_prefix, ai_prefix, system_prefix, function_prefix, tool_prefix
        )
        if format == "xml":
            msg_type = role if is_custom else role.lower()
            string_messages.append(_format_message_xml(m, msg_type))
        else:  # format == "prefix"
            string_messages.append(_format_message_prefix(m, role))

    return message_separator.join(string_messages)
def _message_from_dict(message: dict) -> BaseMessage:
    """Rebuild a message object from its `{"type": ..., "data": {...}}` dict form.

    Args:
        message: Dict with a `'type'` tag and a `'data'` dict of constructor
            keyword arguments.

    Returns:
        An instance of the class registered for the tag, built from `data`.

    Raises:
        ValueError: If the tag does not match any known message type.
    """
    type_ = message["type"]
    # Checked in order with `==`, so `data` is only read for a recognised tag.
    known_types = (
        ("human", HumanMessage),
        ("ai", AIMessage),
        ("system", SystemMessage),
        ("chat", ChatMessage),
        ("function", FunctionMessage),
        ("tool", ToolMessage),
        ("remove", RemoveMessage),
        ("AIMessageChunk", AIMessageChunk),
        ("HumanMessageChunk", HumanMessageChunk),
        ("FunctionMessageChunk", FunctionMessageChunk),
        ("ToolMessageChunk", ToolMessageChunk),
        ("SystemMessageChunk", SystemMessageChunk),
        ("ChatMessageChunk", ChatMessageChunk),
    )
    for tag, message_cls in known_types:
        if type_ == tag:
            return message_cls(**message["data"])
    msg = f"Got unexpected message type: {type_}"
    raise ValueError(msg)


def messages_from_dict(messages: Sequence[dict]) -> list[BaseMessage]:
    """Convert a sequence of messages from dicts to `Message` objects.

    Args:
        messages: Sequence of messages (as dicts) to convert.

    Returns:
        The converted messages, in input order.
    """
    converted: list[BaseMessage] = []
    for message_dict in messages:
        converted.append(_message_from_dict(message_dict))
    return converted


def message_chunk_to_message(chunk: BaseMessage) -> BaseMessage:
    """Convert a message chunk to a `Message`.

    Args:
        chunk: Message chunk to convert. Anything that is not a
            `BaseMessageChunk` is returned untouched.

    Returns:
        An instance of the chunk's non-chunk parent class carrying the chunk's
        fields, minus the chunk-only ones.
    """
    if isinstance(chunk, BaseMessageChunk):
        skipped_fields = ["type"]
        if isinstance(chunk, AIMessageChunk):
            skipped_fields += ["tool_call_chunks", "chunk_position"]
        # Chunk classes always have the equivalent non-chunk class as their
        # first parent.
        message_cls = chunk.__class__.__mro__[1]
        init_kwargs = {
            field: value
            for field, value in chunk.__dict__.items()
            if field not in skipped_fields
        }
        return cast("BaseMessage", message_cls(**init_kwargs))
    return chunk


MessageLikeRepresentation = (
    BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]
)
"""A type representing the various ways a message can be represented."""
def _create_message_from_message_type(
    message_type: str,
    content: str,
    name: str | None = None,
    tool_call_id: str | None = None,
    tool_calls: list[dict[str, Any]] | None = None,
    id: str | None = None,
    **additional_kwargs: Any,
) -> BaseMessage:
    """Create a message from a `Message` type and content string.

    Args:
        message_type: the type of the message (e.g., `'human'`, `'ai'`, etc.).
        content: the content string.
        name: the name of the message.
        tool_call_id: the tool call id.
        tool_calls: the tool calls. Entries carrying a `'function'` key are
            treated as OpenAI-format and converted; others are passed through.
        id: the id of the message.
        additional_kwargs: additional keyword arguments. `response_metadata` is
            lifted out when truthy; a nested `additional_kwargs` dict is merged
            into the top level; the remainder becomes the message's
            `additional_kwargs`.

    Returns:
        a message of the appropriate type.

    Raises:
        ValueError: if the message type is not one of `'human'`, `'user'`, `'ai'`,
            `'assistant'`, `'function'`, `'tool'`, `'system'`, `'developer'`
            or `'remove'`.
    """
    kwargs: dict[str, Any] = {}
    if name is not None:
        kwargs["name"] = name
    if tool_call_id is not None:
        kwargs["tool_call_id"] = tool_call_id
    if additional_kwargs:
        if response_metadata := additional_kwargs.pop("response_metadata", None):
            kwargs["response_metadata"] = response_metadata
        kwargs["additional_kwargs"] = additional_kwargs
        # Flatten a nested `additional_kwargs` entry into the same dict.
        additional_kwargs.update(additional_kwargs.pop("additional_kwargs", {}))
    if id is not None:
        kwargs["id"] = id
    if tool_calls is not None:
        kwargs["tool_calls"] = []
        for tool_call in tool_calls:
            # Convert OpenAI-format tool call to LangChain format.
            if "function" in tool_call:
                args = tool_call["function"]["arguments"]
                if isinstance(args, str):
                    args = json.loads(args, strict=False)
                kwargs["tool_calls"].append(
                    {
                        "name": tool_call["function"]["name"],
                        "args": args,
                        "id": tool_call["id"],
                        "type": "tool_call",
                    }
                )
            else:
                kwargs["tool_calls"].append(tool_call)
    if message_type in {"human", "user"}:
        # `example` is a real field on the message, not an extra kwarg.
        if example := kwargs.get("additional_kwargs", {}).pop("example", False):
            kwargs["example"] = example
        message: BaseMessage = HumanMessage(content=content, **kwargs)
    elif message_type in {"ai", "assistant"}:
        if example := kwargs.get("additional_kwargs", {}).pop("example", False):
            kwargs["example"] = example
        message = AIMessage(content=content, **kwargs)
    elif message_type in {"system", "developer"}:
        if message_type == "developer":
            # Remember the original role so it can be restored on the way out.
            kwargs["additional_kwargs"] = kwargs.get("additional_kwargs") or {}
            kwargs["additional_kwargs"]["__openai_role__"] = "developer"
        message = SystemMessage(content=content, **kwargs)
    elif message_type == "function":
        message = FunctionMessage(content=content, **kwargs)
    elif message_type == "tool":
        artifact = kwargs.get("additional_kwargs", {}).pop("artifact", None)
        status = kwargs.get("additional_kwargs", {}).pop("status", None)
        if status is not None:
            kwargs["status"] = status
        message = ToolMessage(content=content, artifact=artifact, **kwargs)
    elif message_type == "remove":
        # `content` is intentionally not forwarded here.
        message = RemoveMessage(**kwargs)
    else:
        msg = (
            f"Unexpected message type: '{message_type}'. Use one of 'human',"
            f" 'user', 'ai', 'assistant', 'function', 'tool', 'system', or 'developer'."
        )
        msg = create_message(message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE)
        raise ValueError(msg)
    return message


# Map of class names emitted in the `Serializable` constructor-envelope
# (`{"lc": 1, "type": "constructor", "id": [..., "<ClassName>"],
# "kwargs": {...}}`) to the message-type strings
# `_create_message_from_message_type` accepts. Read by
# `_convert_to_message`'s dict branch when unpacking that wire shape.
# Kept as a hardcoded allowlist of strings rather than a class registry
# lookup so dispatch never resolves to a class chosen by the caller.
_LC_CONSTRUCTOR_NAME_TO_TYPE: dict[str, str] = {
    "HumanMessage": "human",
    "HumanMessageChunk": "human",
    "AIMessage": "ai",
    "AIMessageChunk": "ai",
    "SystemMessage": "system",
    "SystemMessageChunk": "system",
    "FunctionMessage": "function",
    "FunctionMessageChunk": "function",
    "ToolMessage": "tool",
    "ToolMessageChunk": "tool",
    "RemoveMessage": "remove",
}


def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
    """Instantiate a `Message` from a variety of message formats.

    The message format can be one of the following:

    - `BaseMessage`: returned as-is
    - 2-item sequence of (role string, content); e.g., (`'human'`, `'hello'`)
    - dict: a message dict with `role` (or `type`) and `content` keys
    - dict: the `Serializable` constructor-envelope wire shape
        `{"lc": 1, "type": "constructor", "id": [..., "<ClassName>"],
        "kwargs": {...}}` — unpacked structurally and routed through the
        standard dict-with-type dispatch.
    - string: shorthand for (`'human'`, content)

    Args:
        message: a representation of a message in one of the supported formats.

    Returns:
        An instance of a message.

    Raises:
        NotImplementedError: if the message type is not supported, or a
            sequence does not unpack into exactly two items.
        ValueError: if the message dict does not contain the required keys.
    """
    if isinstance(message, BaseMessage):
        message_ = message
    elif isinstance(message, Sequence):
        if isinstance(message, str):
            message_ = _create_message_from_message_type("human", message)
        else:
            try:
                message_type_str, template = message
            except ValueError as e:
                msg = "Message as a sequence must be (role string, template)"
                raise NotImplementedError(msg) from e
            message_ = _create_message_from_message_type(message_type_str, template)
    elif isinstance(message, dict):
        # `Serializable` constructor-envelope wire shape. Detect structurally, map
        # the class name to a known message-type string via a hardcoded
        # allowlist, and recurse with the canonical
        # `{**kwargs, "type": ...}` shape — no `load()`, no dynamic
        # class instantiation.
        if (
            message.get("lc") == 1
            and message.get("type") == "constructor"
            and isinstance(message.get("id"), list)
            and message["id"]
            and isinstance(message.get("kwargs"), dict)
        ):
            class_name = message["id"][-1]
            # Only strings can be allowlisted; anything else (possibly
            # unhashable) falls through to the generic dict handling below.
            mapped = (
                _LC_CONSTRUCTOR_NAME_TO_TYPE.get(class_name)
                if isinstance(class_name, str)
                else None
            )
            if mapped is not None:
                # The allowlisted type must win over any `type` entry inside
                # `kwargs` (e.g. `"AIMessageChunk"`), which the type dispatch
                # would otherwise see and reject.
                return _convert_to_message({**message["kwargs"], "type": mapped})

        msg_kwargs = message.copy()
        try:
            try:
                msg_type = msg_kwargs.pop("role")
            except KeyError:
                msg_type = msg_kwargs.pop("type")
            # None msg content is not allowed
            msg_content = msg_kwargs.pop("content") or ""
        except KeyError as e:
            msg = f"Message dict must contain 'role' and 'content' keys, got {message}"
            msg = create_message(
                message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE
            )
            raise ValueError(msg) from e
        message_ = _create_message_from_message_type(
            msg_type, msg_content, **msg_kwargs
        )
    else:
        msg = f"Unsupported message type: {type(message)}"
        msg = create_message(message=msg, error_code=ErrorCode.MESSAGE_COERCION_FAILURE)
        raise NotImplementedError(msg)

    return message_
def convert_to_messages(
    messages: Iterable[MessageLikeRepresentation] | PromptValue,
) -> list[BaseMessage]:
    """Convert a sequence of messages to a list of messages.

    Args:
        messages: Message-like items to convert, or a `PromptValue`.

    Returns:
        The result of `to_messages()` for a `PromptValue`; otherwise one
        converted message per input item, in order.
    """
    # Import here to avoid circular imports
    from langchain_core.prompt_values import PromptValue  # noqa: PLC0415

    if not isinstance(messages, PromptValue):
        return [_convert_to_message(item) for item in messages]
    return messages.to_messages()


_P = ParamSpec("_P")
_R_co = TypeVar("_R_co", covariant=True)


class _RunnableSupportCallable(Protocol[_P, _R_co]):
    """Call signature of a function wrapped by `_runnable_support`.

    Called with messages it yields the function's result; called without
    messages it yields a `Runnable` that accepts them later.
    """

    @overload
    def __call__(
        self,
        messages: None = None,
        *args: _P.args,
        **kwargs: _P.kwargs,
    ) -> Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...

    @overload
    def __call__(
        self,
        messages: Sequence[MessageLikeRepresentation] | PromptValue,
        *args: _P.args,
        **kwargs: _P.kwargs,
    ) -> _R_co: ...

    def __call__(
        self,
        messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
        *args: _P.args,
        **kwargs: _P.kwargs,
    ) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...
def _runnable_support(
    func: Callable[
        Concatenate[Sequence[MessageLikeRepresentation] | PromptValue, _P], _R_co
    ],
) -> _RunnableSupportCallable[_P, _R_co]:
    """Let a message-transforming function double as a `Runnable` factory.

    The wrapper calls `func` directly when messages are supplied. When they are
    omitted it returns a `RunnableLambda`, named after `func`, with the given
    keyword arguments pre-bound.
    """

    @wraps(func)
    def wrapped(
        messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
        *args: _P.args,
        **kwargs: _P.kwargs,
    ) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]:
        # Import locally to prevent circular import.
        from langchain_core.runnables.base import RunnableLambda  # noqa: PLC0415

        if messages is None:
            bound = partial(func, **kwargs)
            return RunnableLambda(bound, name=func.__name__)
        return func(messages, *args, **kwargs)

    return cast("_RunnableSupportCallable[_P, _R_co]", wrapped)


@_runnable_support
def filter_messages(
    messages: Iterable[MessageLikeRepresentation] | PromptValue,
    *,
    include_names: Sequence[str] | None = None,
    exclude_names: Sequence[str] | None = None,
    include_types: Sequence[str | type[BaseMessage]] | None = None,
    exclude_types: Sequence[str | type[BaseMessage]] | None = None,
    include_ids: Sequence[str] | None = None,
    exclude_ids: Sequence[str] | None = None,
    exclude_tool_calls: Sequence[str] | bool | None = None,
) -> list[BaseMessage]:
    """Filter messages based on `name`, `type` or `id`.

    Args:
        messages: Sequence Message-like objects to filter.
        include_names: Message names to include.
        exclude_names: Messages names to exclude.
        include_types: Message types to include. Can be specified as string names
            (e.g. `'system'`, `'human'`, `'ai'`, ...) or as `BaseMessage`
            classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`, ...).
        exclude_types: Message types to exclude. Same forms as `include_types`.
        include_ids: Message IDs to include.
        exclude_ids: Message IDs to exclude.
        exclude_tool_calls: Tool call IDs to exclude.
            Can be one of the following:
            - `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
                objects will be excluded.
            - a list, tuple or set of tool call IDs to exclude:
                - `ToolMessage` objects with the corresponding tool call ID will be
                    excluded.
                - The `tool_calls` in the AIMessage will be updated to exclude
                    matching tool calls. If all `tool_calls` are filtered from an
                    AIMessage, the whole message is excluded.

    Returns:
        A list of Messages that meets at least one of the `incl_*` conditions and none
        of the `excl_*` conditions. If no `incl_*` conditions are specified then
        anything that is not explicitly excluded will be included.

    Example:
        ```python
        from langchain_core.messages import (
            filter_messages,
            AIMessage,
            HumanMessage,
            SystemMessage,
        )

        messages = [
            SystemMessage("you're a good assistant."),
            HumanMessage("what's your name", id="foo", name="example_user"),
            AIMessage("steve-o", id="bar", name="example_assistant"),
            HumanMessage("what's your favorite color", id="baz"),
            AIMessage("silicon blue", id="blah"),
        ]

        filter_messages(
            messages,
            include_names=("example_user", "example_assistant"),
            include_types=("system",),
            exclude_ids=("bar",),
        )
        ```

        ```python
        [
            SystemMessage("you're a good assistant."),
            HumanMessage("what's your name", id="foo", name="example_user"),
        ]
        ```
    """
    messages = convert_to_messages(messages)
    # Both checks depend only on the argument, so evaluate them once.
    drop_all_tool_traffic = exclude_tool_calls is True
    has_excluded_call_ids = isinstance(exclude_tool_calls, (list, tuple, set))

    kept: list[BaseMessage] = []
    for msg in messages:
        # 1. Plain name / type / id exclusions.
        if exclude_names and msg.name in exclude_names:
            continue
        if exclude_types and _is_message_type(msg, exclude_types):
            continue
        if exclude_ids and msg.id in exclude_ids:
            continue

        # 2. Blanket removal of everything tool-related.
        if drop_all_tool_traffic and (
            (isinstance(msg, AIMessage) and msg.tool_calls)
            or isinstance(msg, ToolMessage)
        ):
            continue

        # 3. Selective removal of specific tool call ids.
        candidate = msg
        if has_excluded_call_ids:
            if isinstance(msg, AIMessage) and msg.tool_calls:
                remaining_calls = [
                    call
                    for call in msg.tool_calls
                    if call["id"] not in exclude_tool_calls
                ]
                if not remaining_calls:
                    continue

                new_content = msg.content
                # handle Anthropic content blocks
                if isinstance(msg.content, list):
                    new_content = [
                        part
                        for part in msg.content
                        if not (
                            isinstance(part, dict)
                            and part.get("type") == "tool_use"
                            and part.get("id") in exclude_tool_calls
                        )
                    ]

                candidate = msg.model_copy(
                    update={"tool_calls": remaining_calls, "content": new_content}
                )
            elif (
                isinstance(msg, ToolMessage) and msg.tool_call_id in exclude_tool_calls
            ):
                continue

        # 4. Inclusion: default to keeping when no inclusion criteria given.
        if (
            not (include_types or include_ids or include_names)
            or (include_names and candidate.name in include_names)
            or (include_types and _is_message_type(candidate, include_types))
            or (include_ids and candidate.id in include_ids)
        ):
            kept.append(candidate)

    return kept
@_runnable_support
def merge_message_runs(
    messages: Iterable[MessageLikeRepresentation] | PromptValue,
    *,
    chunk_separator: str = "\n",
) -> list[BaseMessage]:
    r"""Merge consecutive Messages of the same type.

    !!! note
        `ToolMessage` objects are not merged, as each has a distinct tool call id that
        can't be merged.

    Args:
        messages: Sequence Message-like objects to merge.
        chunk_separator: Specify the string to be inserted between message chunks.

    Returns:
        list of BaseMessages with consecutive runs of message types merged into single
        messages. When both messages being merged have non-empty string contents,
        `chunk_separator` (a new-line by default) is inserted between them.
        Otherwise the contents are combined by the chunk `+` operator without a
        separator.

    Example:
        ```python
        from langchain_core.messages import (
            merge_message_runs,
            HumanMessage,
            SystemMessage,
        )

        messages = [
            SystemMessage("you're a good assistant."),
            HumanMessage("what's your favorite color", id="foo"),
            HumanMessage("wait your favorite food", id="bar"),
        ]

        merge_message_runs(messages)
        ```

        The two consecutive `HumanMessage`s come back as a single message whose
        content is the two strings joined by `chunk_separator`; the
        `SystemMessage` is returned unchanged.
    """
    if not messages:
        return []

    merged: list[BaseMessage] = []
    for current in convert_to_messages(messages):
        # Take the previous message off the stack; it is either pushed back
        # untouched or replaced by the merged result.
        previous = merged.pop() if merged else None
        if not previous:
            merged.append(current)
            continue

        same_kind = isinstance(current, previous.__class__)
        if isinstance(current, ToolMessage) or not same_kind:
            merged.append(previous)
            merged.append(current)
            continue

        previous_chunk = _msg_to_chunk(previous)
        current_chunk = _msg_to_chunk(current)
        # Only the response metadata of the first message in a run is kept.
        if current_chunk.response_metadata:
            current_chunk.response_metadata.clear()

        both_non_empty_text = (
            isinstance(previous_chunk.content, str)
            and isinstance(current_chunk.content, str)
            and previous_chunk.content
            and current_chunk.content
        )
        if both_non_empty_text:
            previous_chunk.content += chunk_separator
        merged.append(_chunk_to_msg(previous_chunk + current_chunk))

    return merged
curr_chunk.response_metadata.clear()1110 if (1111 isinstance(last_chunk.content, str)1112 and isinstance(curr_chunk.content, str)1113 and last_chunk.content1114 and curr_chunk.content1115 ):1116 last_chunk.content += chunk_separator1117 merged.append(_chunk_to_msg(last_chunk + curr_chunk))1118 return merged111911201121# TODO: Update so validation errors (for token_counter, for example) are raised on1122# init not at runtime.1123@_runnable_support1124def trim_messages(1125 messages: Iterable[MessageLikeRepresentation] | PromptValue,1126 *,1127 max_tokens: int,1128 token_counter: Callable[[list[BaseMessage]], int]1129 | Callable[[BaseMessage], int]1130 | BaseLanguageModel1131 | Literal["approximate"],1132 strategy: Literal["first", "last"] = "last",1133 allow_partial: bool = False,1134 end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,1135 start_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,1136 include_system: bool = False,1137 text_splitter: Callable[[str], list[str]] | TextSplitter | None = None,1138) -> list[BaseMessage]:1139 r"""Trim messages to be below a token count.11401141 `trim_messages` can be used to reduce the size of a chat history to a specified1142 token or message count.11431144 In either case, if passing the trimmed chat history back into a chat model1145 directly, the resulting chat history should usually satisfy the following1146 properties:11471148 1. The resulting chat history should be valid. Most chat models expect that chat1149 history starts with either (1) a `HumanMessage` or (2) a `SystemMessage`1150 followed by a `HumanMessage`. To achieve this, set `start_on='human'`.1151 In addition, generally a `ToolMessage` can only appear after an `AIMessage`1152 that involved a tool call.1153 2. It includes recent messages and drops old messages in the chat history.1154 To achieve this set the `strategy='last'`.1155 3. 
Usually, the new chat history should include the `SystemMessage` if it1156 was present in the original chat history since the `SystemMessage` includes1157 special instructions to the chat model. The `SystemMessage` is almost always1158 the first message in the history if present. To achieve this set the1159 `include_system=True`.11601161 !!! note1162 The examples below show how to configure `trim_messages` to achieve a behavior1163 consistent with the above properties.11641165 Args:1166 messages: Sequence of Message-like objects to trim.1167 max_tokens: Max token count of trimmed messages.1168 token_counter: Function or llm for counting tokens in a `BaseMessage` or a1169 list of `BaseMessage`.11701171 If a `BaseLanguageModel` is passed in then1172 `BaseLanguageModel.get_num_tokens_from_messages()` will be used. Set to1173 `len` to count the number of **messages** in the chat history.11741175 You can also use string shortcuts for convenience:11761177 - `'approximate'`: Uses `count_tokens_approximately` for fast, approximate1178 token counts.11791180 !!! note11811182 `count_tokens_approximately` (or the shortcut `'approximate'`) is1183 recommended for using `trim_messages` on the hot path, where exact token1184 counting is not necessary.11851186 strategy: Strategy for trimming.11871188 - `'first'`: Keep the first `<= n_count` tokens of the messages.1189 - `'last'`: Keep the last `<= n_count` tokens of the messages.1190 allow_partial: Whether to split a message if only part of the message can be1191 included.11921193 If `strategy='last'` then the last partial contents of a message are1194 included. If `strategy='first'` then the first partial contents of a1195 message are included.1196 end_on: The message type to end on.11971198 If specified then every message after the last occurrence of this type is1199 ignored. If `strategy='last'` then this is done before we attempt to get the1200 last `max_tokens`. 
If `strategy='first'` then this is done after we get the1201 first `max_tokens`. Can be specified as string names (e.g. `'system'`,1202 `'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g. `SystemMessage`,1203 `HumanMessage`, `AIMessage`, ...). Can be a single type or a list of types.12041205 start_on: The message type to start on.12061207 Should only be specified if `strategy='last'`. If specified then every1208 message before the first occurrence of this type is ignored. This is done1209 after we trim the initial messages to the last `max_tokens`. Does not apply1210 to a `SystemMessage` at index 0 if `include_system=True`. Can be specified1211 as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or as1212 `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`,1213 ...). Can be a single type or a list of types.12141215 include_system: Whether to keep the `SystemMessage` if there is one at index1216 `0`.12171218 Should only be specified if `strategy="last"`.1219 text_splitter: Function or `langchain_text_splitters.TextSplitter` for1220 splitting the string contents of a message.12211222 Only used if `allow_partial=True`. If `strategy='last'` then the last split1223 tokens from a partial message will be included. if `strategy='first'` then1224 the first split tokens from a partial message will be included. Token1225 splitter assumes that separators are kept, so that split contents can be1226 directly concatenated to recreate the original text. 
Defaults to splitting1227 on newlines.12281229 Returns:1230 List of trimmed `BaseMessage`.12311232 Raises:1233 ValueError: if two incompatible arguments are specified or an unrecognized1234 `strategy` is specified.12351236 Example:1237 Trim chat history based on token count, keeping the `SystemMessage` if1238 present, and ensuring that the chat history starts with a `HumanMessage` (or a1239 `SystemMessage` followed by a `HumanMessage`).12401241 ```python1242 from langchain_core.messages import (1243 AIMessage,1244 HumanMessage,1245 BaseMessage,1246 SystemMessage,1247 trim_messages,1248 )12491250 messages = [1251 SystemMessage("you're a good assistant, you always respond with a joke."),1252 HumanMessage("i wonder why it's called langchain"),1253 AIMessage(1254 'Well, I guess they thought "WordRope" and "SentenceString" just '1255 "didn't have the same ring to it!"1256 ),1257 HumanMessage("and who is harrison chasing anyways"),1258 AIMessage(1259 "Hmmm let me think.\n\nWhy, he's probably chasing after the last "1260 "cup of coffee in the office!"1261 ),1262 HumanMessage("what do you call a speechless parrot"),1263 ]126412651266 trim_messages(1267 messages,1268 max_tokens=45,1269 strategy="last",1270 token_counter=ChatOpenAI(model="openai:gpt-5.5"),1271 # Most chat models expect that chat history starts with either:1272 # (1) a HumanMessage or1273 # (2) a SystemMessage followed by a HumanMessage1274 start_on="human",1275 # Usually, we want to keep the SystemMessage1276 # if it's present in the original history.1277 # The SystemMessage has special instructions for the model.1278 include_system=True,1279 allow_partial=False,1280 )1281 ```12821283 ```python1284 [1285 SystemMessage(1286 content="you're a good assistant, you always respond with a joke."1287 ),1288 HumanMessage(content="what do you call a speechless parrot"),1289 ]1290 ```12911292 Trim chat history using approximate token counting with `'approximate'`:12931294 ```python1295 trim_messages(1296 messages,1297 
max_tokens=45,1298 strategy="last",1299 # Using the "approximate" shortcut for fast token counting1300 token_counter="approximate",1301 start_on="human",1302 include_system=True,1303 )13041305 # This is equivalent to using `count_tokens_approximately` directly1306 from langchain_core.messages.utils import count_tokens_approximately13071308 trim_messages(1309 messages,1310 max_tokens=45,1311 strategy="last",1312 token_counter=count_tokens_approximately,1313 start_on="human",1314 include_system=True,1315 )1316 ```13171318 Trim chat history based on the message count, keeping the `SystemMessage` if1319 present, and ensuring that the chat history starts with a HumanMessage (1320 or a `SystemMessage` followed by a `HumanMessage`).13211322 trim_messages(1323 messages,1324 # When `len` is passed in as the token counter function,1325 # max_tokens will count the number of messages in the chat history.1326 max_tokens=4,1327 strategy="last",1328 # Passing in `len` as a token counter function will1329 # count the number of messages in the chat history.1330 token_counter=len,1331 # Most chat models expect that chat history starts with either:1332 # (1) a HumanMessage or1333 # (2) a SystemMessage followed by a HumanMessage1334 start_on="human",1335 # Usually, we want to keep the SystemMessage1336 # if it's present in the original history.1337 # The SystemMessage has special instructions for the model.1338 include_system=True,1339 allow_partial=False,1340 )13411342 ```python1343 [1344 SystemMessage(1345 content="you're a good assistant, you always respond with a joke."1346 ),1347 HumanMessage(content="and who is harrison chasing anyways"),1348 AIMessage(1349 content="Hmmm let me think.\n\nWhy, he's probably chasing after "1350 "the last cup of coffee in the office!"1351 ),1352 HumanMessage(content="what do you call a speechless parrot"),1353 ]1354 ```1355 Trim chat history using a custom token counter function that counts the1356 number of tokens in each message.13571358 
```python1359 messages = [1360 SystemMessage("This is a 4 token text. The full message is 10 tokens."),1361 HumanMessage(1362 "This is a 4 token text. The full message is 10 tokens.", id="first"1363 ),1364 AIMessage(1365 [1366 {"type": "text", "text": "This is the FIRST 4 token block."},1367 {"type": "text", "text": "This is the SECOND 4 token block."},1368 ],1369 id="second",1370 ),1371 HumanMessage(1372 "This is a 4 token text. The full message is 10 tokens.", id="third"1373 ),1374 AIMessage(1375 "This is a 4 token text. The full message is 10 tokens.",1376 id="fourth",1377 ),1378 ]137913801381 def dummy_token_counter(messages: list[BaseMessage]) -> int:1382 # treat each message like it adds 3 default tokens at the beginning1383 # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens1384 # per message.13851386 default_content_len = 41387 default_msg_prefix_len = 31388 default_msg_suffix_len = 313891390 count = 01391 for msg in messages:1392 if isinstance(msg.content, str):1393 count += (1394 default_msg_prefix_len1395 + default_content_len1396 + default_msg_suffix_len1397 )1398 if isinstance(msg.content, list):1399 count += (1400 default_msg_prefix_len1401 + len(msg.content) * default_content_len1402 + default_msg_suffix_len1403 )1404 return count1405 ```14061407 First 30 tokens, allowing partial messages:1408 ```python1409 trim_messages(1410 messages,1411 max_tokens=30,1412 token_counter=dummy_token_counter,1413 strategy="first",1414 allow_partial=True,1415 )1416 ```14171418 ```python1419 [1420 SystemMessage("This is a 4 token text. The full message is 10 tokens."),1421 HumanMessage(1422 "This is a 4 token text. 
The full message is 10 tokens.",1423 id="first",1424 ),1425 AIMessage(1426 [{"type": "text", "text": "This is the FIRST 4 token block."}],1427 id="second",1428 ),1429 ]1430 ```1431 """1432 # Validate arguments1433 if start_on and strategy == "first":1434 msg = "start_on parameter is only valid with strategy='last'"1435 raise ValueError(msg)1436 if include_system and strategy == "first":1437 msg = "include_system parameter is only valid with strategy='last'"1438 raise ValueError(msg)14391440 messages = convert_to_messages(messages)14411442 # Handle string shortcuts for token counter1443 if isinstance(token_counter, str):1444 if token_counter in _TOKEN_COUNTER_SHORTCUTS:1445 actual_token_counter = _TOKEN_COUNTER_SHORTCUTS[token_counter]1446 else:1447 available_shortcuts = ", ".join(1448 f"'{key}'" for key in _TOKEN_COUNTER_SHORTCUTS1449 )1450 msg = (1451 f"Invalid token_counter shortcut '{token_counter}'. "1452 f"Available shortcuts: {available_shortcuts}."1453 )1454 raise ValueError(msg)1455 else:1456 # Type narrowing: at this point token_counter is not a str1457 actual_token_counter = token_counter # type: ignore[assignment]14581459 if hasattr(actual_token_counter, "get_num_tokens_from_messages"):1460 list_token_counter = actual_token_counter.get_num_tokens_from_messages1461 elif callable(actual_token_counter):1462 if (1463 next(1464 iter(inspect.signature(actual_token_counter).parameters.values())1465 ).annotation1466 is BaseMessage1467 ):14681469 def list_token_counter(messages: Sequence[BaseMessage]) -> int:1470 return sum(actual_token_counter(msg) for msg in messages) # type: ignore[arg-type, misc]14711472 else:1473 list_token_counter = actual_token_counter1474 else:1475 msg = (1476 f"'token_counter' expected to be a model that implements "1477 f"'get_num_tokens_from_messages()' or a function. 
Received object of type "1478 f"{type(actual_token_counter)}."1479 )1480 raise ValueError(msg)14811482 if _HAS_LANGCHAIN_TEXT_SPLITTERS and isinstance(text_splitter, TextSplitter):1483 text_splitter_fn = text_splitter.split_text1484 elif text_splitter:1485 text_splitter_fn = cast("Callable", text_splitter)1486 else:1487 text_splitter_fn = _default_text_splitter14881489 if strategy == "first":1490 return _first_max_tokens(1491 messages,1492 max_tokens=max_tokens,1493 token_counter=list_token_counter,1494 text_splitter=text_splitter_fn,1495 partial_strategy="first" if allow_partial else None,1496 end_on=end_on,1497 )1498 if strategy == "last":1499 return _last_max_tokens(1500 messages,1501 max_tokens=max_tokens,1502 token_counter=list_token_counter,1503 allow_partial=allow_partial,1504 include_system=include_system,1505 start_on=start_on,1506 end_on=end_on,1507 text_splitter=text_splitter_fn,1508 )1509 msg = f"Unrecognized {strategy=}. Supported strategies are 'last' and 'first'."1510 raise ValueError(msg)151115121513_SingleMessage = BaseMessage | str | dict[str, Any]1514_T = TypeVar("_T", bound=_SingleMessage)1515# A sequence of _SingleMessage that is NOT a bare str1516_MultipleMessages = Sequence[_T]151715181519@overload1520def convert_to_openai_messages(1521 messages: _SingleMessage,1522 *,1523 text_format: Literal["string", "block"] = "string",1524 include_id: bool = False,1525 pass_through_unknown_blocks: bool = True,1526) -> dict: ...152715281529@overload1530def convert_to_openai_messages(1531 messages: _MultipleMessages,1532 *,1533 text_format: Literal["string", "block"] = "string",1534 include_id: bool = False,1535 pass_through_unknown_blocks: bool = True,1536) -> list[dict]: ...153715381539def convert_to_openai_messages(1540 messages: MessageLikeRepresentation | Sequence[MessageLikeRepresentation],1541 *,1542 text_format: Literal["string", "block"] = "string",1543 include_id: bool = False,1544 pass_through_unknown_blocks: bool = True,1545) -> dict | 
list[dict]:1546 """Convert LangChain messages into OpenAI message dicts.15471548 Args:1549 messages: Message-like object or iterable of objects whose contents are1550 in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.1551 text_format: How to format string or text block contents:1552 - `'string'`:1553 If a message has a string content, this is left as a string. If1554 a message has content blocks that are all of type `'text'`, these1555 are joined with a newline to make a single string. If a message has1556 content blocks and at least one isn't of type `'text'`, then1557 all blocks are left as dicts.1558 - `'block'`:1559 If a message has a string content, this is turned into a list1560 with a single content block of type `'text'`. If a message has1561 content blocks these are left as is.1562 include_id: Whether to include message IDs in the openai messages, if they1563 are present in the source messages.1564 pass_through_unknown_blocks: Whether to include content blocks with unknown1565 formats in the output. 
If `False`, an error is raised if an unknown1566 content block is encountered.15671568 Raises:1569 ValueError: if an unrecognized `text_format` is specified, or if a message1570 content block is missing expected keys.15711572 Returns:1573 The return type depends on the input type:15741575 - dict:1576 If a single message-like object is passed in, a single OpenAI message1577 dict is returned.1578 - list[dict]:1579 If a sequence of message-like objects are passed in, a list of OpenAI1580 message dicts is returned.15811582 Example:1583 ```python1584 from langchain_core.messages import (1585 convert_to_openai_messages,1586 AIMessage,1587 SystemMessage,1588 ToolMessage,1589 )15901591 messages = [1592 SystemMessage([{"type": "text", "text": "foo"}]),1593 {1594 "role": "user",1595 "content": [1596 {"type": "text", "text": "what's in this"},1597 {1598 "type": "image_url",1599 "image_url": {"url": "data:image/png;base64,'/9j/4AAQSk'"},1600 },1601 ],1602 },1603 AIMessage(1604 "",1605 tool_calls=[1606 {1607 "name": "analyze",1608 "args": {"baz": "buz"},1609 "id": "1",1610 "type": "tool_call",1611 }1612 ],1613 ),1614 ToolMessage("foobar", tool_call_id="1", name="bar"),1615 {"role": "assistant", "content": "that's nice"},1616 ]1617 oai_messages = convert_to_openai_messages(messages)1618 # -> [1619 # {'role': 'system', 'content': 'foo'},1620 # {'role': 'user', 'content': [{'type': 'text', 'text': 'what's in this'}, {'type': 'image_url', 'image_url': {'url': "data:image/png;base64,'/9j/4AAQSk'"}}]},1621 # {'role': 'assistant', 'tool_calls': [{'type': 'function', 'id': '1','function': {'name': 'analyze', 'arguments': '{"baz": "buz"}'}}], 'content': ''},1622 # {'role': 'tool', 'name': 'bar', 'content': 'foobar'},1623 # {'role': 'assistant', 'content': 'that's nice'}1624 # ]1625 ```16261627 !!! 
version-added "Added in `langchain-core` 0.3.11"16281629 """ # noqa: E5011630 if text_format not in {"string", "block"}:1631 err = f"Unrecognized {text_format=}, expected one of 'string' or 'block'."1632 raise ValueError(err)16331634 oai_messages: list[dict] = []16351636 if is_single := isinstance(messages, (BaseMessage, dict, str)):1637 messages = [messages]16381639 messages = convert_to_messages(messages)16401641 for i, message in enumerate(messages):1642 oai_msg: dict = {"role": _get_message_openai_role(message)}1643 tool_messages: list = []1644 content: str | list[dict]16451646 if message.name:1647 oai_msg["name"] = message.name1648 if isinstance(message, AIMessage) and message.tool_calls:1649 oai_msg["tool_calls"] = _convert_to_openai_tool_calls(message.tool_calls)1650 if message.additional_kwargs.get("refusal"):1651 oai_msg["refusal"] = message.additional_kwargs["refusal"]1652 if isinstance(message, ToolMessage):1653 oai_msg["tool_call_id"] = message.tool_call_id1654 if include_id and message.id:1655 oai_msg["id"] = message.id16561657 if not message.content:1658 content = "" if text_format == "string" else []1659 elif isinstance(message.content, str):1660 if text_format == "string":1661 content = message.content1662 else:1663 content = [{"type": "text", "text": message.content}]1664 elif text_format == "string" and all(1665 isinstance(block, str) or block.get("type") == "text"1666 for block in message.content1667 ):1668 content = "\n".join(1669 block if isinstance(block, str) else block["text"]1670 for block in message.content1671 )1672 else:1673 content = []1674 for j, block in enumerate(message.content):1675 # OpenAI format1676 if isinstance(block, str):1677 content.append({"type": "text", "text": block})1678 elif block.get("type") == "text":1679 if missing := [k for k in ("text",) if k not in block]:1680 err = (1681 f"Unrecognized content block at "1682 f"messages[{i}].content[{j}] has 'type': 'text' "1683 f"but is missing expected key(s) "1684 
f"{missing}. Full content block:\n\n{block}"1685 )1686 raise ValueError(err)1687 content.append({"type": block["type"], "text": block["text"]})1688 elif block.get("type") == "image_url":1689 if missing := [k for k in ("image_url",) if k not in block]:1690 err = (1691 f"Unrecognized content block at "1692 f"messages[{i}].content[{j}] has 'type': 'image_url' "1693 f"but is missing expected key(s) "1694 f"{missing}. Full content block:\n\n{block}"1695 )1696 raise ValueError(err)1697 content.append(1698 {1699 "type": "image_url",1700 "image_url": block["image_url"],1701 }1702 )1703 # Standard multi-modal content block1704 elif is_data_content_block(block):1705 formatted_block = convert_to_openai_data_block(block)1706 if (1707 formatted_block.get("type") == "file"1708 and "file" in formatted_block1709 and "filename" not in formatted_block["file"]1710 ):1711 logger.info("Generating a fallback filename.")1712 formatted_block["file"]["filename"] = "LC_AUTOGENERATED"1713 content.append(formatted_block)1714 # Anthropic and Bedrock converse format1715 elif (block.get("type") == "image") or "image" in block:1716 # Anthropic1717 if source := block.get("source"):1718 if missing := [1719 k for k in ("media_type", "type", "data") if k not in source1720 ]:1721 err = (1722 f"Unrecognized content block at "1723 f"messages[{i}].content[{j}] has 'type': 'image' "1724 f"but 'source' is missing expected key(s) "1725 f"{missing}. Full content block:\n\n{block}"1726 )1727 raise ValueError(err)1728 content.append(1729 {1730 "type": "image_url",1731 "image_url": {1732 "url": (1733 f"data:{source['media_type']};"1734 f"{source['type']},{source['data']}"1735 )1736 },1737 }1738 )1739 # Bedrock converse1740 elif image := block.get("image"):1741 if missing := [1742 k for k in ("source", "format") if k not in image1743 ]:1744 err = (1745 f"Unrecognized content block at "1746 f"messages[{i}].content[{j}] has key 'image', "1747 f"but 'image' is missing expected key(s) "1748 f"{missing}. 
Full content block:\n\n{block}"1749 )1750 raise ValueError(err)1751 b64_image = _bytes_to_b64_str(image["source"]["bytes"])1752 content.append(1753 {1754 "type": "image_url",1755 "image_url": {1756 "url": (1757 f"data:image/{image['format']};base64,{b64_image}"1758 )1759 },1760 }1761 )1762 else:1763 err = (1764 f"Unrecognized content block at "1765 f"messages[{i}].content[{j}] has 'type': 'image' "1766 f"but does not have a 'source' or 'image' key. Full "1767 f"content block:\n\n{block}"1768 )1769 raise ValueError(err)1770 # OpenAI file format1771 elif (1772 block.get("type") == "file"1773 and isinstance(block.get("file"), dict)1774 and isinstance(block.get("file", {}).get("file_data"), str)1775 ):1776 if block.get("file", {}).get("filename") is None:1777 logger.info("Generating a fallback filename.")1778 block["file"]["filename"] = "LC_AUTOGENERATED"1779 content.append(block)1780 # OpenAI audio format1781 elif (1782 block.get("type") == "input_audio"1783 and isinstance(block.get("input_audio"), dict)1784 and isinstance(block.get("input_audio", {}).get("data"), str)1785 and isinstance(block.get("input_audio", {}).get("format"), str)1786 ):1787 content.append(block)1788 elif block.get("type") == "tool_use":1789 if missing := [1790 k for k in ("id", "name", "input") if k not in block1791 ]:1792 err = (1793 f"Unrecognized content block at "1794 f"messages[{i}].content[{j}] has 'type': "1795 f"'tool_use', but is missing expected key(s) "1796 f"{missing}. 
Full content block:\n\n{block}"1797 )1798 raise ValueError(err)1799 if not any(1800 tool_call["id"] == block["id"]1801 for tool_call in cast("AIMessage", message).tool_calls1802 ):1803 oai_msg["tool_calls"] = oai_msg.get("tool_calls", [])1804 oai_msg["tool_calls"].append(1805 {1806 "type": "function",1807 "id": block["id"],1808 "function": {1809 "name": block["name"],1810 "arguments": json.dumps(1811 block["input"], ensure_ascii=False1812 ),1813 },1814 }1815 )1816 elif block.get("type") == "function_call": # OpenAI Responses1817 if not any(1818 tool_call["id"] == block.get("call_id")1819 for tool_call in cast("AIMessage", message).tool_calls1820 ):1821 if missing := [1822 k1823 for k in ("call_id", "name", "arguments")1824 if k not in block1825 ]:1826 err = (1827 f"Unrecognized content block at "1828 f"messages[{i}].content[{j}] has 'type': "1829 f"'tool_use', but is missing expected key(s) "1830 f"{missing}. Full content block:\n\n{block}"1831 )1832 raise ValueError(err)1833 oai_msg["tool_calls"] = oai_msg.get("tool_calls", [])1834 oai_msg["tool_calls"].append(1835 {1836 "type": "function",1837 "id": block.get("call_id"),1838 "function": {1839 "name": block.get("name"),1840 "arguments": block.get("arguments"),1841 },1842 }1843 )1844 if pass_through_unknown_blocks:1845 content.append(block)1846 elif block.get("type") == "tool_result":1847 if missing := [1848 k for k in ("content", "tool_use_id") if k not in block1849 ]:1850 msg = (1851 f"Unrecognized content block at "1852 f"messages[{i}].content[{j}] has 'type': "1853 f"'tool_result', but is missing expected key(s) "1854 f"{missing}. 
Full content block:\n\n{block}"1855 )1856 raise ValueError(msg)1857 tool_message = ToolMessage(1858 block["content"],1859 tool_call_id=block["tool_use_id"],1860 status="error" if block.get("is_error") else "success",1861 )1862 # Recurse to make sure tool message contents are OpenAI format.1863 tool_messages.extend(1864 convert_to_openai_messages(1865 [tool_message], text_format=text_format1866 )1867 )1868 elif (block.get("type") == "json") or "json" in block:1869 if "json" not in block:1870 msg = (1871 f"Unrecognized content block at "1872 f"messages[{i}].content[{j}] has 'type': 'json' "1873 f"but does not have a 'json' key. Full "1874 f"content block:\n\n{block}"1875 )1876 raise ValueError(msg)1877 content.append(1878 {1879 "type": "text",1880 "text": json.dumps(block["json"]),1881 }1882 )1883 elif (block.get("type") == "guard_content") or "guard_content" in block:1884 if (1885 "guard_content" not in block1886 or "text" not in block["guard_content"]1887 ):1888 msg = (1889 f"Unrecognized content block at "1890 f"messages[{i}].content[{j}] has 'type': "1891 f"'guard_content' but does not have a "1892 f"messages[{i}].content[{j}]['guard_content']['text'] "1893 f"key. Full content block:\n\n{block}"1894 )1895 raise ValueError(msg)1896 text = block["guard_content"]["text"]1897 if isinstance(text, dict):1898 text = text["text"]1899 content.append({"type": "text", "text": text})1900 # VertexAI format1901 elif block.get("type") == "media":1902 if missing := [k for k in ("mime_type", "data") if k not in block]:1903 err = (1904 f"Unrecognized content block at "1905 f"messages[{i}].content[{j}] has 'type': "1906 f"'media' but does not have key(s) {missing}. 
Full "1907 f"content block:\n\n{block}"1908 )1909 raise ValueError(err)1910 if "image" not in block["mime_type"]:1911 err = (1912 f"OpenAI messages can only support text and image data."1913 f" Received content block with media of type:"1914 f" {block['mime_type']}"1915 )1916 raise ValueError(err)1917 b64_image = _bytes_to_b64_str(block["data"])1918 content.append(1919 {1920 "type": "image_url",1921 "image_url": {1922 "url": (f"data:{block['mime_type']};base64,{b64_image}")1923 },1924 }1925 )1926 elif (1927 block.get("type") in {"thinking", "reasoning"}1928 or pass_through_unknown_blocks1929 ):1930 content.append(block)1931 else:1932 err = (1933 f"Unrecognized content block at "1934 f"messages[{i}].content[{j}] does not match OpenAI, "1935 f"Anthropic, Bedrock Converse, or VertexAI format. Full "1936 f"content block:\n\n{block}"1937 )1938 raise ValueError(err)1939 if text_format == "string" and not any(1940 block["type"] != "text" for block in content1941 ):1942 content = "\n".join(block["text"] for block in content)1943 oai_msg["content"] = content1944 if message.content and not oai_msg["content"] and tool_messages:1945 oai_messages.extend(tool_messages)1946 else:1947 oai_messages.extend([oai_msg, *tool_messages])19481949 if is_single:1950 return oai_messages[0]1951 return oai_messages195219531954def _first_max_tokens(1955 messages: Sequence[BaseMessage],1956 *,1957 max_tokens: int,1958 token_counter: Callable[[list[BaseMessage]], int],1959 text_splitter: Callable[[str], list[str]],1960 partial_strategy: Literal["first", "last"] | None = None,1961 end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,1962) -> list[BaseMessage]:1963 messages = list(messages)1964 if not messages:1965 return messages19661967 # Check if all messages already fit within token limit1968 if token_counter(messages) <= max_tokens:1969 # When all messages fit, only apply end_on filtering if needed1970 if end_on:1971 for _ in range(len(messages)):1972 if not 
_is_message_type(messages[-1], end_on):1973 messages.pop()1974 else:1975 break1976 return messages19771978 # Use binary search to find the maximum number of messages within token limit1979 left, right = 0, len(messages)1980 max_iterations = len(messages).bit_length()1981 for _ in range(max_iterations):1982 if left >= right:1983 break1984 mid = (left + right + 1) // 21985 if token_counter(messages[:mid]) <= max_tokens:1986 left = mid1987 idx = mid1988 else:1989 right = mid - 119901991 # idx now contains the maximum number of complete messages we can include1992 idx = left19931994 if partial_strategy and idx < len(messages):1995 included_partial = False1996 copied = False1997 if isinstance(messages[idx].content, list):1998 excluded = messages[idx].model_copy(deep=True)1999 copied = True2000 num_block = len(excluded.content)
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.