import base64
import json
import math
import re
from collections.abc import Callable, Sequence
from typing import Any, TypedDict

import pytest
from typing_extensions import NotRequired, override

from langchain_core.language_models.fake_chat_models import FakeChatModel
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    ChatMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
    ToolCall,
    ToolMessage,
)
from langchain_core.messages.utils import (
    MessageLikeRepresentation,
    convert_to_messages,
    convert_to_openai_messages,
    count_tokens_approximately,
    filter_messages,
    get_buffer_string,
    merge_message_runs,
    trim_messages,
)
from langchain_core.tools import BaseTool, tool


@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
def test_merge_message_runs_str(msg_cls: type[BaseMessage]) -> None:
    """Test that same-type string messages merge into one, joined by newlines.

    Also checks that the input messages are left unmodified.
    """
    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = [msg_cls("foo\nbar\nbaz")]
    actual = merge_message_runs(messages)
    assert actual == expected
    assert messages == messages_model_copy


@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
def test_merge_message_runs_str_with_specified_separator(
    msg_cls: type[BaseMessage],
) -> None:
    """Test that `chunk_separator` is used to join merged string contents."""
    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = [msg_cls("foo<sep>bar<sep>baz")]
    actual = merge_message_runs(messages, chunk_separator="<sep>")
    assert actual == expected
    assert messages == messages_model_copy


@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
def test_merge_message_runs_str_without_separator(
    msg_cls: type[BaseMessage],
) -> None:
    """Test that an empty `chunk_separator` concatenates contents directly."""
    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = [msg_cls("foobarbaz")]
    actual = merge_message_runs(messages, chunk_separator="")
    assert actual == expected
    assert messages == messages_model_copy


def test_merge_message_runs_response_metadata() -> None:
    """Test that the merged message keeps the first message's id and metadata.

    The second message's `response_metadata` must not be changed by the merge.
    """
    messages = [
        AIMessage("foo", id="1", response_metadata={"input_tokens": 1}),
        AIMessage("bar", id="2", response_metadata={"input_tokens": 2}),
    ]
    expected = [
        AIMessage(
            "foo\nbar",
            id="1",
            response_metadata={"input_tokens": 1},
        )
    ]
    actual = merge_message_runs(messages)
    assert actual == expected
    # Check it's not mutated
    assert messages[1].response_metadata == {"input_tokens": 2}


def test_merge_message_runs_content() -> None:
    """Test merging AI messages that mix string and list-of-block content.

    The expected result is a single message whose content is a list holding
    every original piece in order, whose `tool_calls` are concatenated, and
    whose id is the first message's id. The runnable form
    (`merge_message_runs().invoke(...)`) must give the same result.
    """
    messages = [
        AIMessage("foo", id="1"),
        AIMessage(
            [
                {"text": "bar", "type": "text"},
                {"image_url": "...", "type": "image_url"},
            ],
            tool_calls=[
                ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call")
            ],
            id="2",
        ),
        AIMessage(
            "baz",
            tool_calls=[
                ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call")
            ],
            id="3",
        ),
    ]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = [
        AIMessage(
            [
                "foo",
                {"text": "bar", "type": "text"},
                {"image_url": "...", "type": "image_url"},
                "baz",
            ],
            tool_calls=[
                ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call"),
                ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call"),
            ],
            id="1",
        ),
    ]
    actual = merge_message_runs(messages)
    assert actual == expected
    invoked = merge_message_runs().invoke(messages)
    assert actual == invoked
    assert messages == messages_model_copy


def test_merge_messages_tool_messages() -> None:
    """Test that consecutive `ToolMessage`s are returned unmerged."""
    messages = [
        ToolMessage("foo", tool_call_id="1"),
        ToolMessage("bar", tool_call_id="2"),
    ]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    actual = merge_message_runs(messages)
    assert actual == messages
    assert messages == messages_model_copy


class FilterFields(TypedDict):
    """Keyword arguments passed to `filter_messages` by the tests below.

    Every key is optional; it is used to type the parametrized `filters`
    argument of `test_filter_message`.
    """

    include_names: NotRequired[Sequence[str]]
    exclude_names: NotRequired[Sequence[str]]
    include_types: NotRequired[Sequence[str | type[BaseMessage]]]
    exclude_types: NotRequired[Sequence[str | type[BaseMessage]]]
    include_ids: NotRequired[Sequence[str]]
    exclude_ids: NotRequired[Sequence[str]]
    exclude_tool_calls: NotRequired[Sequence[str] | bool]


@pytest.mark.parametrize(
    "filters",
    [
        {"include_names": ["blur"]},
        {"exclude_names": ["blah"]},
        {"include_ids": ["2"]},
        {"exclude_ids": ["1"]},
        {"include_types": "human"},
        {"include_types": ["human"]},
        {"include_types": HumanMessage},
        {"include_types": [HumanMessage]},
        {"exclude_types": "system"},
        {"exclude_types": ["system"]},
        {"exclude_types": SystemMessage},
        {"exclude_types": [SystemMessage]},
        {"include_names": ["blah", "blur"], "exclude_types": [SystemMessage]},
    ],
)
def test_filter_message(filters: FilterFields) -> None:
    """Test that each filter combination keeps only the human message.

    Both the direct call and the runnable form (`filter_messages(**filters)`
    followed by `.invoke`) are checked, along with non-mutation of the input.
    """
    messages = [
        SystemMessage("foo", name="blah", id="1"),
        HumanMessage("bar", name="blur", id="2"),
    ]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = messages[1:2]
    actual = filter_messages(messages, **filters)
    assert expected == actual
    invoked = filter_messages(**filters).invoke(messages)
    assert invoked == actual
    assert messages == messages_model_copy


def test_filter_message_exclude_tool_calls() -> None:
    """Test `exclude_tool_calls` with `True` and with explicit tool call ids.

    Excluding every tool call is expected to drop both the AI message that
    issued the calls and the matching tool messages; excluding a single id is
    expected to drop only that call and its tool message.
    """
    tool_calls = [
        {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},
        {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},
    ]
    messages = [
        HumanMessage("foo", name="blah", id="1"),
        AIMessage("foo-response", name="blah", id="2"),
        HumanMessage("bar", name="blur", id="3"),
        AIMessage(
            "bar-response",
            tool_calls=tool_calls,
            id="4",
        ),
        ToolMessage("baz", tool_call_id="1", id="5"),
        ToolMessage("qux", tool_call_id="2", id="6"),
    ]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = messages[:3]

    # test excluding all tool calls
    actual = filter_messages(messages, exclude_tool_calls=True)
    assert expected == actual

    # test explicitly excluding all tool calls
    actual = filter_messages(messages, exclude_tool_calls=["1", "2"])
    assert expected == actual

    # test excluding a specific tool call
    expected = messages[:5]
    expected[3] = expected[3].model_copy(update={"tool_calls": [tool_calls[0]]})
    actual = filter_messages(messages, exclude_tool_calls=["2"])
    assert expected == actual

    # assert that we didn't mutate the original messages
    assert messages == messages_model_copy


def test_filter_message_exclude_tool_calls_content_blocks() -> None:
    """Test `exclude_tool_calls` when the AI message also has `tool_use` blocks.

    When a single tool call id is excluded, the matching `tool_use` content
    block is expected to be removed along with the entry in `tool_calls`.
    """
    tool_calls = [
        {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},
        {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},
    ]
    messages = [
        HumanMessage("foo", name="blah", id="1"),
        AIMessage("foo-response", name="blah", id="2"),
        HumanMessage("bar", name="blur", id="3"),
        AIMessage(
            [
                {"text": "bar-response", "type": "text"},
                {"name": "foo", "type": "tool_use", "id": "1"},
                {"name": "bar", "type": "tool_use", "id": "2"},
            ],
            tool_calls=tool_calls,
            id="4",
        ),
        ToolMessage("baz", tool_call_id="1", id="5"),
        ToolMessage("qux", tool_call_id="2", id="6"),
    ]
    messages_model_copy = [m.model_copy(deep=True) for m in messages]
    expected = messages[:3]

    # test excluding all tool calls
    actual = filter_messages(messages, exclude_tool_calls=True)
    assert expected == actual

    # test explicitly excluding all tool calls
    actual = filter_messages(messages, exclude_tool_calls=["1", "2"])
    assert expected == actual

    # test excluding a specific tool call
    expected = messages[:4] + messages[-1:]
    expected[3] = expected[3].model_copy(
        update={
            "tool_calls": [tool_calls[1]],
            "content": [
                {"text": "bar-response", "type": "text"},
                {"name": "bar", "type": "tool_use", "id": "2"},
            ],
        }
    )
    actual = filter_messages(messages, exclude_tool_calls=["1"])
    assert expected == actual

    # assert that we didn't mutate the original messages
    assert messages == messages_model_copy


# Shared input for the trim_messages tests. Under `dummy_token_counter` each
# string-content message counts as 10 tokens and the two-block AI message
# ("second") counts as 3 + 2 * 4 + 3 = 14 tokens.
_MESSAGES_TO_TRIM = [
    SystemMessage("This is a 4 token text."),
    HumanMessage("This is a 4 token text.", id="first"),
    AIMessage(
        [
            {"type": "text", "text": "This is the FIRST 4 token block."},
            {"type": "text", "text": "This is the SECOND 4 token block."},
        ],
        id="second",
    ),
    HumanMessage("This is a 4 token text.", id="third"),
    AIMessage("This is a 4 token text.", id="fourth"),
]
# Deep copy taken at import time; tests compare against it to detect mutation
# of the shared input.
_MESSAGES_TO_TRIM_COPY = [m.model_copy(deep=True) for m in _MESSAGES_TO_TRIM]


def test_trim_messages_first_30() -> None:
    """Test `strategy="first"` with a 30-token budget keeps the first two messages."""
    expected = [
        SystemMessage("This is a 4 token text."),
        HumanMessage("This is a 4 token text.", id="first"),
    ]
    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        token_counter=dummy_token_counter,
        strategy="first",
    )
    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_first_30_allow_partial() -> None:
    """Test that `allow_partial=True` keeps the first block of the third message."""
    expected = [
        SystemMessage("This is a 4 token text."),
        HumanMessage("This is a 4 token text.", id="first"),
        AIMessage(
            [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"
        ),
    ]
    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        token_counter=dummy_token_counter,
        strategy="first",
        allow_partial=True,
    )
    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_first_30_allow_partial_end_on_human() -> None:
    """Test that `end_on="human"` drops the trailing partial AI message."""
    expected = [
        SystemMessage("This is a 4 token text."),
        HumanMessage("This is a 4 token text.", id="first"),
    ]

    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        token_counter=dummy_token_counter,
        strategy="first",
        allow_partial=True,
        end_on="human",
    )
    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_last_30_include_system() -> None:
    """Test `strategy="last"` with `include_system=True` and a 30-token budget.

    The system message is expected to be kept together with the last two
    messages.
    """
    expected = [
        SystemMessage("This is a 4 token text."),
        HumanMessage("This is a 4 token text.", id="third"),
        AIMessage("This is a 4 token text.", id="fourth"),
    ]

    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        include_system=True,
        token_counter=dummy_token_counter,
        strategy="last",
    )
    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_last_40_include_system_allow_partial() -> None:
    """Test `strategy="last"` with partial messages and a 40-token budget.

    Only the SECOND block of the two-block AI message is expected to survive,
    in addition to the system message and the last two messages.
    """
    expected = [
        SystemMessage("This is a 4 token text."),
        AIMessage(
            [
                {"type": "text", "text": "This is the SECOND 4 token block."},
            ],
            id="second",
        ),
        HumanMessage("This is a 4 token text.", id="third"),
        AIMessage("This is a 4 token text.", id="fourth"),
    ]

    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=40,
        token_counter=dummy_token_counter,
        strategy="last",
        allow_partial=True,
        include_system=True,
    )

    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_last_30_include_system_allow_partial_end_on_human() -> None:
    """Test `strategy="last"` combined with `end_on="human"` and partial messages.

    The expected result ends on the "third" human message and is preceded by
    the system message and the SECOND block of the two-block AI message.
    """
    expected = [
        SystemMessage("This is a 4 token text."),
        AIMessage(
            [
                {"type": "text", "text": "This is the SECOND 4 token block."},
            ],
            id="second",
        ),
        HumanMessage("This is a 4 token text.", id="third"),
    ]

    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        token_counter=dummy_token_counter,
        strategy="last",
        allow_partial=True,
        include_system=True,
        end_on="human",
    )

    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_last_40_include_system_allow_partial_start_on_human() -> None:
    """Test `strategy="last"` combined with `start_on="human"`.

    The expected result is the system message followed by the last two
    messages, starting on the "third" human message.
    """
    expected = [
        SystemMessage("This is a 4 token text."),
        HumanMessage("This is a 4 token text.", id="third"),
        AIMessage("This is a 4 token text.", id="fourth"),
    ]

    # NOTE(review): the test name says "last_40" but max_tokens is 30 here —
    # confirm which value is intended.
    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=30,
        token_counter=dummy_token_counter,
        strategy="last",
        allow_partial=True,
        include_system=True,
        start_on="human",
    )

    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_allow_partial_one_message() -> None:
    """Test partial trimming of a single message with `strategy="first"`.

    The token counter counts characters and `text_splitter=list` splits the
    text into characters, so a budget of 2 is expected to keep "Th".
    """
    expected = [
        HumanMessage("Th", id="third"),
    ]

    actual = trim_messages(
        [HumanMessage("This is a funky text.", id="third")],
        max_tokens=2,
        token_counter=lambda messages: sum(len(m.content) for m in messages),
        text_splitter=list,
        strategy="first",
        allow_partial=True,
    )

    assert actual == expected
    # NOTE(review): _MESSAGES_TO_TRIM is not passed to trim_messages in this
    # test — confirm whether this assertion is intentional.
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_last_allow_partial_one_message() -> None:
    """Test partial trimming of a single message with `strategy="last"`.

    Uses the same character-level counter and splitter as the "first" variant;
    a budget of 2 is expected to keep the trailing "t.".
    """
    expected = [
        HumanMessage("t.", id="third"),
    ]

    actual = trim_messages(
        [HumanMessage("This is a funky text.", id="third")],
        max_tokens=2,
        token_counter=lambda messages: sum(len(m.content) for m in messages),
        text_splitter=list,
        strategy="last",
        allow_partial=True,
    )

    assert actual == expected
    # NOTE(review): _MESSAGES_TO_TRIM is not passed to trim_messages in this
    # test — confirm whether this assertion is intentional.
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_allow_partial_text_splitter() -> None:
    """Test partial trimming with a word-level counter and a custom splitter.

    With a 10-word budget and `strategy="last"`, the last message is expected
    in full and the one before it cut down to "a 4 token text.".
    """
    expected = [
        HumanMessage("a 4 token text.", id="third"),
        AIMessage("This is a 4 token text.", id="fourth"),
    ]

    def count_words(msgs: list[BaseMessage]) -> int:
        # Counts space-separated words; list content is joined from the
        # "text" field of each block before splitting.
        count = 0
        for msg in msgs:
            if isinstance(msg.content, str):
                count += len(msg.content.split(" "))
            else:
                count += len(
                    " ".join(block["text"] for block in msg.content).split(" ")  # type: ignore[index]
                )
        return count

    def _split_on_space(text: str) -> list[str]:
        # Keeps the trailing space on every piece except the last, so that
        # joining the pieces reproduces the original text.
        splits = text.split(" ")
        return [s + " " for s in splits[:-1]] + splits[-1:]

    actual = trim_messages(
        _MESSAGES_TO_TRIM,
        max_tokens=10,
        token_counter=count_words,
        strategy="last",
        allow_partial=True,
        text_splitter=_split_on_space,
    )
    assert actual == expected
    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY


def test_trim_messages_include_system_strategy_last_empty_messages() -> None:
    """Test that trimming an empty list with `include_system=True` returns `[]`."""
    expected: list[BaseMessage] = []

    actual = trim_messages(
        max_tokens=10,
        token_counter=dummy_token_counter,
        strategy="last",
        include_system=True,
    ).invoke([])

    assert actual == expected


def test_trim_messages_invoke() -> None:
    """Test that the runnable form of `trim_messages` matches the direct call."""
    actual = trim_messages(max_tokens=10, token_counter=dummy_token_counter).invoke(
        _MESSAGES_TO_TRIM
    )
    expected = trim_messages(
        _MESSAGES_TO_TRIM, max_tokens=10, token_counter=dummy_token_counter
    )
    assert actual == expected


def test_trim_messages_bound_model_token_counter() -> None:
    """Test that invoking a trimmer with a bound model as `token_counter` does not raise."""
    trimmer = trim_messages(
        max_tokens=10,
        token_counter=FakeTokenCountingModel().bind(foo="bar"),  # type: ignore[call-overload]
    )
    trimmer.invoke([HumanMessage("foobar")])


def test_trim_messages_bad_token_counter() -> None:
    """Test that a non-model, non-callable `token_counter` raises `ValueError` on invoke."""
    trimmer = trim_messages(max_tokens=10, token_counter={})  # type: ignore[call-overload]
    with pytest.raises(
        ValueError,
        match=re.escape(
            "'token_counter' expected to be a model that implements "
            "'get_num_tokens_from_messages()' or a function. "
            "Received object of type <class 'dict'>."
        ),
    ):
        trimmer.invoke([HumanMessage("foobar")])


def dummy_token_counter(messages: list[BaseMessage]) -> int:
    """Return a deterministic fake token count for `messages`.

    A message with string content counts as 3 + 4 + 3 = 10 tokens regardless
    of its text. A message with list content counts as 3 + 4 * len(content)
    + 3. Content that is neither a string nor a list adds nothing.
    """
    # treat each message like it adds 3 default tokens at the beginning
    # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens
    # per message.

    default_content_len = 4
    default_msg_prefix_len = 3
    default_msg_suffix_len = 3

    count = 0
    for msg in messages:
        if isinstance(msg.content, str):
            count += (
                default_msg_prefix_len + default_content_len + default_msg_suffix_len
            )
        if isinstance(msg.content, list):
            count += (
                default_msg_prefix_len
                + len(msg.content) * default_content_len
                + default_msg_suffix_len
            )
    return count


def test_trim_messages_partial_text_splitting() -> None:
    """Test partial trimming of string content with a per-character splitter."""
    messages = [HumanMessage(content="This is a long message that needs trimming")]
    messages_copy = [m.model_copy(deep=True) for m in messages]

    def count_characters(msgs: list[BaseMessage]) -> int:
        return sum(len(m.content) if isinstance(m.content, str) else 0 for m in msgs)

    # Return individual characters to test text splitting
    def char_splitter(text: str) -> list[str]:
        return list(text)

    result = trim_messages(
        messages,
        max_tokens=10,  # Only allow 10 characters
        token_counter=count_characters,
        strategy="first",
        allow_partial=True,
        text_splitter=char_splitter,
    )

    assert len(result) == 1
    assert result[0].content == "This is a "  # First 10 characters
    assert messages == messages_copy


def test_trim_messages_mixed_content_with_partial() -> None:
    """Test that partial trimming of list content keeps only the blocks that fit."""
    messages = [
        AIMessage(
            content=[
                {"type": "text", "text": "First part of text."},
                {"type": "text", "text": "Second part that should be trimmed."},
            ]
        )
    ]
    messages_copy = [m.model_copy(deep=True) for m in messages]

    # Count total length of all text parts
    def count_text_length(msgs: list[BaseMessage]) -> int:
        total = 0
        for msg in msgs:
            if isinstance(msg.content, list):
                for block in msg.content:
                    if isinstance(block, dict) and block.get("type") == "text":
                        total += len(block["text"])
            elif isinstance(msg.content, str):
                total += len(msg.content)
        return total

    result = trim_messages(
        messages,
        max_tokens=20,  # Only allow first text block
        token_counter=count_text_length,
        strategy="first",
        allow_partial=True,
    )

    assert len(result) == 1
    assert len(result[0].content) == 1
    content = result[0].content[0]
    assert isinstance(content, dict)
    assert content["text"] == "First part of text."
    assert messages == messages_copy


def test_trim_messages_exact_token_boundary() -> None:
    """Test that messages whose count equals `max_tokens` exactly are kept."""
    messages = [
        SystemMessage(content="10 tokens exactly."),
        HumanMessage(content="Another 10 tokens."),
    ]

    # First message only
    result1 = trim_messages(
        messages,
        max_tokens=10,  # Exactly the size of first message
        token_counter=dummy_token_counter,
        strategy="first",
    )
    assert len(result1) == 1
    assert result1[0].content == "10 tokens exactly."

    # Both messages exactly fit
    result2 = trim_messages(
        messages,
        max_tokens=20,  # Exactly the size of both messages
        token_counter=dummy_token_counter,
        strategy="first",
    )
    assert len(result2) == 2
    assert result2 == messages


def test_trim_messages_start_on_with_allow_partial() -> None:
    """Test `start_on="human"` together with `allow_partial=True` and `strategy="last"`.

    Only the final human message is expected in the result.
    """
    messages = [
        HumanMessage(content="First human message"),
        AIMessage(content="AI response"),
        HumanMessage(content="Second human message"),
    ]
    messages_copy = [m.model_copy(deep=True) for m in messages]
    result = trim_messages(
        messages,
        max_tokens=20,
        token_counter=dummy_token_counter,
        strategy="last",
        allow_partial=True,
        start_on="human",
    )

    assert len(result) == 1
    assert result[0].content == "Second human message"
    assert messages == messages_copy


def test_trim_messages_token_counter_shortcut_approximate() -> None:
    """Test that `'approximate'` shortcut works for `token_counter`."""
    messages = [
        SystemMessage("This is a test message"),
        HumanMessage("Another test message", id="first"),
        AIMessage("AI response here", id="second"),
    ]
    messages_copy = [m.model_copy(deep=True) for m in messages]

    # Test using the "approximate" shortcut
    result_shortcut = trim_messages(
        messages,
        max_tokens=50,
        token_counter="approximate",
        strategy="last",
    )

    # Test using count_tokens_approximately directly
    result_direct = trim_messages(
        messages,
        max_tokens=50,
        token_counter=count_tokens_approximately,
        strategy="last",
    )

    # Both should produce the same result
    assert result_shortcut == result_direct
    assert messages == messages_copy


def test_trim_messages_token_counter_shortcut_invalid() -> None:
    """Test that invalid `token_counter` shortcut raises `ValueError`."""
    messages = [
        SystemMessage("This is a test message"),
        HumanMessage("Another test message"),
    ]

    # Test with invalid shortcut - intentionally passing invalid string to verify
    # runtime error handling for dynamically-constructed inputs
    with pytest.raises(ValueError, match="Invalid token_counter shortcut 'invalid'"):
        trim_messages(  # type: ignore[call-overload]
            messages,
            max_tokens=50,
            token_counter="invalid",
            strategy="last",
        )


def test_trim_messages_token_counter_shortcut_with_options() -> None:
    """Test that `'approximate'` shortcut works with different trim options."""
    messages = [
        SystemMessage("System instructions"),
        HumanMessage("First human message", id="first"),
        AIMessage("First AI response", id="ai1"),
        HumanMessage("Second human message", id="second"),
        AIMessage("Second AI response", id="ai2"),
    ]
    messages_copy = [m.model_copy(deep=True) for m in messages]

    # Test with various options
    result = trim_messages(
        messages,
        max_tokens=100,
        token_counter="approximate",
        strategy="last",
        include_system=True,
        start_on="human",
    )

    # Should include system message and start on human
    assert len(result) >= 2
    assert isinstance(result[0], SystemMessage)
    assert any(isinstance(msg, HumanMessage) for msg in result[1:])
    assert messages == messages_copy


class FakeTokenCountingModel(FakeChatModel):
    """Fake chat model whose token counting delegates to `dummy_token_counter`."""

    @override
    def get_num_tokens_from_messages(
        self,
        messages: list[BaseMessage],
        tools: Sequence[dict[str, Any] | type | Callable[..., Any] | BaseTool]
        | None = None,
    ) -> int:
        """Return `dummy_token_counter(messages)`; `tools` is ignored."""
        return dummy_token_counter(messages)


def test_convert_to_messages() -> None:
    """Test `convert_to_messages` across the supported message-like inputs.

    Covers `BaseMessage` instances, OpenAI-style dicts, (role, content)
    tuples, a bare string and a LangChain-style dict, and compares the result
    against explicitly constructed messages.
    """
    message_like: list = [
        # BaseMessage
        SystemMessage("1"),
        SystemMessage("1.1", additional_kwargs={"__openai_role__": "developer"}),
        HumanMessage([{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"),
        AIMessage(
            [
                {"type": "text", "text": "3.1"},
                {
                    "type": "tool_use",
                    "id": "3.2",
                    "name": "3.3",
                    "input": {"3.4": "3.5"},
                },
            ]
        ),
        AIMessage(
            [
                {"type": "text", "text": "4.1"},
                {
                    "type": "tool_use",
                    "id": "4.2",
                    "name": "4.3",
                    "input": {"4.4": "4.5"},
                },
            ],
            tool_calls=[
                {
                    "name": "4.3",
                    "args": {"4.4": "4.5"},
                    "id": "4.2",
                    "type": "tool_call",
                }
            ],
        ),
        ToolMessage("5.1", tool_call_id="5.2", name="5.3"),
        # OpenAI dict
        {"role": "system", "content": "6"},
        {"role": "developer", "content": "6.1"},
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": "7.1"}}],
            "name": "7.2",
        },
        {
            "role": "assistant",
            "content": [{"type": "text", "text": "8.1"}],
            "tool_calls": [
                {
                    "type": "function",
                    "function": {
                        "arguments": json.dumps({"8.2": "8.3"}),
                        "name": "8.4",
                    },
                    "id": "8.5",
                }
            ],
            "name": "8.6",
        },
        {"role": "tool", "content": "10.1", "tool_call_id": "10.2"},
        # Tuple/List
        ("system", "11.1"),
        ("developer", "11.2"),
        ("human", [{"type": "image_url", "image_url": {"url": "12.1"}}]),
        (
            "ai",
            [
                {"type": "text", "text": "13.1"},
                {
                    "type": "tool_use",
                    "id": "13.2",
                    "name": "13.3",
                    "input": {"13.4": "13.5"},
                },
            ],
        ),
        # String
        "14.1",
        # LangChain dict
        {
            "role": "ai",
            "content": [{"type": "text", "text": "15.1"}],
            "tool_calls": [{"args": {"15.2": "15.3"}, "name": "15.4", "id": "15.5"}],
            "name": "15.6",
        },
    ]
    expected = [
        SystemMessage(content="1"),
        SystemMessage(
            content="1.1", additional_kwargs={"__openai_role__": "developer"}
        ),
        HumanMessage(
            content=[{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"
        ),
        AIMessage(
            content=[
                {"type": "text", "text": "3.1"},
                {
                    "type": "tool_use",
                    "id": "3.2",
                    "name": "3.3",
                    "input": {"3.4": "3.5"},
                },
            ]
        ),
        AIMessage(
            content=[
                {"type": "text", "text": "4.1"},
                {
                    "type": "tool_use",
                    "id": "4.2",
                    "name": "4.3",
                    "input": {"4.4": "4.5"},
                },
            ],
            tool_calls=[
                {
                    "name": "4.3",
                    "args": {"4.4": "4.5"},
                    "id": "4.2",
                    "type": "tool_call",
                }
            ],
        ),
        ToolMessage(content="5.1", name="5.3", tool_call_id="5.2"),
        SystemMessage(content="6"),
        SystemMessage(
            content="6.1", additional_kwargs={"__openai_role__": "developer"}
        ),
        HumanMessage(
            content=[{"type": "image_url", "image_url": {"url": "7.1"}}], name="7.2"
        ),
        AIMessage(
            content=[{"type": "text", "text": "8.1"}],
            name="8.6",
            tool_calls=[
                {
                    "name": "8.4",
                    "args": {"8.2": "8.3"},
                    "id": "8.5",
                    "type": "tool_call",
                }
            ],
        ),
        ToolMessage(content="10.1", tool_call_id="10.2"),
        SystemMessage(content="11.1"),
        SystemMessage(
            content="11.2", additional_kwargs={"__openai_role__": "developer"}
        ),
        HumanMessage(content=[{"type": "image_url", "image_url": {"url": "12.1"}}]),
        AIMessage(
            content=[
                {"type": "text", "text": "13.1"},
                {
                    "type": "tool_use",
                    "id": "13.2",
                    "name": "13.3",
                    "input": {"13.4": "13.5"},
                },
            ]
        ),
        HumanMessage(content="14.1"),
        AIMessage(
            content=[{"type": "text", "text": "15.1"}],
            name="15.6",
            tool_calls=[
                {
                    "name": "15.4",
                    "args": {"15.2": "15.3"},
                    "id": "15.5",
                    "type": "tool_call",
                }
            ],
        ),
    ]
    actual = convert_to_messages(message_like)
    assert expected == actual


def test_convert_to_messages_openai_refusal() -> None:
    """Test that an OpenAI `refusal` field ends up in `additional_kwargs`.

    Also checks that a dict without a `content` key raises `ValueError`.
    """
    actual = convert_to_messages(
        [{"role": "assistant", "content": "", "refusal": "9.1"}]
    )
    expected = [AIMessage("", additional_kwargs={"refusal": "9.1"})]
    assert actual == expected

    # Raises error if content is missing.
    with pytest.raises(
        ValueError, match="Message dict must contain 'role' and 'content' keys"
    ):
        convert_to_messages([{"role": "assistant", "refusal": "9.1"}])


def create_image_data() -> str:
    """Return a fixed base64 string used as image data in the tests below."""
    return "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q=="  # noqa: E501


def create_base64_image(image_format: str = "jpeg") -> str:
    """Return `create_image_data()` wrapped as a `data:image/<format>;base64,` URL."""
    data = create_image_data()
    return f"data:image/{image_format};base64,{data}"


def test_convert_to_openai_messages_string() -> None:
    """Test that a bare string converts to a single `user` message dict."""
    message = "Hello"
    result = convert_to_openai_messages(message)
    assert result == {"role": "user", "content": "Hello"}


def test_convert_to_openai_messages_single_message() -> None:
    """Test conversion of a single message, with and without `include_id`.

    A message without an id produces no `id` key even when `include_id=True`;
    a message with an id only exposes it when `include_id=True`.
    """
    message: BaseMessage = HumanMessage(content="Hello")
    result = convert_to_openai_messages(message)
    assert result == {"role": "user", "content": "Hello"}

    # Test IDs
    result = convert_to_openai_messages(message, include_id=True)
    assert result == {"role": "user", "content": "Hello"}  # no ID

    message = AIMessage(content="Hello", id="resp_123")
    result = convert_to_openai_messages(message)
    assert result == {"role": "assistant", "content": "Hello"}

    result = convert_to_openai_messages(message, include_id=True)
    assert result == {"role": "assistant", "content": "Hello", "id": "resp_123"}


def test_convert_to_openai_messages_multiple_messages() -> None:
    """Test that a list of messages converts to a list of role/content dicts."""
    messages = [
        SystemMessage(content="System message"),
        HumanMessage(content="Human message"),
        AIMessage(content="AI message"),
    ]
    result = convert_to_openai_messages(messages)
    expected = [
        {"role": "system", "content": "System message"},
        {"role": "user", "content": "Human message"},
        {"role": "assistant", "content": "AI message"},
    ]
    assert result == expected


def test_convert_to_openai_messages_openai_string() -> None:
    """Test that text-only block lists collapse to a newline-joined string by default."""
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Hello"},
                {"type": "text", "text": "World"},
            ]
        ),
        AIMessage(
            content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]
        ),
    ]
    result = convert_to_openai_messages(messages)
    expected = [
        {"role": "user", "content": "Hello\nWorld"},
        {"role": "assistant", "content": "Hi\nthere"},
    ]
    assert result == expected


def test_convert_to_openai_messages_openai_block() -> None:
    """Test that `text_format="block"` wraps string content in a text block."""
    messages = [HumanMessage(content="Hello"), AIMessage(content="Hi there")]
    result = convert_to_openai_messages(messages, text_format="block")
    expected = [
        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
        {"role": "assistant", "content": [{"type": "text", "text": "Hi there"}]},
    ]
    assert result == expected


def test_convert_to_openai_messages_invalid_format() -> None:
    """Test that an unknown `text_format` raises `ValueError`."""
    with pytest.raises(ValueError, match="Unrecognized text_format="):
        convert_to_openai_messages(  # type: ignore[call-overload]
            [HumanMessage(content="Hello")],
            text_format="invalid",
        )


def test_convert_to_openai_messages_openai_image() -> None:
    """Test that OpenAI-style `image_url` blocks are returned unchanged."""
    base64_image = create_base64_image()
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Here's an image:"},
                {"type": "image_url", "image_url": {"url": base64_image}},
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    expected = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here's an image:"},
                {"type": "image_url", "image_url": {"url": base64_image}},
            ],
        }
    ]
    assert result == expected


def test_convert_to_openai_messages_anthropic() -> None:
    """Test conversion of Anthropic-style content blocks.

    Covers a text block carrying `cache_control`, a base64 `image` block, a
    `tool_use` block, a `tool_result` block holding an image, and `thinking`
    blocks, which are expected to pass through unchanged.
    """
    image_data = create_image_data()
    messages = [
        HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": "Here's an image:",
                    "cache_control": {"type": "ephemeral"},
                },
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": image_data,
                    },
                },
            ]
        ),
        AIMessage(
            content=[
                {"type": "tool_use", "name": "foo", "input": {"bar": "baz"}, "id": "1"}
            ]
        ),
        HumanMessage(
            content=[
                {
                    "type": "tool_result",
                    "tool_use_id": "1",
                    "is_error": False,
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": image_data,
                            },
                        },
                    ],
                }
            ]
        ),
    ]
    result = convert_to_openai_messages(messages)
    expected = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here's an image:"},
                {"type": "image_url", "image_url": {"url": create_base64_image()}},
            ],
        },
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "type": "function",
                    "function": {
                        "name": "foo",
                        "arguments": json.dumps({"bar": "baz"}),
                    },
                    "id": "1",
                }
            ],
        },
        {
            "role": "tool",
            "content": [
                {"type": "image_url", "image_url": {"url": create_base64_image()}}
            ],
            "tool_call_id": "1",
        },
    ]
    assert result == expected

    # Test thinking blocks (pass through)
    thinking_block = {
        "signature": "abc123",
        "thinking": "Thinking text.",
        "type": "thinking",
    }
    text_block = {"text": "Response text.", "type": "text"}
    messages = [AIMessage([thinking_block, text_block])]
    result = convert_to_openai_messages(messages)
    expected = [{"role": "assistant", "content": [thinking_block, text_block]}]
    assert result == expected


def test_convert_to_openai_messages_bedrock_converse_image() -> None:
    """Test that an `image` block holding raw bytes becomes an `image_url` data URL."""
    image_data = create_image_data()
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Here's an image:"},
                {
                    "image": {
                        "format": "jpeg",
                        "source": {"bytes": base64.b64decode(image_data)},
                    }
                },
            ]
        )
    ]
    result = convert_to_openai_messages(messages)
    assert result[0]["content"][1]["type"] == "image_url"
    assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()


def test_convert_to_openai_messages_vertexai_image() -> None:
    """Test that a `media` block with an image MIME type becomes an `image_url` data URL."""
    image_data = create_image_data()
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Here's an image:"},
                {
                    "type": "media",
                    "mime_type": "image/jpeg",
                    "data": base64.b64decode(image_data),
                },
            ]
        )
    ]
    result = convert_to_openai_messages(messages)
    assert result[0]["content"][1]["type"] == "image_url"
    assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()


def test_convert_to_openai_messages_tool_message() -> None:
    """Test that a `ToolMessage` keeps its `tool_call_id` and block-formats its content."""
    tool_message = ToolMessage(content="Tool result", tool_call_id="123")
    result = convert_to_openai_messages([tool_message], text_format="block")
    assert len(result) == 1
    assert result[0]["content"] == [{"type": "text", "text": "Tool result"}]
    assert result[0]["tool_call_id"] == "123"


def test_convert_to_openai_messages_tool_use() -> None:
    """Test that a `tool_use` content block becomes an OpenAI function tool call."""
    messages = [
        AIMessage(
            content=[
                {
                    "type": "tool_use",
                    "id": "123",
                    "name": "calculator",
                    "input": {"a": "b"},
                }
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    assert result[0]["tool_calls"][0]["type"] == "function"
    assert result[0]["tool_calls"][0]["id"] == "123"
    assert result[0]["tool_calls"][0]["function"]["name"] == "calculator"
    assert result[0]["tool_calls"][0]["function"]["arguments"] == json.dumps({"a": "b"})


def test_convert_to_openai_messages_tool_use_unicode() -> None:
    """Test that Unicode characters in tool call args are preserved correctly."""
    messages = [
        AIMessage(
            content=[
                {
                    "type": "tool_use",
                    "id": "123",
                    "name": "create_customer",
                    "input": {"customer_name": "你好啊集团"},
                }
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    assert result[0]["tool_calls"][0]["type"] == "function"
    assert result[0]["tool_calls"][0]["id"] == "123"
    assert result[0]["tool_calls"][0]["function"]["name"] == "create_customer"
    # Ensure Unicode characters are preserved, not escaped as \\uXXXX
    arguments_str = result[0]["tool_calls"][0]["function"]["arguments"]
    parsed_args = json.loads(arguments_str)
    assert parsed_args["customer_name"] == "你好啊集团"
    # Also ensure the raw JSON string contains Unicode, not escaped sequences
    assert "你好啊集团" in arguments_str
    assert "\\u4f60" not in arguments_str  # Should not contain escaped Unicode


def test_convert_to_openai_messages_json() -> None:
    """Test that a `json` block becomes a text block whose text parses back to the data."""
    json_data = {"key": "value"}
    messages = [HumanMessage(content=[{"type": "json", "json": json_data}])]
    result = convert_to_openai_messages(messages, text_format="block")
    assert result[0]["content"][0]["type"] == "text"
    assert json.loads(result[0]["content"][0]["text"]) == json_data


def test_convert_to_openai_messages_guard_content() -> None:
    """Test that a `guard_content` block becomes a text block with the guarded text."""
    messages = [
        HumanMessage(
            content=[
                {
                    "type": "guard_content",
                    "guard_content": {"text": "Protected content"},
                }
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    assert result[0]["content"][0]["type"] == "text"
    assert result[0]["content"][0]["text"] == "Protected content"


def test_convert_to_openai_messages_invalid_block() -> None:
    """Test handling of an unrecognized content block type.

    With `pass_through_unknown_blocks=False` a `ValueError` is raised; by
    default the block is passed through unchanged.
    """
    messages = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]
    with pytest.raises(ValueError, match="Unrecognized content block"):
        convert_to_openai_messages(
            messages,
            text_format="block",
            pass_through_unknown_blocks=False,
        )
    # Accept by default
    result = convert_to_openai_messages(messages, text_format="block")
    assert result == [{"role": "user", "content": [{"type": "invalid", "foo": "bar"}]}]


def test_handle_openai_responses_blocks() -> None:
    blocks: str | list[str | dict[str, Any]] = [
        {"type": "reasoning", "id": "1"},
        {
            "type": "function_call",
            "name": "multiply",
            "arguments": '{"x":5,"y":4}',
            "call_id": "call_abc123",
            "id": "fc_abc123",
            "status": "completed",
        },
    ]
    message = AIMessage(content=blocks)

    expected_tool_call = {
        "type": "function",
        "function": {
            "name": "multiply",
            "arguments": '{"x":5,"y":4}',
        },
        "id": "call_abc123",
    }
    result = convert_to_openai_messages(message)
    assert isinstance(result, dict)
    assert result["content"] == blocks
    assert result["tool_calls"] == [expected_tool_call]

    result = convert_to_openai_messages(message, pass_through_unknown_blocks=False)
    assert isinstance(result, dict)
assert result["content"] == [{"type": "reasoning", "id": "1"}]1321 assert result["tool_calls"] == [expected_tool_call]132213231324def test_convert_to_openai_messages_empty_message() -> None:1325 result = convert_to_openai_messages(HumanMessage(content=""))1326 assert result == {"role": "user", "content": ""}132713281329def test_convert_to_openai_messages_empty_list() -> None:1330 result = convert_to_openai_messages([])1331 assert result == []133213331334def test_convert_to_openai_messages_mixed_content_types() -> None:1335 messages = [1336 HumanMessage(1337 content=[1338 "Text message",1339 {"type": "text", "text": "Structured text"},1340 {"type": "image_url", "image_url": {"url": create_base64_image()}},1341 ]1342 )1343 ]1344 result = convert_to_openai_messages(messages, text_format="block")1345 assert len(result[0]["content"]) == 31346 assert isinstance(result[0]["content"][0], dict)1347 assert isinstance(result[0]["content"][1], dict)1348 assert isinstance(result[0]["content"][2], dict)134913501351def test_convert_to_openai_messages_developer() -> None:1352 messages: list[MessageLikeRepresentation] = [1353 SystemMessage("a", additional_kwargs={"__openai_role__": "developer"}),1354 {"role": "developer", "content": "a"},1355 ]1356 result = convert_to_openai_messages(messages)1357 assert result == [{"role": "developer", "content": "a"}] * 2135813591360def test_convert_to_openai_messages_multimodal() -> None:1361 """v0 and v1 content to OpenAI messages conversion."""1362 messages = [1363 HumanMessage(1364 content=[1365 # Prior v0 blocks1366 {"type": "text", "text": "Text message"},1367 {1368 "type": "image",1369 "url": "https://example.com/test.png",1370 },1371 {1372 "type": "image",1373 "source_type": "base64",1374 "data": "<base64 string>",1375 "mime_type": "image/png",1376 },1377 {1378 "type": "file",1379 "source_type": "base64",1380 "data": "<base64 string>",1381 "mime_type": "application/pdf",1382 "filename": "test.pdf",1383 },1384 {1385 # OpenAI Chat 
Completions file format1386 "type": "file",1387 "file": {1388 "filename": "draconomicon.pdf",1389 "file_data": "data:application/pdf;base64,<base64 string>",1390 },1391 },1392 {1393 "type": "file",1394 "source_type": "id",1395 "id": "file-abc123",1396 },1397 {1398 "type": "audio",1399 "source_type": "base64",1400 "data": "<base64 string>",1401 "mime_type": "audio/wav",1402 },1403 {1404 "type": "input_audio",1405 "input_audio": {1406 "data": "<base64 string>",1407 "format": "wav",1408 },1409 },1410 # v1 Additions1411 {1412 "type": "image",1413 "source_type": "url", # backward compatibility v0 block field1414 "url": "https://example.com/test.png",1415 },1416 {1417 "type": "image",1418 "base64": "<base64 string>",1419 "mime_type": "image/png",1420 },1421 {1422 "type": "file",1423 "base64": "<base64 string>",1424 "mime_type": "application/pdf",1425 "filename": "test.pdf", # backward compatibility v0 block field1426 },1427 {1428 "type": "file",1429 "file_id": "file-abc123",1430 },1431 {1432 "type": "audio",1433 "base64": "<base64 string>",1434 "mime_type": "audio/wav",1435 },1436 ]1437 )1438 ]1439 result = convert_to_openai_messages(messages, text_format="block")1440 assert len(result) == 11441 message = result[0]1442 assert len(message["content"]) == 1314431444 # Test auto-adding filename1445 messages = [1446 HumanMessage(1447 content=[1448 {1449 "type": "file",1450 "base64": "<base64 string>",1451 "mime_type": "application/pdf",1452 },1453 ]1454 )1455 ]1456 with pytest.warns(match="filename"):1457 result = convert_to_openai_messages(messages, text_format="block")1458 assert len(result) == 11459 message = result[0]1460 assert len(message["content"]) == 11461 block = message["content"][0]1462 assert block == {1463 # OpenAI Chat Completions file format1464 "type": "file",1465 "file": {1466 "file_data": "data:application/pdf;base64,<base64 string>",1467 "filename": "LC_AUTOGENERATED",1468 },1469 }147014711472def test_count_tokens_approximately_empty_messages() -> 
None:1473 # Test with empty message list1474 assert count_tokens_approximately([]) == 014751476 # Test with empty content1477 messages = [HumanMessage(content="")]1478 # 4 role chars -> 1 + 3 = 4 tokens1479 assert count_tokens_approximately(messages) == 4148014811482def test_count_tokens_approximately_with_names() -> None:1483 messages = [1484 # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1485 # (with name: extra 4 name chars, so total = 4 + 3 = 7 tokens)1486 HumanMessage(content="Hello", name="user"),1487 # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1488 # (with name: extra 9 name chars, so total = 7 + 3 = 10 tokens)1489 AIMessage(content="Hi there", name="assistant"),1490 ]1491 # With names included (default)1492 assert count_tokens_approximately(messages) == 1714931494 # Without names1495 without_names = count_tokens_approximately(messages, count_name=False)1496 assert without_names == 14149714981499def test_count_tokens_approximately_openai_format() -> None:1500 # same as test_count_tokens_approximately_with_names, but in OpenAI format1501 messages = [1502 {"role": "user", "content": "Hello", "name": "user"},1503 {"role": "assistant", "content": "Hi there", "name": "assistant"},1504 ]1505 # With names included (default)1506 assert count_tokens_approximately(messages) == 1715071508 # Without names1509 without_names = count_tokens_approximately(messages, count_name=False)1510 assert without_names == 14151115121513def test_count_tokens_approximately_string_content() -> None:1514 messages = [1515 # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1516 HumanMessage(content="Hello"),1517 # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1518 AIMessage(content="Hi there"),1519 # 12 chars + 4 role chars -> 4 + 3 = 7 tokens1520 HumanMessage(content="How are you?"),1521 ]1522 assert count_tokens_approximately(messages) == 21152315241525def test_count_tokens_approximately_list_content() -> None:1526 messages = [1527 # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1528 
HumanMessage(content=[{"foo": "bar"}]),1529 # '[{"test": 123}]' -> 15 chars + 9 role chars -> 6 + 3 = 9 tokens1530 AIMessage(content=[{"test": 123}]),1531 ]1532 assert count_tokens_approximately(messages) == 17153315341535def test_count_tokens_approximately_tool_calls() -> None:1536 tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1537 messages = [1538 # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1539 AIMessage(content="", tool_calls=tool_calls),1540 # 15 chars + 4 role chars -> 5 + 3 = 8 tokens1541 HumanMessage(content="Regular message"),1542 ]1543 assert count_tokens_approximately(messages) == 331544 # AI message w/ both content and tool calls1545 # 94 chars + 9 role chars -> 26 + 3 = 29 tokens1546 messages = [1547 AIMessage(content="Regular message", tool_calls=tool_calls),1548 ]1549 assert count_tokens_approximately(messages) == 29155015511552def test_count_tokens_approximately_custom_token_length() -> None:1553 messages = [1554 # 11 chars + 4 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31555 HumanMessage(content="Hello world"),1556 # 7 chars + 9 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31557 AIMessage(content="Testing"),1558 ]1559 assert count_tokens_approximately(messages, chars_per_token=4) == 141560 assert count_tokens_approximately(messages, chars_per_token=2) == 22156115621563def test_count_tokens_approximately_large_message_content() -> None:1564 # Test with large content to ensure no issues1565 large_text = "x" * 100001566 messages = [HumanMessage(content=large_text)]1567 # 10,000 chars + 4 role chars -> 2501 + 3 = 2504 tokens1568 assert count_tokens_approximately(messages) == 2504156915701571def test_count_tokens_approximately_large_number_of_messages() -> None:1572 # Test with large content to ensure no issues1573 messages = [HumanMessage(content="x")] * 1_0001574 # 1 chars + 4 role chars -> 2 + 3 = 5 tokens1575 assert count_tokens_approximately(messages) == 
5_000157615771578def test_count_tokens_approximately_mixed_content_types() -> None:1579 # Test with a variety of content types in the same message list1580 tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1581 messages = [1582 # 13 chars + 6 role chars -> 5 + 3 = 8 tokens1583 SystemMessage(content="System prompt"),1584 # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1585 HumanMessage(content=[{"foo": "bar"}]),1586 # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1587 AIMessage(content="", tool_calls=tool_calls),1588 # 13 chars + 4 role chars + 9 name chars + 1 tool call ID char ->1589 # 7 + 3 = 10 tokens1590 ToolMessage(content="Tool response", name="test_tool", tool_call_id="1"),1591 ]1592 token_count = count_tokens_approximately(messages)1593 assert token_count == 5115941595 # Ensure that count is consistent if we do one message at a time1596 assert sum(count_tokens_approximately([m]) for m in messages) == token_count159715981599def test_count_tokens_approximately_usage_metadata_scaling() -> None:1600 messages = [1601 HumanMessage("text"),1602 AIMessage(1603 "text",1604 response_metadata={"model_provider": "openai"},1605 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1606 ),1607 HumanMessage("text"),1608 AIMessage(1609 "text",1610 response_metadata={"model_provider": "openai"},1611 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1612 ),1613 ]16141615 unscaled = count_tokens_approximately(messages)1616 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16171618 ratio = scaled / unscaled1619 assert 1 <= round(ratio, 1) <= 1.2 # we ceil scale token counts, so can be > 1.216201621 messages.extend([ToolMessage("text", tool_call_id="abc123")] * 3)16221623 unscaled_extended = count_tokens_approximately(messages)1624 scaled_extended = count_tokens_approximately(1625 messages, use_usage_metadata_scaling=True1626 )16271628 # scaling 
should still be based on the most recent AIMessage with total_tokens=2001629 assert unscaled_extended > unscaled1630 assert scaled_extended > scaled16311632 # And the scaled total should be the unscaled total multiplied by the same ratio.1633 # ratio = 200 / unscaled (as of last AI message)1634 expected_scaled_extended = math.ceil(unscaled_extended * ratio)1635 assert scaled_extended <= expected_scaled_extended <= scaled_extended + 1163616371638def test_count_tokens_approximately_usage_metadata_scaling_model_provider() -> None:1639 messages = [1640 HumanMessage("Hello"),1641 AIMessage(1642 "Hi",1643 response_metadata={"model_provider": "openai"},1644 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1645 ),1646 HumanMessage("More text"),1647 AIMessage(1648 "More response",1649 response_metadata={"model_provider": "anthropic"},1650 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1651 ),1652 ]16531654 unscaled = count_tokens_approximately(messages)1655 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)1656 assert scaled == unscaled165716581659def test_count_tokens_approximately_usage_metadata_scaling_total_tokens() -> None:1660 messages = [1661 HumanMessage("Hello"),1662 AIMessage(1663 "Hi",1664 response_metadata={"model_provider": "openai"},1665 # no usage metadata -> skip1666 ),1667 ]16681669 unscaled = count_tokens_approximately(messages, chars_per_token=5)1670 scaled = count_tokens_approximately(1671 messages, chars_per_token=5, use_usage_metadata_scaling=True1672 )16731674 assert scaled == unscaled167516761677def test_count_tokens_approximately_usage_metadata_scaling_floor_at_one() -> None:1678 messages = [1679 HumanMessage("text"),1680 AIMessage(1681 "text",1682 response_metadata={"model_provider": "openai"},1683 # Set total_tokens lower than the approximate count up through this message.1684 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 1},1685 ),1686 
HumanMessage("text"),1687 ]16881689 unscaled = count_tokens_approximately(messages)1690 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16911692 # scale factor would be < 1, but we floor it at 1.0 to avoid decreasing counts1693 assert scaled == unscaled169416951696def test_get_buffer_string_with_structured_content() -> None:1697 """Test get_buffer_string with structured content in messages."""1698 messages = [1699 HumanMessage(content=[{"type": "text", "text": "Hello, world!"}]),1700 AIMessage(content=[{"type": "text", "text": "Hi there!"}]),1701 SystemMessage(content=[{"type": "text", "text": "System message"}]),1702 ]1703 expected = "Human: Hello, world!\nAI: Hi there!\nSystem: System message"1704 actual = get_buffer_string(messages)1705 assert actual == expected170617071708def test_get_buffer_string_with_mixed_content() -> None:1709 """Test get_buffer_string with mixed content types in messages."""1710 messages = [1711 HumanMessage(content="Simple text"),1712 AIMessage(content=[{"type": "text", "text": "Structured text"}]),1713 SystemMessage(content=[{"type": "text", "text": "Another structured text"}]),1714 ]1715 expected = (1716 "Human: Simple text\nAI: Structured text\nSystem: Another structured text"1717 )1718 actual = get_buffer_string(messages)1719 assert actual == expected172017211722def test_get_buffer_string_with_function_call() -> None:1723 """Test get_buffer_string with function call in additional_kwargs."""1724 messages = [1725 HumanMessage(content="Hello"),1726 AIMessage(1727 content="Hi",1728 additional_kwargs={1729 "function_call": {1730 "name": "test_function",1731 "arguments": '{"arg": "value"}',1732 }1733 },1734 ),1735 ]1736 # TODO: consider changing this1737 expected = (1738 "Human: Hello\n"1739 "AI: Hi{'name': 'test_function', 'arguments': '{\"arg\": \"value\"}'}"1740 )1741 actual = get_buffer_string(messages)1742 assert actual == expected174317441745def test_get_buffer_string_with_empty_content() -> None:1746 
"""Test get_buffer_string with empty content in messages."""1747 messages = [1748 HumanMessage(content=[]),1749 AIMessage(content=""),1750 SystemMessage(content=[]),1751 ]1752 expected = "Human: \nAI: \nSystem: "1753 actual = get_buffer_string(messages)1754 assert actual == expected175517561757def test_get_buffer_string_with_tool_calls() -> None:1758 """Test `get_buffer_string` with `tool_calls` field."""1759 messages = [1760 HumanMessage(content="What's the weather?"),1761 AIMessage(1762 content="Let me check the weather",1763 tool_calls=[1764 {1765 "name": "get_weather",1766 "args": {"city": "NYC"},1767 "id": "call_1",1768 "type": "tool_call",1769 }1770 ],1771 ),1772 ]1773 result = get_buffer_string(messages)1774 assert "Human: What's the weather?" in result1775 assert "AI: Let me check the weather" in result1776 assert "get_weather" in result1777 assert "NYC" in result177817791780def test_get_buffer_string_with_tool_calls_empty_content() -> None:1781 """Test `get_buffer_string` with `tool_calls` and empty `content`."""1782 messages = [1783 AIMessage(1784 content="",1785 tool_calls=[1786 {1787 "name": "search",1788 "args": {"query": "test"},1789 "id": "call_2",1790 "type": "tool_call",1791 }1792 ],1793 ),1794 ]1795 result = get_buffer_string(messages)1796 assert "AI: " in result1797 assert "search" in result179817991800def test_get_buffer_string_tool_calls_preferred_over_function_call() -> None:1801 """Test that `tool_calls` takes precedence over legacy `function_call`."""1802 messages = [1803 AIMessage(1804 content="Calling tools",1805 tool_calls=[1806 {1807 "name": "modern_tool",1808 "args": {"key": "value"},1809 "id": "call_3",1810 "type": "tool_call",1811 }1812 ],1813 additional_kwargs={1814 "function_call": {"name": "legacy_function", "arguments": "{}"}1815 },1816 ),1817 ]1818 result = get_buffer_string(messages)1819 assert "modern_tool" in result1820 assert "legacy_function" not in result182118221823def test_convert_to_openai_messages_reasoning_content() -> 
None:1824 """Test convert_to_openai_messages with reasoning content blocks."""1825 # Test reasoning block with empty summary1826 msg = AIMessage(content=[{"type": "reasoning", "summary": []}])1827 result = convert_to_openai_messages(msg, text_format="block")1828 expected = {"role": "assistant", "content": [{"type": "reasoning", "summary": []}]}1829 assert result == expected18301831 # Test reasoning block with summary content1832 msg_with_summary = AIMessage(1833 content=[1834 {1835 "type": "reasoning",1836 "summary": [1837 {"type": "text", "text": "First thought"},1838 {"type": "text", "text": "Second thought"},1839 ],1840 }1841 ]1842 )1843 result_with_summary = convert_to_openai_messages(1844 msg_with_summary, text_format="block"1845 )1846 expected_with_summary = {1847 "role": "assistant",1848 "content": [1849 {1850 "type": "reasoning",1851 "summary": [1852 {"type": "text", "text": "First thought"},1853 {"type": "text", "text": "Second thought"},1854 ],1855 }1856 ],1857 }1858 assert result_with_summary == expected_with_summary18591860 # Test mixed content with reasoning and text1861 mixed_msg = AIMessage(1862 content=[1863 {"type": "text", "text": "Regular response"},1864 {1865 "type": "reasoning",1866 "summary": [{"type": "text", "text": "My reasoning process"}],1867 },1868 ]1869 )1870 mixed_result = convert_to_openai_messages(mixed_msg, text_format="block")1871 expected_mixed = {1872 "role": "assistant",1873 "content": [1874 {"type": "text", "text": "Regular response"},1875 {1876 "type": "reasoning",1877 "summary": [{"type": "text", "text": "My reasoning process"}],1878 },1879 ],1880 }1881 assert mixed_result == expected_mixed188218831884# Tests for get_buffer_string XML format188518861887def test_get_buffer_string_xml_empty_messages_list() -> None:1888 """Test XML format with empty messages list."""1889 messages: list[BaseMessage] = []1890 result = get_buffer_string(messages, format="xml")1891 expected = ""1892 assert result == expected189318941895def 
test_get_buffer_string_xml_basic() -> None:1896 """Test XML format output with all message types."""1897 messages = [1898 SystemMessage(content="System message"),1899 HumanMessage(content="Human message"),1900 AIMessage(content="AI message"),1901 FunctionMessage(content="Function result", name="test_fn"),1902 ToolMessage(content="Tool result", tool_call_id="123"),1903 ]1904 result = get_buffer_string(messages, format="xml")1905 expected = (1906 '<message type="system">System message</message>\n'1907 '<message type="human">Human message</message>\n'1908 '<message type="ai">AI message</message>\n'1909 '<message type="function">Function result</message>\n'1910 '<message type="tool">Tool result</message>'1911 )1912 assert result == expected191319141915def test_get_buffer_string_xml_custom_prefixes() -> None:1916 """Test XML format with custom human and ai prefixes."""1917 messages = [1918 HumanMessage(content="Hello"),1919 AIMessage(content="Hi there"),1920 ]1921 result = get_buffer_string(1922 messages, human_prefix="User", ai_prefix="Assistant", format="xml"1923 )1924 expected = (1925 '<message type="user">Hello</message>\n'1926 '<message type="assistant">Hi there</message>'1927 )1928 assert result == expected192919301931def test_get_buffer_string_xml_custom_separator() -> None:1932 """Test XML format with custom message separator."""1933 messages = [1934 HumanMessage(content="Hello"),1935 AIMessage(content="Hi there"),1936 ]1937 result = get_buffer_string(messages, format="xml", message_separator="\n\n")1938 expected = (1939 '<message type="human">Hello</message>\n\n<message type="ai">Hi there</message>'1940 )1941 assert result == expected194219431944def test_get_buffer_string_prefix_custom_separator() -> None:1945 """Test prefix format with custom message separator."""1946 messages = [1947 HumanMessage(content="Hello"),1948 AIMessage(content="Hi there"),1949 ]1950 result = get_buffer_string(messages, format="prefix", message_separator=" | ")1951 expected = "Human: 
Hello | AI: Hi there"1952 assert result == expected195319541955def test_get_buffer_string_xml_escaping() -> None:1956 """Test XML format properly escapes special characters in content."""1957 messages = [1958 HumanMessage(content="Is 5 < 10 & 10 > 5?"),1959 AIMessage(content='Yes, and here\'s a "quote"'),1960 ]1961 result = get_buffer_string(messages, format="xml")1962 # xml.sax.saxutils.escape escapes <, >, & (not quotes in content)1963 expected = (1964 '<message type="human">Is 5 < 10 & 10 > 5?</message>\n'1965 '<message type="ai">Yes, and here\'s a "quote"</message>'1966 )1967 assert result == expected196819691970def test_get_buffer_string_xml_unicode_content() -> None:1971 """Test XML format with Unicode content."""1972 messages = [1973 HumanMessage(content="你好世界"), # Chinese: Hello World1974 AIMessage(content="こんにちは"), # Japanese: Hello1975 ]1976 result = get_buffer_string(messages, format="xml")1977 expected = (1978 '<message type="human">你好世界</message>\n'1979 '<message type="ai">こんにちは</message>'1980 )1981 assert result == expected198219831984def test_get_buffer_string_xml_chat_message_valid_role() -> None:1985 """Test XML format with `ChatMessage` having valid XML tag name role."""1986 messages = [1987 ChatMessage(content="Hello", role="Assistant"),1988 ]1989 result = get_buffer_string(messages, format="xml")1990 # Role is used directly as the type attribute value1991 expected = '<message type="Assistant">Hello</message>'1992 assert result == expected19931994 # Spaces in role1995 messages = [1996 ChatMessage(content="Hello", role="my custom role"),1997 ]1998 result = get_buffer_string(messages, format="xml")1999 # Custom roles with spaces use quoteattr for proper escaping2000 expected = '<message type="my custom role">Hello</message>'
Findings
✓ No findings reported for this file.