1import base642import json3import math4import re5from collections.abc import Callable, Sequence6from typing import Any, TypedDict78import pytest9from typing_extensions import NotRequired, override1011from langchain_core.language_models.fake_chat_models import FakeChatModel12from langchain_core.messages import (13 AIMessage,14 BaseMessage,15 ChatMessage,16 FunctionMessage,17 HumanMessage,18 SystemMessage,19 ToolCall,20 ToolMessage,21)22from langchain_core.messages.utils import (23 MessageLikeRepresentation,24 convert_to_messages,25 convert_to_openai_messages,26 count_tokens_approximately,27 filter_messages,28 get_buffer_string,29 merge_message_runs,30 trim_messages,31)32from langchain_core.tools import BaseTool, tool333435@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])36def test_merge_message_runs_str(msg_cls: type[BaseMessage]) -> None:37 messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]38 messages_model_copy = [m.model_copy(deep=True) for m in messages]39 expected = [msg_cls("foo\nbar\nbaz")]40 actual = merge_message_runs(messages)41 assert actual == expected42 assert messages == messages_model_copy434445@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])46def test_merge_message_runs_str_with_specified_separator(47 msg_cls: type[BaseMessage],48) -> None:49 messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]50 messages_model_copy = [m.model_copy(deep=True) for m in messages]51 expected = [msg_cls("foo<sep>bar<sep>baz")]52 actual = merge_message_runs(messages, chunk_separator="<sep>")53 assert actual == expected54 assert messages == messages_model_copy555657@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])58def test_merge_message_runs_str_without_separator(59 msg_cls: type[BaseMessage],60) -> None:61 messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]62 messages_model_copy = [m.model_copy(deep=True) for m in messages]63 expected = [msg_cls("foobarbaz")]64 actual = merge_message_runs(messages, chunk_separator="")65 assert actual == expected66 assert messages == messages_model_copy676869def test_merge_message_runs_response_metadata() -> None:70 messages = [71 AIMessage("foo", id="1", response_metadata={"input_tokens": 1}),72 AIMessage("bar", id="2", response_metadata={"input_tokens": 2}),73 ]74 expected = [75 AIMessage(76 "foo\nbar",77 id="1",78 response_metadata={"input_tokens": 1},79 )80 ]81 actual = merge_message_runs(messages)82 assert actual == expected83 # Check it's not mutated84 assert messages[1].response_metadata == {"input_tokens": 2}858687def test_merge_message_runs_content() -> None:88 messages = [89 AIMessage("foo", id="1"),90 AIMessage(91 [92 {"text": "bar", "type": "text"},93 {"image_url": "...", "type": "image_url"},94 ],95 tool_calls=[96 ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call")97 ],98 id="2",99 ),100 AIMessage(101 "baz",102 tool_calls=[103 ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call")104 ],105 id="3",106 ),107 ]108 messages_model_copy = [m.model_copy(deep=True) for m in messages]109 expected = [110 AIMessage(111 [112 "foo",113 {"text": "bar", "type": "text"},114 {"image_url": "...", "type": "image_url"},115 "baz",116 ],117 tool_calls=[118 ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call"),119 ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call"),120 ],121 id="1",122 ),123 ]124 actual = merge_message_runs(messages)125 assert actual == expected126 invoked = merge_message_runs().invoke(messages)127 assert actual == invoked128 assert messages == messages_model_copy129130131def test_merge_messages_tool_messages() -> None:132 messages = [133 ToolMessage("foo", tool_call_id="1"),134 ToolMessage("bar", tool_call_id="2"),135 ]136 messages_model_copy = [m.model_copy(deep=True) for m in messages]137 actual = merge_message_runs(messages)138 assert actual == messages139 assert messages == messages_model_copy140141142class FilterFields(TypedDict):143 include_names: NotRequired[Sequence[str]]144 exclude_names: NotRequired[Sequence[str]]145 include_types: NotRequired[Sequence[str | type[BaseMessage]]]146 exclude_types: NotRequired[Sequence[str | type[BaseMessage]]]147 include_ids: NotRequired[Sequence[str]]148 exclude_ids: NotRequired[Sequence[str]]149 exclude_tool_calls: NotRequired[Sequence[str] | bool]150151152@pytest.mark.parametrize(153 "filters",154 [155 {"include_names": ["blur"]},156 {"exclude_names": ["blah"]},157 {"include_ids": ["2"]},158 {"exclude_ids": ["1"]},159 {"include_types": "human"},160 {"include_types": ["human"]},161 {"include_types": HumanMessage},162 {"include_types": [HumanMessage]},163 {"exclude_types": "system"},164 {"exclude_types": ["system"]},165 {"exclude_types": SystemMessage},166 {"exclude_types": [SystemMessage]},167 {"include_names": ["blah", "blur"], "exclude_types": [SystemMessage]},168 ],169)170def test_filter_message(filters: FilterFields) -> None:171 messages = [172 SystemMessage("foo", name="blah", id="1"),173 HumanMessage("bar", name="blur", id="2"),174 ]175 messages_model_copy = [m.model_copy(deep=True) for m in messages]176 expected = messages[1:2]177 actual = filter_messages(messages, **filters)178 assert expected == actual179 invoked = filter_messages(**filters).invoke(messages)180 assert invoked == actual181 assert messages == messages_model_copy182183184def test_filter_message_exclude_tool_calls() -> None:185 tool_calls = [186 {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},187 {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},188 ]189 messages = [190 HumanMessage("foo", name="blah", id="1"),191 AIMessage("foo-response", name="blah", id="2"),192 HumanMessage("bar", name="blur", id="3"),193 AIMessage(194 "bar-response",195 tool_calls=tool_calls,196 id="4",197 ),198 ToolMessage("baz", tool_call_id="1", id="5"),199 ToolMessage("qux", tool_call_id="2", id="6"),200 ]201 messages_model_copy = [m.model_copy(deep=True) for m in messages]202 expected = messages[:3]203204 # test excluding all tool calls205 actual = filter_messages(messages, exclude_tool_calls=True)206 assert expected == actual207208 # test explicitly excluding all tool calls209 actual = filter_messages(messages, exclude_tool_calls=["1", "2"])210 assert expected == actual211212 # test excluding a specific tool call213 expected = messages[:5]214 expected[3] = expected[3].model_copy(update={"tool_calls": [tool_calls[0]]})215 actual = filter_messages(messages, exclude_tool_calls=["2"])216 assert expected == actual217218 # assert that we didn't mutate the original messages219 assert messages == messages_model_copy220221222def test_filter_message_exclude_tool_calls_content_blocks() -> None:223 tool_calls = [224 {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},225 {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},226 ]227 messages = [228 HumanMessage("foo", name="blah", id="1"),229 AIMessage("foo-response", name="blah", id="2"),230 HumanMessage("bar", name="blur", id="3"),231 AIMessage(232 [233 {"text": "bar-response", "type": "text"},234 {"name": "foo", "type": "tool_use", "id": "1"},235 {"name": "bar", "type": "tool_use", "id": "2"},236 ],237 tool_calls=tool_calls,238 id="4",239 ),240 ToolMessage("baz", tool_call_id="1", id="5"),241 ToolMessage("qux", tool_call_id="2", id="6"),242 ]243 messages_model_copy = [m.model_copy(deep=True) for m in messages]244 expected = messages[:3]245246 # test excluding all tool calls247 actual = filter_messages(messages, exclude_tool_calls=True)248 assert expected == actual249250 # test explicitly excluding all tool calls251 actual = filter_messages(messages, exclude_tool_calls=["1", "2"])252 assert expected == actual253254 # test excluding a specific tool call255 expected = messages[:4] + messages[-1:]256 expected[3] = expected[3].model_copy(257 update={258 "tool_calls": [tool_calls[1]],259 "content": [260 {"text": "bar-response", "type": "text"},261 {"name": "bar", "type": "tool_use", "id": "2"},262 ],263 }264 )265 actual = filter_messages(messages, exclude_tool_calls=["1"])266 assert expected == actual267268 # assert that we didn't mutate the original messages269 assert messages == messages_model_copy270271272_MESSAGES_TO_TRIM = [273 SystemMessage("This is a 4 token text."),274 HumanMessage("This is a 4 token text.", id="first"),275 AIMessage(276 [277 {"type": "text", "text": "This is the FIRST 4 token block."},278 {"type": "text", "text": "This is the SECOND 4 token block."},279 ],280 id="second",281 ),282 HumanMessage("This is a 4 token text.", id="third"),283 AIMessage("This is a 4 token text.", id="fourth"),284]285_MESSAGES_TO_TRIM_COPY = [m.model_copy(deep=True) for m in _MESSAGES_TO_TRIM]286287288def test_trim_messages_first_30() -> None:289 expected = [290 SystemMessage("This is a 4 token text."),291 HumanMessage("This is a 4 token text.", id="first"),292 ]293 actual = trim_messages(294 _MESSAGES_TO_TRIM,295 max_tokens=30,296 token_counter=dummy_token_counter,297 strategy="first",298 )299 assert actual == expected300 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY301302303def test_trim_messages_first_30_allow_partial() -> None:304 expected = [305 SystemMessage("This is a 4 token text."),306 HumanMessage("This is a 4 token text.", id="first"),307 AIMessage(308 [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"309 ),310 ]311 actual = trim_messages(312 _MESSAGES_TO_TRIM,313 max_tokens=30,314 token_counter=dummy_token_counter,315 strategy="first",316 allow_partial=True,317 )318 assert actual == expected319 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY320321322def test_trim_messages_first_30_allow_partial_end_on_human() -> None:323 expected = [324 SystemMessage("This is a 4 token text."),325 HumanMessage("This is a 4 token text.", id="first"),326 ]327328 actual = trim_messages(329 _MESSAGES_TO_TRIM,330 max_tokens=30,331 token_counter=dummy_token_counter,332 strategy="first",333 allow_partial=True,334 end_on="human",335 )336 assert actual == expected337 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY338339340def test_trim_messages_last_30_include_system() -> None:341 expected = [342 SystemMessage("This is a 4 token text."),343 HumanMessage("This is a 4 token text.", id="third"),344 AIMessage("This is a 4 token text.", id="fourth"),345 ]346347 actual = trim_messages(348 _MESSAGES_TO_TRIM,349 max_tokens=30,350 include_system=True,351 token_counter=dummy_token_counter,352 strategy="last",353 )354 assert actual == expected355 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY356357358def test_trim_messages_last_40_include_system_allow_partial() -> None:359 expected = [360 SystemMessage("This is a 4 token text."),361 AIMessage(362 [363 {"type": "text", "text": "This is the SECOND 4 token block."},364 ],365 id="second",366 ),367 HumanMessage("This is a 4 token text.", id="third"),368 AIMessage("This is a 4 token text.", id="fourth"),369 ]370371 actual = trim_messages(372 _MESSAGES_TO_TRIM,373 max_tokens=40,374 token_counter=dummy_token_counter,375 strategy="last",376 allow_partial=True,377 include_system=True,378 )379380 assert actual == expected381 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY382383384def test_trim_messages_last_30_include_system_allow_partial_end_on_human() -> None:385 expected = [386 SystemMessage("This is a 4 token text."),387 AIMessage(388 [389 {"type": "text", "text": "This is the SECOND 4 token block."},390 ],391 id="second",392 ),393 HumanMessage("This is a 4 token text.", id="third"),394 ]395396 actual = trim_messages(397 _MESSAGES_TO_TRIM,398 max_tokens=30,399 token_counter=dummy_token_counter,400 strategy="last",401 allow_partial=True,402 include_system=True,403 end_on="human",404 )405406 assert actual == expected407 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY408409410def test_trim_messages_last_40_include_system_allow_partial_start_on_human() -> None:411 expected = [412 SystemMessage("This is a 4 token text."),413 HumanMessage("This is a 4 token text.", id="third"),414 AIMessage("This is a 4 token text.", id="fourth"),415 ]416417 actual = trim_messages(418 _MESSAGES_TO_TRIM,419 max_tokens=30,420 token_counter=dummy_token_counter,421 strategy="last",422 allow_partial=True,423 include_system=True,424 start_on="human",425 )426427 assert actual == expected428 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY429430431def test_trim_messages_allow_partial_one_message() -> None:432 expected = [433 HumanMessage("Th", id="third"),434 ]435436 actual = trim_messages(437 [HumanMessage("This is a funky text.", id="third")],438 max_tokens=2,439 token_counter=lambda messages: sum(len(m.content) for m in messages),440 text_splitter=list,441 strategy="first",442 allow_partial=True,443 )444445 assert actual == expected446 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY447448449def test_trim_messages_last_allow_partial_one_message() -> None:450 expected = [451 HumanMessage("t.", id="third"),452 ]453454 actual = trim_messages(455 [HumanMessage("This is a funky text.", id="third")],456 max_tokens=2,457 token_counter=lambda messages: sum(len(m.content) for m in messages),458 text_splitter=list,459 strategy="last",460 allow_partial=True,461 )462463 assert actual == expected464 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY465466467def test_trim_messages_allow_partial_text_splitter() -> None:468 expected = [469 HumanMessage("a 4 token text.", id="third"),470 AIMessage("This is a 4 token text.", id="fourth"),471 ]472473 def count_words(msgs: list[BaseMessage]) -> int:474 count = 0475 for msg in msgs:476 if isinstance(msg.content, str):477 count += len(msg.content.split(" "))478 else:479 count += len(480 " ".join(block["text"] for block in msg.content).split(" ") # type: ignore[index]481 )482 return count483484 def _split_on_space(text: str) -> list[str]:485 splits = text.split(" ")486 return [s + " " for s in splits[:-1]] + splits[-1:]487488 actual = trim_messages(489 _MESSAGES_TO_TRIM,490 max_tokens=10,491 token_counter=count_words,492 strategy="last",493 allow_partial=True,494 text_splitter=_split_on_space,495 )496 assert actual == expected497 assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY498499500def test_trim_messages_include_system_strategy_last_empty_messages() -> None:501 expected: list[BaseMessage] = []502503 actual = trim_messages(504 max_tokens=10,505 token_counter=dummy_token_counter,506 strategy="last",507 include_system=True,508 ).invoke([])509510 assert actual == expected511512513def test_trim_messages_invoke() -> None:514 actual = trim_messages(max_tokens=10, token_counter=dummy_token_counter).invoke(515 _MESSAGES_TO_TRIM516 )517 expected = trim_messages(518 _MESSAGES_TO_TRIM, max_tokens=10, token_counter=dummy_token_counter519 )520 assert actual == expected521522523def test_trim_messages_bound_model_token_counter() -> None:524 trimmer = trim_messages(525 max_tokens=10,526 token_counter=FakeTokenCountingModel().bind(foo="bar"), # type: ignore[call-overload]527 )528 trimmer.invoke([HumanMessage("foobar")])529530531def test_trim_messages_bad_token_counter() -> None:532 trimmer = trim_messages(max_tokens=10, token_counter={}) # type: ignore[call-overload]533 with pytest.raises(534 ValueError,535 match=re.escape(536 "'token_counter' expected to be a model that implements "537 "'get_num_tokens_from_messages()' or a function. "538 "Received object of type <class 'dict'>."539 ),540 ):541 trimmer.invoke([HumanMessage("foobar")])542543544def dummy_token_counter(messages: list[BaseMessage]) -> int:545 # treat each message like it adds 3 default tokens at the beginning546 # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens547 # per message.548549 default_content_len = 4550 default_msg_prefix_len = 3551 default_msg_suffix_len = 3552553 count = 0554 for msg in messages:555 if isinstance(msg.content, str):556 count += (557 default_msg_prefix_len + default_content_len + default_msg_suffix_len558 )559 if isinstance(msg.content, list):560 count += (561 default_msg_prefix_len562 + len(msg.content) * default_content_len563 + default_msg_suffix_len564 )565 return count566567568def test_trim_messages_partial_text_splitting() -> None:569 messages = [HumanMessage(content="This is a long message that needs trimming")]570 messages_copy = [m.model_copy(deep=True) for m in messages]571572 def count_characters(msgs: list[BaseMessage]) -> int:573 return sum(len(m.content) if isinstance(m.content, str) else 0 for m in msgs)574575 # Return individual characters to test text splitting576 def char_splitter(text: str) -> list[str]:577 return list(text)578579 result = trim_messages(580 messages,581 max_tokens=10, # Only allow 10 characters582 token_counter=count_characters,583 strategy="first",584 allow_partial=True,585 text_splitter=char_splitter,586 )587588 assert len(result) == 1589 assert result[0].content == "This is a " # First 10 characters590 assert messages == messages_copy591592593def test_trim_messages_mixed_content_with_partial() -> None:594 messages = [595 AIMessage(596 content=[597 {"type": "text", "text": "First part of text."},598 {"type": "text", "text": "Second part that should be trimmed."},599 ]600 )601 ]602 messages_copy = [m.model_copy(deep=True) for m in messages]603604 # Count total length of all text parts605 def count_text_length(msgs: list[BaseMessage]) -> int:606 total = 0607 for msg in msgs:608 if isinstance(msg.content, list):609 for block in msg.content:610 if isinstance(block, dict) and block.get("type") == "text":611 total += len(block["text"])612 elif isinstance(msg.content, str):613 total += len(msg.content)614 return total615616 result = trim_messages(617 messages,618 max_tokens=20, # Only allow first text block619 token_counter=count_text_length,620 strategy="first",621 allow_partial=True,622 )623624 assert len(result) == 1625 assert len(result[0].content) == 1626 content = result[0].content[0]627 assert isinstance(content, dict)628 assert content["text"] == "First part of text."629 assert messages == messages_copy630631632def test_trim_messages_exact_token_boundary() -> None:633 messages = [634 SystemMessage(content="10 tokens exactly."),635 HumanMessage(content="Another 10 tokens."),636 ]637638 # First message only639 result1 = trim_messages(640 messages,641 max_tokens=10, # Exactly the size of first message642 token_counter=dummy_token_counter,643 strategy="first",644 )645 assert len(result1) == 1646 assert result1[0].content == "10 tokens exactly."647648 # Both messages exactly fit649 result2 = trim_messages(650 messages,651 max_tokens=20, # Exactly the size of both messages652 token_counter=dummy_token_counter,653 strategy="first",654 )655 assert len(result2) == 2656 assert result2 == messages657658659def test_trim_messages_start_on_with_allow_partial() -> None:660 messages = [661 HumanMessage(content="First human message"),662 AIMessage(content="AI response"),663 HumanMessage(content="Second human message"),664 ]665 messages_copy = [m.model_copy(deep=True) for m in messages]666 result = trim_messages(667 messages,668 max_tokens=20,669 token_counter=dummy_token_counter,670 strategy="last",671 allow_partial=True,672 start_on="human",673 )674675 assert len(result) == 1676 assert result[0].content == "Second human message"677 assert messages == messages_copy678679680def test_trim_messages_token_counter_shortcut_approximate() -> None:681 """Test that `'approximate'` shortcut works for `token_counter`."""682 messages = [683 SystemMessage("This is a test message"),684 HumanMessage("Another test message", id="first"),685 AIMessage("AI response here", id="second"),686 ]687 messages_copy = [m.model_copy(deep=True) for m in messages]688689 # Test using the "approximate" shortcut690 result_shortcut = trim_messages(691 messages,692 max_tokens=50,693 token_counter="approximate",694 strategy="last",695 )696697 # Test using count_tokens_approximately directly698 result_direct = trim_messages(699 messages,700 max_tokens=50,701 token_counter=count_tokens_approximately,702 strategy="last",703 )704705 # Both should produce the same result706 assert result_shortcut == result_direct707 assert messages == messages_copy708709710def test_trim_messages_token_counter_shortcut_invalid() -> None:711 """Test that invalid `token_counter` shortcut raises `ValueError`."""712 messages = [713 SystemMessage("This is a test message"),714 HumanMessage("Another test message"),715 ]716717 # Test with invalid shortcut - intentionally passing invalid string to verify718 # runtime error handling for dynamically-constructed inputs719 with pytest.raises(ValueError, match="Invalid token_counter shortcut 'invalid'"):720 trim_messages( # type: ignore[call-overload]721 messages,722 max_tokens=50,723 token_counter="invalid",724 strategy="last",725 )726727728def test_trim_messages_token_counter_shortcut_with_options() -> None:729 """Test that `'approximate'` shortcut works with different trim options."""730 messages = [731 SystemMessage("System instructions"),732 HumanMessage("First human message", id="first"),733 AIMessage("First AI response", id="ai1"),734 HumanMessage("Second human message", id="second"),735 AIMessage("Second AI response", id="ai2"),736 ]737 messages_copy = [m.model_copy(deep=True) for m in messages]738739 # Test with various options740 result = trim_messages(741 messages,742 max_tokens=100,743 token_counter="approximate",744 strategy="last",745 include_system=True,746 start_on="human",747 )748749 # Should include system message and start on human750 assert len(result) >= 2751 assert isinstance(result[0], SystemMessage)752 assert any(isinstance(msg, HumanMessage) for msg in result[1:])753 assert messages == messages_copy754755756class FakeTokenCountingModel(FakeChatModel):757 @override758 def get_num_tokens_from_messages(759 self,760 messages: list[BaseMessage],761 tools: Sequence[dict[str, Any] | type | Callable | BaseTool] | None = None,762 ) -> int:763 return dummy_token_counter(messages)764765766def test_convert_to_messages() -> None:767 message_like: list = [768 # BaseMessage769 SystemMessage("1"),770 SystemMessage("1.1", additional_kwargs={"__openai_role__": "developer"}),771 HumanMessage([{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"),772 AIMessage(773 [774 {"type": "text", "text": "3.1"},775 {776 "type": "tool_use",777 "id": "3.2",778 "name": "3.3",779 "input": {"3.4": "3.5"},780 },781 ]782 ),783 AIMessage(784 [785 {"type": "text", "text": "4.1"},786 {787 "type": "tool_use",788 "id": "4.2",789 "name": "4.3",790 "input": {"4.4": "4.5"},791 },792 ],793 tool_calls=[794 {795 "name": "4.3",796 "args": {"4.4": "4.5"},797 "id": "4.2",798 "type": "tool_call",799 }800 ],801 ),802 ToolMessage("5.1", tool_call_id="5.2", name="5.3"),803 # OpenAI dict804 {"role": "system", "content": "6"},805 {"role": "developer", "content": "6.1"},806 {807 "role": "user",808 "content": [{"type": "image_url", "image_url": {"url": "7.1"}}],809 "name": "7.2",810 },811 {812 "role": "assistant",813 "content": [{"type": "text", "text": "8.1"}],814 "tool_calls": [815 {816 "type": "function",817 "function": {818 "arguments": json.dumps({"8.2": "8.3"}),819 "name": "8.4",820 },821 "id": "8.5",822 }823 ],824 "name": "8.6",825 },826 {"role": "tool", "content": "10.1", "tool_call_id": "10.2"},827 # Tuple/List828 ("system", "11.1"),829 ("developer", "11.2"),830 ("human", [{"type": "image_url", "image_url": {"url": "12.1"}}]),831 (832 "ai",833 [834 {"type": "text", "text": "13.1"},835 {836 "type": "tool_use",837 "id": "13.2",838 "name": "13.3",839 "input": {"13.4": "13.5"},840 },841 ],842 ),843 # String844 "14.1",845 # LangChain dict846 {847 "role": "ai",848 "content": [{"type": "text", "text": "15.1"}],849 "tool_calls": [{"args": {"15.2": "15.3"}, "name": "15.4", "id": "15.5"}],850 "name": "15.6",851 },852 ]853 expected = [854 SystemMessage(content="1"),855 SystemMessage(856 content="1.1", additional_kwargs={"__openai_role__": "developer"}857 ),858 HumanMessage(859 content=[{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"860 ),861 AIMessage(862 content=[863 {"type": "text", "text": "3.1"},864 {865 "type": "tool_use",866 "id": "3.2",867 "name": "3.3",868 "input": {"3.4": "3.5"},869 },870 ]871 ),872 AIMessage(873 content=[874 {"type": "text", "text": "4.1"},875 {876 "type": "tool_use",877 "id": "4.2",878 "name": "4.3",879 "input": {"4.4": "4.5"},880 },881 ],882 tool_calls=[883 {884 "name": "4.3",885 "args": {"4.4": "4.5"},886 "id": "4.2",887 "type": "tool_call",888 }889 ],890 ),891 ToolMessage(content="5.1", name="5.3", tool_call_id="5.2"),892 SystemMessage(content="6"),893 SystemMessage(894 content="6.1", additional_kwargs={"__openai_role__": "developer"}895 ),896 HumanMessage(897 content=[{"type": "image_url", "image_url": {"url": "7.1"}}], name="7.2"898 ),899 AIMessage(900 content=[{"type": "text", "text": "8.1"}],901 name="8.6",902 tool_calls=[903 {904 "name": "8.4",905 "args": {"8.2": "8.3"},906 "id": "8.5",907 "type": "tool_call",908 }909 ],910 ),911 ToolMessage(content="10.1", tool_call_id="10.2"),912 SystemMessage(content="11.1"),913 SystemMessage(914 content="11.2", additional_kwargs={"__openai_role__": "developer"}915 ),916 HumanMessage(content=[{"type": "image_url", "image_url": {"url": "12.1"}}]),917 AIMessage(918 content=[919 {"type": "text", "text": "13.1"},920 {921 "type": "tool_use",922 "id": "13.2",923 "name": "13.3",924 "input": {"13.4": "13.5"},925 },926 ]927 ),928 HumanMessage(content="14.1"),929 AIMessage(930 content=[{"type": "text", "text": "15.1"}],931 name="15.6",932 tool_calls=[933 {934 "name": "15.4",935 "args": {"15.2": "15.3"},936 "id": "15.5",937 "type": "tool_call",938 }939 ],940 ),941 ]942 actual = convert_to_messages(message_like)943 assert expected == actual944945946def test_convert_to_messages_openai_refusal() -> None:947 actual = convert_to_messages(948 [{"role": "assistant", "content": "", "refusal": "9.1"}]949 )950 expected = [AIMessage("", additional_kwargs={"refusal": "9.1"})]951 assert actual == expected952953 # Raises error if content is missing.954 with pytest.raises(955 ValueError, match="Message dict must contain 'role' and 'content' keys"956 ):957 convert_to_messages([{"role": "assistant", "refusal": "9.1"}])958959960def create_image_data() -> str:961 return "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q==" # noqa: E501962963964def create_base64_image(image_format: str = "jpeg") -> str:965 data = create_image_data()966 return f"data:image/{image_format};base64,{data}"967968969def test_convert_to_openai_messages_string() -> None:970 message = "Hello"971 result = convert_to_openai_messages(message)972 assert result == {"role": "user", "content": "Hello"}973974975def test_convert_to_openai_messages_single_message() -> None:976 message: BaseMessage = HumanMessage(content="Hello")977 result = convert_to_openai_messages(message)978 assert result == {"role": "user", "content": "Hello"}979980 # Test IDs981 result = convert_to_openai_messages(message, include_id=True)982 assert result == {"role": "user", "content": "Hello"} # no ID983984 message = AIMessage(content="Hello", id="resp_123")985 result = convert_to_openai_messages(message)986 assert result == {"role": "assistant", "content": "Hello"}987988 result = convert_to_openai_messages(message, include_id=True)989 assert result == {"role": "assistant", "content": "Hello", "id": "resp_123"}990991992def test_convert_to_openai_messages_multiple_messages() -> None:993 messages = [994 SystemMessage(content="System message"),995 HumanMessage(content="Human message"),996 AIMessage(content="AI message"),997 ]998 result = convert_to_openai_messages(messages)999 expected = [1000 {"role": "system", "content": "System message"},1001 {"role": "user", "content": "Human message"},1002 {"role": "assistant", "content": "AI message"},1003 ]1004 assert result == expected100510061007def test_convert_to_openai_messages_openai_string() -> None:1008 messages = [1009 HumanMessage(1010 content=[1011 {"type": "text", "text": "Hello"},1012 {"type": "text", "text": "World"},1013 ]1014 ),1015 AIMessage(1016 content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]1017 ),1018 ]1019 result = convert_to_openai_messages(messages)1020 expected = [1021 {"role": "user", "content": "Hello\nWorld"},1022 {"role": "assistant", "content": "Hi\nthere"},1023 ]1024 assert result == expected102510261027def test_convert_to_openai_messages_openai_block() -> None:1028 messages = [HumanMessage(content="Hello"), AIMessage(content="Hi there")]1029 result = convert_to_openai_messages(messages, text_format="block")1030 expected = [1031 {"role": "user", "content": [{"type": "text", "text": "Hello"}]},1032 {"role": "assistant", "content": [{"type": "text", "text": "Hi there"}]},1033 ]1034 assert result == expected103510361037def test_convert_to_openai_messages_invalid_format() -> None:1038 with pytest.raises(ValueError, match="Unrecognized text_format="):1039 convert_to_openai_messages( # type: ignore[call-overload]1040 [HumanMessage(content="Hello")],1041 text_format="invalid",1042 )104310441045def test_convert_to_openai_messages_openai_image() -> None:1046 base64_image = create_base64_image()1047 messages = [1048 HumanMessage(1049 content=[1050 {"type": "text", "text": "Here's an image:"},1051 {"type": "image_url", "image_url": {"url": base64_image}},1052 ]1053 )1054 ]1055 result = convert_to_openai_messages(messages, text_format="block")1056 expected = [1057 {1058 "role": "user",1059 "content": [1060 {"type": "text", "text": "Here's an image:"},1061 {"type": "image_url", "image_url": {"url": base64_image}},1062 ],1063 }1064 ]1065 assert result == expected106610671068def test_convert_to_openai_messages_anthropic() -> None:1069 image_data = create_image_data()1070 messages = [1071 HumanMessage(1072 content=[1073 {1074 "type": "text",1075 "text": "Here's an image:",1076 "cache_control": {"type": "ephemeral"},1077 },1078 {1079 "type": "image",1080 "source": {1081 "type": "base64",1082 "media_type": "image/jpeg",1083 "data": image_data,1084 },1085 },1086 ]1087 ),1088 AIMessage(1089 content=[1090 {"type": "tool_use", "name": "foo", "input": {"bar": "baz"}, "id": "1"}1091 ]1092 ),1093 HumanMessage(1094 content=[1095 {1096 "type": "tool_result",1097 "tool_use_id": "1",1098 "is_error": False,1099 "content": [1100 {1101 "type": "image",1102 "source": {1103 "type": "base64",1104 "media_type": "image/jpeg",1105 "data": image_data,1106 },1107 },1108 ],1109 }1110 ]1111 ),1112 ]1113 result = convert_to_openai_messages(messages)1114 expected = [1115 {1116 "role": "user",1117 "content": [1118 {"type": "text", "text": "Here's an image:"},1119 {"type": "image_url", "image_url": {"url": create_base64_image()}},1120 ],1121 },1122 {1123 "role": "assistant",1124 "content": "",1125 "tool_calls": [1126 {1127 "type": "function",1128 "function": {1129 "name": "foo",1130 "arguments": json.dumps({"bar": "baz"}),1131 },1132 "id": "1",1133 }1134 ],1135 },1136 {1137 "role": "tool",1138 "content": [1139 {"type": "image_url", "image_url": {"url": create_base64_image()}}1140 ],1141 "tool_call_id": "1",1142 },1143 ]1144 assert result == expected11451146 # Test thinking blocks (pass through)1147 thinking_block = {1148 "signature": "abc123",1149 "thinking": "Thinking text.",1150 "type": "thinking",1151 }1152 text_block = {"text": "Response text.", "type": "text"}1153 messages = [AIMessage([thinking_block, text_block])]1154 result = convert_to_openai_messages(messages)1155 expected = [{"role": "assistant", "content": [thinking_block, text_block]}]1156 assert result == expected115711581159def test_convert_to_openai_messages_bedrock_converse_image() -> None:1160 image_data = create_image_data()1161 messages = [1162 HumanMessage(1163 content=[1164 {"type": "text", "text": "Here's an image:"},1165 {1166 "image": {1167 "format": "jpeg",1168 "source": {"bytes": base64.b64decode(image_data)},1169 }1170 },1171 ]1172 )1173 ]1174 result = convert_to_openai_messages(messages)1175 assert result[0]["content"][1]["type"] == "image_url"1176 assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()117711781179def test_convert_to_openai_messages_vertexai_image() -> None:1180 image_data = create_image_data()1181 messages = [1182 HumanMessage(1183 content=[1184 {"type": "text", "text": "Here's an image:"},1185 {1186 "type": "media",1187 "mime_type": "image/jpeg",1188 "data": base64.b64decode(image_data),1189 },1190 ]1191 )1192 ]1193 result = convert_to_openai_messages(messages)1194 assert result[0]["content"][1]["type"] == "image_url"1195 assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()119611971198def test_convert_to_openai_messages_tool_message() -> None:1199 tool_message = ToolMessage(content="Tool result", tool_call_id="123")1200 result = convert_to_openai_messages([tool_message], text_format="block")1201 assert len(result) == 11202 assert result[0]["content"] == [{"type": "text", "text": "Tool result"}]1203 assert result[0]["tool_call_id"] == "123"120412051206def test_convert_to_openai_messages_tool_use() -> None:1207 messages = [1208 AIMessage(1209 content=[1210 {1211 "type": "tool_use",1212 "id": "123",1213 "name": "calculator",1214 "input": {"a": "b"},1215 }1216 ]1217 )1218 ]1219 result = convert_to_openai_messages(messages, text_format="block")1220 assert result[0]["tool_calls"][0]["type"] == "function"1221 assert result[0]["tool_calls"][0]["id"] == "123"1222 assert result[0]["tool_calls"][0]["function"]["name"] == "calculator"1223 assert result[0]["tool_calls"][0]["function"]["arguments"] == json.dumps({"a": "b"})122412251226def test_convert_to_openai_messages_tool_use_unicode() -> None:1227 """Test that Unicode characters in tool call args are preserved correctly."""1228 messages = [1229 AIMessage(1230 content=[1231 {1232 "type": "tool_use",1233 "id": "123",1234 "name": "create_customer",1235 "input": {"customer_name": "你好啊集团"},1236 }1237 ]1238 )1239 ]1240 result = convert_to_openai_messages(messages, text_format="block")1241 assert result[0]["tool_calls"][0]["type"] == "function"1242 assert result[0]["tool_calls"][0]["id"] == "123"1243 assert result[0]["tool_calls"][0]["function"]["name"] == "create_customer"1244 # Ensure Unicode characters are preserved, not escaped as \\uXXXX1245 arguments_str = result[0]["tool_calls"][0]["function"]["arguments"]1246 parsed_args = json.loads(arguments_str)1247 assert parsed_args["customer_name"] == "你好啊集团"1248 # Also ensure the raw JSON string contains Unicode, not escaped sequences1249 assert "你好啊集团" in arguments_str1250 assert "\\u4f60" not in arguments_str # Should not contain escaped Unicode125112521253def test_convert_to_openai_messages_json() -> None:1254 json_data = {"key": "value"}1255 messages = [HumanMessage(content=[{"type": "json", "json": json_data}])]1256 result = convert_to_openai_messages(messages, text_format="block")1257 assert result[0]["content"][0]["type"] == "text"1258 assert json.loads(result[0]["content"][0]["text"]) == json_data125912601261def test_convert_to_openai_messages_guard_content() -> None:1262 messages = [1263 HumanMessage(1264 content=[1265 {1266 "type": "guard_content",1267 "guard_content": {"text": "Protected content"},1268 }1269 ]1270 )1271 ]1272 result = convert_to_openai_messages(messages, text_format="block")1273 assert result[0]["content"][0]["type"] == "text"1274 assert result[0]["content"][0]["text"] == "Protected content"127512761277def test_convert_to_openai_messages_invalid_block() -> None:1278 messages = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]1279 with pytest.raises(ValueError, match="Unrecognized content block"):1280 convert_to_openai_messages(1281 messages,1282 text_format="block",1283 pass_through_unknown_blocks=False,1284 )1285 # Accept by default1286 result = convert_to_openai_messages(messages, text_format="block")1287 assert result == [{"role": "user", "content": [{"type": "invalid", "foo": "bar"}]}]128812891290def test_handle_openai_responses_blocks() -> None:1291 blocks: str | list[str | dict] = [1292 {"type": "reasoning", "id": "1"},1293 {1294 "type": "function_call",1295 "name": "multiply",1296 "arguments": '{"x":5,"y":4}',1297 "call_id": "call_abc123",1298 "id": "fc_abc123",1299 "status": "completed",1300 },1301 ]1302 message = AIMessage(content=blocks)13031304 expected_tool_call = {1305 "type": "function",1306 "function": {1307 "name": "multiply",1308 "arguments": '{"x":5,"y":4}',1309 },1310 "id": "call_abc123",1311 }1312 result = convert_to_openai_messages(message)1313 assert isinstance(result, dict)1314 assert result["content"] == blocks1315 assert result["tool_calls"] == [expected_tool_call]13161317 result = convert_to_openai_messages(message, pass_through_unknown_blocks=False)1318 assert isinstance(result, dict)1319 assert result["content"] == [{"type": "reasoning", "id": "1"}]1320 assert result["tool_calls"] == [expected_tool_call]132113221323def test_convert_to_openai_messages_empty_message() -> None:1324 result = convert_to_openai_messages(HumanMessage(content=""))1325 assert result == {"role": "user", "content": ""}132613271328def test_convert_to_openai_messages_empty_list() -> None:1329 result = convert_to_openai_messages([])1330 assert result == []133113321333def test_convert_to_openai_messages_mixed_content_types() -> None:1334 messages = [1335 HumanMessage(1336 content=[1337 "Text message",1338 {"type": "text", "text": "Structured text"},1339 {"type": "image_url", "image_url": {"url": create_base64_image()}},1340 ]1341 )1342 ]1343 result = convert_to_openai_messages(messages, text_format="block")1344 assert len(result[0]["content"]) == 31345 assert isinstance(result[0]["content"][0], dict)1346 assert isinstance(result[0]["content"][1], dict)1347 assert isinstance(result[0]["content"][2], dict)134813491350def test_convert_to_openai_messages_developer() -> None:1351 messages: list[MessageLikeRepresentation] = [1352 SystemMessage("a", additional_kwargs={"__openai_role__": "developer"}),1353 {"role": "developer", "content": "a"},1354 ]1355 result = convert_to_openai_messages(messages)1356 assert result == [{"role": "developer", "content": "a"}] * 2135713581359def test_convert_to_openai_messages_multimodal() -> None:1360 """v0 and v1 content to OpenAI messages conversion."""1361 messages = [1362 HumanMessage(1363 content=[1364 # Prior v0 blocks1365 {"type": "text", "text": "Text message"},1366 {1367 "type": "image",1368 "url": "https://example.com/test.png",1369 },1370 {1371 "type": "image",1372 "source_type": "base64",1373 "data": "<base64 string>",1374 "mime_type": "image/png",1375 },1376 {1377 "type": "file",1378 "source_type": "base64",1379 "data": "<base64 string>",1380 "mime_type": "application/pdf",1381 "filename": "test.pdf",1382 },1383 {1384 # OpenAI Chat Completions file format1385 "type": "file",1386 "file": {1387 "filename": "draconomicon.pdf",1388 "file_data": "data:application/pdf;base64,<base64 string>",1389 },1390 },1391 {1392 "type": "file",1393 "source_type": "id",1394 "id": "file-abc123",1395 },1396 {1397 "type": "audio",1398 "source_type": "base64",1399 "data": "<base64 string>",1400 "mime_type": "audio/wav",1401 },1402 {1403 "type": "input_audio",1404 "input_audio": {1405 "data": "<base64 string>",1406 "format": "wav",1407 },1408 },1409 # v1 Additions1410 {1411 "type": "image",1412 "source_type": "url", # backward compatibility v0 block field1413 "url": "https://example.com/test.png",1414 },1415 {1416 "type": "image",1417 "base64": "<base64 string>",1418 "mime_type": "image/png",1419 },1420 {1421 "type": "file",1422 "base64": "<base64 string>",1423 "mime_type": "application/pdf",1424 "filename": "test.pdf", # backward compatibility v0 block field1425 },1426 {1427 "type": "file",1428 "file_id": "file-abc123",1429 },1430 {1431 "type": "audio",1432 "base64": "<base64 string>",1433 "mime_type": "audio/wav",1434 },1435 ]1436 )1437 ]1438 result = convert_to_openai_messages(messages, text_format="block")1439 assert len(result) == 11440 message = result[0]1441 assert len(message["content"]) == 1314421443 # Test auto-adding filename1444 messages = [1445 HumanMessage(1446 content=[1447 {1448 "type": "file",1449 "base64": "<base64 string>",1450 "mime_type": "application/pdf",1451 },1452 ]1453 )1454 ]1455 with pytest.warns(match="filename"):1456 result = convert_to_openai_messages(messages, text_format="block")1457 assert len(result) == 11458 message = result[0]1459 assert len(message["content"]) == 11460 block = message["content"][0]1461 assert block == {1462 # OpenAI Chat Completions file format1463 "type": "file",1464 "file": {1465 "file_data": "data:application/pdf;base64,<base64 string>",1466 "filename": "LC_AUTOGENERATED",1467 },1468 }146914701471def test_count_tokens_approximately_empty_messages() -> None:1472 # Test with empty message list1473 assert count_tokens_approximately([]) == 014741475 # Test with empty content1476 messages = [HumanMessage(content="")]1477 # 4 role chars -> 1 + 3 = 4 tokens1478 assert count_tokens_approximately(messages) == 4147914801481def test_count_tokens_approximately_with_names() -> None:1482 messages = [1483 # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1484 # (with name: extra 4 name chars, so total = 4 + 3 = 7 tokens)1485 HumanMessage(content="Hello", name="user"),1486 # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1487 # (with name: extra 9 name chars, so total = 7 + 3 = 10 tokens)1488 AIMessage(content="Hi there", name="assistant"),1489 ]1490 # With names included (default)1491 assert count_tokens_approximately(messages) == 1714921493 # Without names1494 without_names = count_tokens_approximately(messages, count_name=False)1495 assert without_names == 14149614971498def test_count_tokens_approximately_openai_format() -> None:1499 # same as test_count_tokens_approximately_with_names, but in OpenAI format1500 messages = [1501 {"role": "user", "content": "Hello", "name": "user"},1502 {"role": "assistant", "content": "Hi there", "name": "assistant"},1503 ]1504 # With names included (default)1505 assert count_tokens_approximately(messages) == 1715061507 # Without names1508 without_names = count_tokens_approximately(messages, count_name=False)1509 assert without_names == 14151015111512def test_count_tokens_approximately_string_content() -> None:1513 messages = [1514 # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1515 HumanMessage(content="Hello"),1516 # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1517 AIMessage(content="Hi there"),1518 # 12 chars + 4 role chars -> 4 + 3 = 7 tokens1519 HumanMessage(content="How are you?"),1520 ]1521 assert count_tokens_approximately(messages) == 21152215231524def test_count_tokens_approximately_list_content() -> None:1525 messages = [1526 # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1527 HumanMessage(content=[{"foo": "bar"}]),1528 # '[{"test": 123}]' -> 15 chars + 9 role chars -> 6 + 3 = 9 tokens1529 AIMessage(content=[{"test": 123}]),1530 ]1531 assert count_tokens_approximately(messages) == 17153215331534def test_count_tokens_approximately_tool_calls() -> None:1535 tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1536 messages = [1537 # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1538 AIMessage(content="", tool_calls=tool_calls),1539 # 15 chars + 4 role chars -> 5 + 3 = 8 tokens1540 HumanMessage(content="Regular message"),1541 ]1542 assert count_tokens_approximately(messages) == 331543 # AI message w/ both content and tool calls1544 # 94 chars + 9 role chars -> 26 + 3 = 29 tokens1545 messages = [1546 AIMessage(content="Regular message", tool_calls=tool_calls),1547 ]1548 assert count_tokens_approximately(messages) == 29154915501551def test_count_tokens_approximately_custom_token_length() -> None:1552 messages = [1553 # 11 chars + 4 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31554 HumanMessage(content="Hello world"),1555 # 7 chars + 9 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31556 AIMessage(content="Testing"),1557 ]1558 assert count_tokens_approximately(messages, chars_per_token=4) == 141559 assert count_tokens_approximately(messages, chars_per_token=2) == 22156015611562def test_count_tokens_approximately_large_message_content() -> None:1563 # Test with large content to ensure no issues1564 large_text = "x" * 100001565 messages = [HumanMessage(content=large_text)]1566 # 10,000 chars + 4 role chars -> 2501 + 3 = 2504 tokens1567 assert count_tokens_approximately(messages) == 2504156815691570def test_count_tokens_approximately_large_number_of_messages() -> None:1571 # Test with large content to ensure no issues1572 messages = [HumanMessage(content="x")] * 1_0001573 # 1 chars + 4 role chars -> 2 + 3 = 5 tokens1574 assert count_tokens_approximately(messages) == 5_000157515761577def test_count_tokens_approximately_mixed_content_types() -> None:1578 # Test with a variety of content types in the same message list1579 tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1580 messages = [1581 # 13 chars + 6 role chars -> 5 + 3 = 8 tokens1582 SystemMessage(content="System prompt"),1583 # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1584 HumanMessage(content=[{"foo": "bar"}]),1585 # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1586 AIMessage(content="", tool_calls=tool_calls),1587 # 13 chars + 4 role chars + 9 name chars + 1 tool call ID char ->1588 # 7 + 3 = 10 tokens1589 ToolMessage(content="Tool response", name="test_tool", tool_call_id="1"),1590 ]1591 token_count = count_tokens_approximately(messages)1592 assert token_count == 5115931594 # Ensure that count is consistent if we do one message at a time1595 assert sum(count_tokens_approximately([m]) for m in messages) == token_count159615971598def test_count_tokens_approximately_usage_metadata_scaling() -> None:1599 messages = [1600 HumanMessage("text"),1601 AIMessage(1602 "text",1603 response_metadata={"model_provider": "openai"},1604 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1605 ),1606 HumanMessage("text"),1607 AIMessage(1608 "text",1609 response_metadata={"model_provider": "openai"},1610 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1611 ),1612 ]16131614 unscaled = count_tokens_approximately(messages)1615 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16161617 ratio = scaled / unscaled1618 assert 1 <= round(ratio, 1) <= 1.2 # we ceil scale token counts, so can be > 1.216191620 messages.extend([ToolMessage("text", tool_call_id="abc123")] * 3)16211622 unscaled_extended = count_tokens_approximately(messages)1623 scaled_extended = count_tokens_approximately(1624 messages, use_usage_metadata_scaling=True1625 )16261627 # scaling should still be based on the most recent AIMessage with total_tokens=2001628 assert unscaled_extended > unscaled1629 assert scaled_extended > scaled16301631 # And the scaled total should be the unscaled total multiplied by the same ratio.1632 # ratio = 200 / unscaled (as of last AI message)1633 expected_scaled_extended = math.ceil(unscaled_extended * ratio)1634 assert scaled_extended <= expected_scaled_extended <= scaled_extended + 1163516361637def test_count_tokens_approximately_usage_metadata_scaling_model_provider() -> None:1638 messages = [1639 HumanMessage("Hello"),1640 AIMessage(1641 "Hi",1642 response_metadata={"model_provider": "openai"},1643 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1644 ),1645 HumanMessage("More text"),1646 AIMessage(1647 "More response",1648 response_metadata={"model_provider": "anthropic"},1649 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1650 ),1651 ]16521653 unscaled = count_tokens_approximately(messages)1654 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)1655 assert scaled == unscaled165616571658def test_count_tokens_approximately_usage_metadata_scaling_total_tokens() -> None:1659 messages = [1660 HumanMessage("Hello"),1661 AIMessage(1662 "Hi",1663 response_metadata={"model_provider": "openai"},1664 # no usage metadata -> skip1665 ),1666 ]16671668 unscaled = count_tokens_approximately(messages, chars_per_token=5)1669 scaled = count_tokens_approximately(1670 messages, chars_per_token=5, use_usage_metadata_scaling=True1671 )16721673 assert scaled == unscaled167416751676def test_count_tokens_approximately_usage_metadata_scaling_floor_at_one() -> None:1677 messages = [1678 HumanMessage("text"),1679 AIMessage(1680 "text",1681 response_metadata={"model_provider": "openai"},1682 # Set total_tokens lower than the approximate count up through this message.1683 usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 1},1684 ),1685 HumanMessage("text"),1686 ]16871688 unscaled = count_tokens_approximately(messages)1689 scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16901691 # scale factor would be < 1, but we floor it at 1.0 to avoid decreasing counts1692 assert scaled == unscaled169316941695def test_get_buffer_string_with_structured_content() -> None:1696 """Test get_buffer_string with structured content in messages."""1697 messages = [1698 HumanMessage(content=[{"type": "text", "text": "Hello, world!"}]),1699 AIMessage(content=[{"type": "text", "text": "Hi there!"}]),1700 SystemMessage(content=[{"type": "text", "text": "System message"}]),1701 ]1702 expected = "Human: Hello, world!\nAI: Hi there!\nSystem: System message"1703 actual = get_buffer_string(messages)1704 assert actual == expected170517061707def test_get_buffer_string_with_mixed_content() -> None:1708 """Test get_buffer_string with mixed content types in messages."""1709 messages = [1710 HumanMessage(content="Simple text"),1711 AIMessage(content=[{"type": "text", "text": "Structured text"}]),1712 SystemMessage(content=[{"type": "text", "text": "Another structured text"}]),1713 ]1714 expected = (1715 "Human: Simple text\nAI: Structured text\nSystem: Another structured text"1716 )1717 actual = get_buffer_string(messages)1718 assert actual == expected171917201721def test_get_buffer_string_with_function_call() -> None:1722 """Test get_buffer_string with function call in additional_kwargs."""1723 messages = [1724 HumanMessage(content="Hello"),1725 AIMessage(1726 content="Hi",1727 additional_kwargs={1728 "function_call": {1729 "name": "test_function",1730 "arguments": '{"arg": "value"}',1731 }1732 },1733 ),1734 ]1735 # TODO: consider changing this1736 expected = (1737 "Human: Hello\n"1738 "AI: Hi{'name': 'test_function', 'arguments': '{\"arg\": \"value\"}'}"1739 )1740 actual = get_buffer_string(messages)1741 assert actual == expected174217431744def test_get_buffer_string_with_empty_content() -> None:1745 """Test get_buffer_string with empty content in messages."""1746 messages = [1747 HumanMessage(content=[]),1748 AIMessage(content=""),1749 SystemMessage(content=[]),1750 ]1751 expected = "Human: \nAI: \nSystem: "1752 actual = get_buffer_string(messages)1753 assert actual == expected175417551756def test_get_buffer_string_with_tool_calls() -> None:1757 """Test `get_buffer_string` with `tool_calls` field."""1758 messages = [1759 HumanMessage(content="What's the weather?"),1760 AIMessage(1761 content="Let me check the weather",1762 tool_calls=[1763 {1764 "name": "get_weather",1765 "args": {"city": "NYC"},1766 "id": "call_1",1767 "type": "tool_call",1768 }1769 ],1770 ),1771 ]1772 result = get_buffer_string(messages)1773 assert "Human: What's the weather?" in result1774 assert "AI: Let me check the weather" in result1775 assert "get_weather" in result1776 assert "NYC" in result177717781779def test_get_buffer_string_with_tool_calls_empty_content() -> None:1780 """Test `get_buffer_string` with `tool_calls` and empty `content`."""1781 messages = [1782 AIMessage(1783 content="",1784 tool_calls=[1785 {1786 "name": "search",1787 "args": {"query": "test"},1788 "id": "call_2",1789 "type": "tool_call",1790 }1791 ],1792 ),1793 ]1794 result = get_buffer_string(messages)1795 assert "AI: " in result1796 assert "search" in result179717981799def test_get_buffer_string_tool_calls_preferred_over_function_call() -> None:1800 """Test that `tool_calls` takes precedence over legacy `function_call`."""1801 messages = [1802 AIMessage(1803 content="Calling tools",1804 tool_calls=[1805 {1806 "name": "modern_tool",1807 "args": {"key": "value"},1808 "id": "call_3",1809 "type": "tool_call",1810 }1811 ],1812 additional_kwargs={1813 "function_call": {"name": "legacy_function", "arguments": "{}"}1814 },1815 ),1816 ]1817 result = get_buffer_string(messages)1818 assert "modern_tool" in result1819 assert "legacy_function" not in result182018211822def test_convert_to_openai_messages_reasoning_content() -> None:1823 """Test convert_to_openai_messages with reasoning content blocks."""1824 # Test reasoning block with empty summary1825 msg = AIMessage(content=[{"type": "reasoning", "summary": []}])1826 result = convert_to_openai_messages(msg, text_format="block")1827 expected = {"role": "assistant", "content": [{"type": "reasoning", "summary": []}]}1828 assert result == expected18291830 # Test reasoning block with summary content1831 msg_with_summary = AIMessage(1832 content=[1833 {1834 "type": "reasoning",1835 "summary": [1836 {"type": "text", "text": "First thought"},1837 {"type": "text", "text": "Second thought"},1838 ],1839 }1840 ]1841 )1842 result_with_summary = convert_to_openai_messages(1843 msg_with_summary, text_format="block"1844 )1845 expected_with_summary = {1846 "role": "assistant",1847 "content": [1848 {1849 "type": "reasoning",1850 "summary": [1851 {"type": "text", "text": "First thought"},1852 {"type": "text", "text": "Second thought"},1853 ],1854 }1855 ],1856 }1857 assert result_with_summary == expected_with_summary18581859 # Test mixed content with reasoning and text1860 mixed_msg = AIMessage(1861 content=[1862 {"type": "text", "text": "Regular response"},1863 {1864 "type": "reasoning",1865 "summary": [{"type": "text", "text": "My reasoning process"}],1866 },1867 ]1868 )1869 mixed_result = convert_to_openai_messages(mixed_msg, text_format="block")1870 expected_mixed = {1871 "role": "assistant",1872 "content": [1873 {"type": "text", "text": "Regular response"},1874 {1875 "type": "reasoning",1876 "summary": [{"type": "text", "text": "My reasoning process"}],1877 },1878 ],1879 }1880 assert mixed_result == expected_mixed188118821883# Tests for get_buffer_string XML format188418851886def test_get_buffer_string_xml_empty_messages_list() -> None:1887 """Test XML format with empty messages list."""1888 messages: list[BaseMessage] = []1889 result = get_buffer_string(messages, format="xml")1890 expected = ""1891 assert result == expected189218931894def test_get_buffer_string_xml_basic() -> None:1895 """Test XML format output with all message types."""1896 messages = [1897 SystemMessage(content="System message"),1898 HumanMessage(content="Human message"),1899 AIMessage(content="AI message"),1900 FunctionMessage(content="Function result", name="test_fn"),1901 ToolMessage(content="Tool result", tool_call_id="123"),1902 ]1903 result = get_buffer_string(messages, format="xml")1904 expected = (1905 '<message type="system">System message</message>\n'1906 '<message type="human">Human message</message>\n'1907 '<message type="ai">AI message</message>\n'1908 '<message type="function">Function result</message>\n'1909 '<message type="tool">Tool result</message>'1910 )1911 assert result == expected191219131914def test_get_buffer_string_xml_custom_prefixes() -> None:1915 """Test XML format with custom human and ai prefixes."""1916 messages = [1917 HumanMessage(content="Hello"),1918 AIMessage(content="Hi there"),1919 ]1920 result = get_buffer_string(1921 messages, human_prefix="User", ai_prefix="Assistant", format="xml"1922 )1923 expected = (1924 '<message type="user">Hello</message>\n'1925 '<message type="assistant">Hi there</message>'1926 )1927 assert result == expected192819291930def test_get_buffer_string_xml_custom_separator() -> None:1931 """Test XML format with custom message separator."""1932 messages = [1933 HumanMessage(content="Hello"),1934 AIMessage(content="Hi there"),1935 ]1936 result = get_buffer_string(messages, format="xml", message_separator="\n\n")1937 expected = (1938 '<message type="human">Hello</message>\n\n<message type="ai">Hi there</message>'1939 )1940 assert result == expected194119421943def test_get_buffer_string_prefix_custom_separator() -> None:1944 """Test prefix format with custom message separator."""1945 messages = [1946 HumanMessage(content="Hello"),1947 AIMessage(content="Hi there"),1948 ]1949 result = get_buffer_string(messages, format="prefix", message_separator=" | ")1950 expected = "Human: Hello | AI: Hi there"1951 assert result == expected195219531954def test_get_buffer_string_xml_escaping() -> None:1955 """Test XML format properly escapes special characters in content."""1956 messages = [1957 HumanMessage(content="Is 5 < 10 & 10 > 5?"),1958 AIMessage(content='Yes, and here\'s a "quote"'),1959 ]1960 result = get_buffer_string(messages, format="xml")1961 # xml.sax.saxutils.escape escapes <, >, & (not quotes in content)1962 expected = (1963 '<message type="human">Is 5 < 10 & 10 > 5?</message>\n'1964 '<message type="ai">Yes, and here\'s a "quote"</message>'1965 )1966 assert result == expected196719681969def test_get_buffer_string_xml_unicode_content() -> None:1970 """Test XML format with Unicode content."""1971 messages = [1972 HumanMessage(content="你好世界"), # Chinese: Hello World1973 AIMessage(content="こんにちは"), # Japanese: Hello1974 ]1975 result = get_buffer_string(messages, format="xml")1976 expected = (1977 '<message type="human">你好世界</message>\n'1978 '<message type="ai">こんにちは</message>'1979 )1980 assert result == expected198119821983def test_get_buffer_string_xml_chat_message_valid_role() -> None:1984 """Test XML format with `ChatMessage` having valid XML tag name role."""1985 messages = [1986 ChatMessage(content="Hello", role="Assistant"),1987 ]1988 result = get_buffer_string(messages, format="xml")1989 # Role is used directly as the type attribute value1990 expected = '<message type="Assistant">Hello</message>'1991 assert result == expected19921993 # Spaces in role1994 messages = [1995 ChatMessage(content="Hello", role="my custom role"),1996 ]1997 result = get_buffer_string(messages, format="xml")1998 # Custom roles with spaces use quoteattr for proper escaping1999 expected = '<message type="my custom role">Hello</message>'2000 assert result == expected
Findings
✓ No findings reported for this file.