libs/core/tests/unit_tests/messages/test_utils.py PYTHON 3,107 lines View on github.com → Search inside
File is large — showing lines 1–2,000 of 3,107.
1import base642import json3import math4import re5from collections.abc import Callable, Sequence6from typing import Any, TypedDict78import pytest9from typing_extensions import NotRequired, override1011from langchain_core.language_models.fake_chat_models import FakeChatModel12from langchain_core.messages import (13    AIMessage,14    BaseMessage,15    ChatMessage,16    FunctionMessage,17    HumanMessage,18    SystemMessage,19    ToolCall,20    ToolMessage,21)22from langchain_core.messages.utils import (23    MessageLikeRepresentation,24    convert_to_messages,25    convert_to_openai_messages,26    count_tokens_approximately,27    filter_messages,28    get_buffer_string,29    merge_message_runs,30    trim_messages,31)32from langchain_core.tools import BaseTool, tool333435@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])36def test_merge_message_runs_str(msg_cls: type[BaseMessage]) -> None:37    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]38    messages_model_copy = [m.model_copy(deep=True) for m in messages]39    expected = [msg_cls("foo\nbar\nbaz")]40    actual = merge_message_runs(messages)41    assert actual == expected42    assert messages == messages_model_copy434445@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])46def test_merge_message_runs_str_with_specified_separator(47    msg_cls: type[BaseMessage],48) -> None:49    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]50    messages_model_copy = [m.model_copy(deep=True) for m in messages]51    expected = [msg_cls("foo<sep>bar<sep>baz")]52    actual = merge_message_runs(messages, chunk_separator="<sep>")53    assert actual == expected54    assert messages == messages_model_copy555657@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])58def test_merge_message_runs_str_without_separator(59    msg_cls: type[BaseMessage],60) -> None:61    messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")]62    messages_model_copy = [m.model_copy(deep=True) for m in messages]63    expected = [msg_cls("foobarbaz")]64    actual = merge_message_runs(messages, chunk_separator="")65    assert actual == expected66    assert messages == messages_model_copy676869def test_merge_message_runs_response_metadata() -> None:70    messages = [71        AIMessage("foo", id="1", response_metadata={"input_tokens": 1}),72        AIMessage("bar", id="2", response_metadata={"input_tokens": 2}),73    ]74    expected = [75        AIMessage(76            "foo\nbar",77            id="1",78            response_metadata={"input_tokens": 1},79        )80    ]81    actual = merge_message_runs(messages)82    assert actual == expected83    # Check it's not mutated84    assert messages[1].response_metadata == {"input_tokens": 2}858687def test_merge_message_runs_content() -> None:88    messages = [89        AIMessage("foo", id="1"),90        AIMessage(91            [92                {"text": "bar", "type": "text"},93                {"image_url": "...", "type": "image_url"},94            ],95            tool_calls=[96                ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call")97            ],98            id="2",99        ),100        AIMessage(101            "baz",102            tool_calls=[103                ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call")104            ],105            id="3",106        ),107    ]108    messages_model_copy = [m.model_copy(deep=True) for m in messages]109    expected = [110        AIMessage(111            [112                "foo",113                {"text": "bar", "type": "text"},114                {"image_url": "...", "type": "image_url"},115                "baz",116            ],117            tool_calls=[118                ToolCall(name="foo_tool", args={"x": 1}, id="tool1", type="tool_call"),119                ToolCall(name="foo_tool", args={"x": 5}, id="tool2", type="tool_call"),120            ],121            id="1",122        ),123    ]124    actual = merge_message_runs(messages)125    assert actual == expected126    invoked = merge_message_runs().invoke(messages)127    assert actual == invoked128    assert messages == messages_model_copy129130131def test_merge_messages_tool_messages() -> None:132    messages = [133        ToolMessage("foo", tool_call_id="1"),134        ToolMessage("bar", tool_call_id="2"),135    ]136    messages_model_copy = [m.model_copy(deep=True) for m in messages]137    actual = merge_message_runs(messages)138    assert actual == messages139    assert messages == messages_model_copy140141142class FilterFields(TypedDict):143    include_names: NotRequired[Sequence[str]]144    exclude_names: NotRequired[Sequence[str]]145    include_types: NotRequired[Sequence[str | type[BaseMessage]]]146    exclude_types: NotRequired[Sequence[str | type[BaseMessage]]]147    include_ids: NotRequired[Sequence[str]]148    exclude_ids: NotRequired[Sequence[str]]149    exclude_tool_calls: NotRequired[Sequence[str] | bool]150151152@pytest.mark.parametrize(153    "filters",154    [155        {"include_names": ["blur"]},156        {"exclude_names": ["blah"]},157        {"include_ids": ["2"]},158        {"exclude_ids": ["1"]},159        {"include_types": "human"},160        {"include_types": ["human"]},161        {"include_types": HumanMessage},162        {"include_types": [HumanMessage]},163        {"exclude_types": "system"},164        {"exclude_types": ["system"]},165        {"exclude_types": SystemMessage},166        {"exclude_types": [SystemMessage]},167        {"include_names": ["blah", "blur"], "exclude_types": [SystemMessage]},168    ],169)170def test_filter_message(filters: FilterFields) -> None:171    messages = [172        SystemMessage("foo", name="blah", id="1"),173        HumanMessage("bar", name="blur", id="2"),174    ]175    messages_model_copy = [m.model_copy(deep=True) for m in messages]176    expected = messages[1:2]177    actual = filter_messages(messages, **filters)178    assert expected == actual179    invoked = filter_messages(**filters).invoke(messages)180    assert invoked == actual181    assert messages == messages_model_copy182183184def test_filter_message_exclude_tool_calls() -> None:185    tool_calls = [186        {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},187        {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},188    ]189    messages = [190        HumanMessage("foo", name="blah", id="1"),191        AIMessage("foo-response", name="blah", id="2"),192        HumanMessage("bar", name="blur", id="3"),193        AIMessage(194            "bar-response",195            tool_calls=tool_calls,196            id="4",197        ),198        ToolMessage("baz", tool_call_id="1", id="5"),199        ToolMessage("qux", tool_call_id="2", id="6"),200    ]201    messages_model_copy = [m.model_copy(deep=True) for m in messages]202    expected = messages[:3]203204    # test excluding all tool calls205    actual = filter_messages(messages, exclude_tool_calls=True)206    assert expected == actual207208    # test explicitly excluding all tool calls209    actual = filter_messages(messages, exclude_tool_calls=["1", "2"])210    assert expected == actual211212    # test excluding a specific tool call213    expected = messages[:5]214    expected[3] = expected[3].model_copy(update={"tool_calls": [tool_calls[0]]})215    actual = filter_messages(messages, exclude_tool_calls=["2"])216    assert expected == actual217218    # assert that we didn't mutate the original messages219    assert messages == messages_model_copy220221222def test_filter_message_exclude_tool_calls_content_blocks() -> None:223    tool_calls = [224        {"name": "foo", "id": "1", "args": {}, "type": "tool_call"},225        {"name": "bar", "id": "2", "args": {}, "type": "tool_call"},226    ]227    messages = [228        HumanMessage("foo", name="blah", id="1"),229        AIMessage("foo-response", name="blah", id="2"),230        HumanMessage("bar", name="blur", id="3"),231        AIMessage(232            [233                {"text": "bar-response", "type": "text"},234                {"name": "foo", "type": "tool_use", "id": "1"},235                {"name": "bar", "type": "tool_use", "id": "2"},236            ],237            tool_calls=tool_calls,238            id="4",239        ),240        ToolMessage("baz", tool_call_id="1", id="5"),241        ToolMessage("qux", tool_call_id="2", id="6"),242    ]243    messages_model_copy = [m.model_copy(deep=True) for m in messages]244    expected = messages[:3]245246    # test excluding all tool calls247    actual = filter_messages(messages, exclude_tool_calls=True)248    assert expected == actual249250    # test explicitly excluding all tool calls251    actual = filter_messages(messages, exclude_tool_calls=["1", "2"])252    assert expected == actual253254    # test excluding a specific tool call255    expected = messages[:4] + messages[-1:]256    expected[3] = expected[3].model_copy(257        update={258            "tool_calls": [tool_calls[1]],259            "content": [260                {"text": "bar-response", "type": "text"},261                {"name": "bar", "type": "tool_use", "id": "2"},262            ],263        }264    )265    actual = filter_messages(messages, exclude_tool_calls=["1"])266    assert expected == actual267268    # assert that we didn't mutate the original messages269    assert messages == messages_model_copy270271272_MESSAGES_TO_TRIM = [273    SystemMessage("This is a 4 token text."),274    HumanMessage("This is a 4 token text.", id="first"),275    AIMessage(276        [277            {"type": "text", "text": "This is the FIRST 4 token block."},278            {"type": "text", "text": "This is the SECOND 4 token block."},279        ],280        id="second",281    ),282    HumanMessage("This is a 4 token text.", id="third"),283    AIMessage("This is a 4 token text.", id="fourth"),284]285_MESSAGES_TO_TRIM_COPY = [m.model_copy(deep=True) for m in _MESSAGES_TO_TRIM]286287288def test_trim_messages_first_30() -> None:289    expected = [290        SystemMessage("This is a 4 token text."),291        HumanMessage("This is a 4 token text.", id="first"),292    ]293    actual = trim_messages(294        _MESSAGES_TO_TRIM,295        max_tokens=30,296        token_counter=dummy_token_counter,297        strategy="first",298    )299    assert actual == expected300    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY301302303def test_trim_messages_first_30_allow_partial() -> None:304    expected = [305        SystemMessage("This is a 4 token text."),306        HumanMessage("This is a 4 token text.", id="first"),307        AIMessage(308            [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"309        ),310    ]311    actual = trim_messages(312        _MESSAGES_TO_TRIM,313        max_tokens=30,314        token_counter=dummy_token_counter,315        strategy="first",316        allow_partial=True,317    )318    assert actual == expected319    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY320321322def test_trim_messages_first_30_allow_partial_end_on_human() -> None:323    expected = [324        SystemMessage("This is a 4 token text."),325        HumanMessage("This is a 4 token text.", id="first"),326    ]327328    actual = trim_messages(329        _MESSAGES_TO_TRIM,330        max_tokens=30,331        token_counter=dummy_token_counter,332        strategy="first",333        allow_partial=True,334        end_on="human",335    )336    assert actual == expected337    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY338339340def test_trim_messages_last_30_include_system() -> None:341    expected = [342        SystemMessage("This is a 4 token text."),343        HumanMessage("This is a 4 token text.", id="third"),344        AIMessage("This is a 4 token text.", id="fourth"),345    ]346347    actual = trim_messages(348        _MESSAGES_TO_TRIM,349        max_tokens=30,350        include_system=True,351        token_counter=dummy_token_counter,352        strategy="last",353    )354    assert actual == expected355    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY356357358def test_trim_messages_last_40_include_system_allow_partial() -> None:359    expected = [360        SystemMessage("This is a 4 token text."),361        AIMessage(362            [363                {"type": "text", "text": "This is the SECOND 4 token block."},364            ],365            id="second",366        ),367        HumanMessage("This is a 4 token text.", id="third"),368        AIMessage("This is a 4 token text.", id="fourth"),369    ]370371    actual = trim_messages(372        _MESSAGES_TO_TRIM,373        max_tokens=40,374        token_counter=dummy_token_counter,375        strategy="last",376        allow_partial=True,377        include_system=True,378    )379380    assert actual == expected381    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY382383384def test_trim_messages_last_30_include_system_allow_partial_end_on_human() -> None:385    expected = [386        SystemMessage("This is a 4 token text."),387        AIMessage(388            [389                {"type": "text", "text": "This is the SECOND 4 token block."},390            ],391            id="second",392        ),393        HumanMessage("This is a 4 token text.", id="third"),394    ]395396    actual = trim_messages(397        _MESSAGES_TO_TRIM,398        max_tokens=30,399        token_counter=dummy_token_counter,400        strategy="last",401        allow_partial=True,402        include_system=True,403        end_on="human",404    )405406    assert actual == expected407    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY408409410def test_trim_messages_last_40_include_system_allow_partial_start_on_human() -> None:411    expected = [412        SystemMessage("This is a 4 token text."),413        HumanMessage("This is a 4 token text.", id="third"),414        AIMessage("This is a 4 token text.", id="fourth"),415    ]416417    actual = trim_messages(418        _MESSAGES_TO_TRIM,419        max_tokens=30,420        token_counter=dummy_token_counter,421        strategy="last",422        allow_partial=True,423        include_system=True,424        start_on="human",425    )426427    assert actual == expected428    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY429430431def test_trim_messages_allow_partial_one_message() -> None:432    expected = [433        HumanMessage("Th", id="third"),434    ]435436    actual = trim_messages(437        [HumanMessage("This is a funky text.", id="third")],438        max_tokens=2,439        token_counter=lambda messages: sum(len(m.content) for m in messages),440        text_splitter=list,441        strategy="first",442        allow_partial=True,443    )444445    assert actual == expected446    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY447448449def test_trim_messages_last_allow_partial_one_message() -> None:450    expected = [451        HumanMessage("t.", id="third"),452    ]453454    actual = trim_messages(455        [HumanMessage("This is a funky text.", id="third")],456        max_tokens=2,457        token_counter=lambda messages: sum(len(m.content) for m in messages),458        text_splitter=list,459        strategy="last",460        allow_partial=True,461    )462463    assert actual == expected464    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY465466467def test_trim_messages_allow_partial_text_splitter() -> None:468    expected = [469        HumanMessage("a 4 token text.", id="third"),470        AIMessage("This is a 4 token text.", id="fourth"),471    ]472473    def count_words(msgs: list[BaseMessage]) -> int:474        count = 0475        for msg in msgs:476            if isinstance(msg.content, str):477                count += len(msg.content.split(" "))478            else:479                count += len(480                    " ".join(block["text"] for block in msg.content).split(" ")  # type: ignore[index]481                )482        return count483484    def _split_on_space(text: str) -> list[str]:485        splits = text.split(" ")486        return [s + " " for s in splits[:-1]] + splits[-1:]487488    actual = trim_messages(489        _MESSAGES_TO_TRIM,490        max_tokens=10,491        token_counter=count_words,492        strategy="last",493        allow_partial=True,494        text_splitter=_split_on_space,495    )496    assert actual == expected497    assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY498499500def test_trim_messages_include_system_strategy_last_empty_messages() -> None:501    expected: list[BaseMessage] = []502503    actual = trim_messages(504        max_tokens=10,505        token_counter=dummy_token_counter,506        strategy="last",507        include_system=True,508    ).invoke([])509510    assert actual == expected511512513def test_trim_messages_invoke() -> None:514    actual = trim_messages(max_tokens=10, token_counter=dummy_token_counter).invoke(515        _MESSAGES_TO_TRIM516    )517    expected = trim_messages(518        _MESSAGES_TO_TRIM, max_tokens=10, token_counter=dummy_token_counter519    )520    assert actual == expected521522523def test_trim_messages_bound_model_token_counter() -> None:524    trimmer = trim_messages(525        max_tokens=10,526        token_counter=FakeTokenCountingModel().bind(foo="bar"),  # type: ignore[call-overload]527    )528    trimmer.invoke([HumanMessage("foobar")])529530531def test_trim_messages_bad_token_counter() -> None:532    trimmer = trim_messages(max_tokens=10, token_counter={})  # type: ignore[call-overload]533    with pytest.raises(534        ValueError,535        match=re.escape(536            "'token_counter' expected to be a model that implements "537            "'get_num_tokens_from_messages()' or a function. "538            "Received object of type <class 'dict'>."539        ),540    ):541        trimmer.invoke([HumanMessage("foobar")])542543544def dummy_token_counter(messages: list[BaseMessage]) -> int:545    # treat each message like it adds 3 default tokens at the beginning546    # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens547    # per message.548549    default_content_len = 4550    default_msg_prefix_len = 3551    default_msg_suffix_len = 3552553    count = 0554    for msg in messages:555        if isinstance(msg.content, str):556            count += (557                default_msg_prefix_len + default_content_len + default_msg_suffix_len558            )559        if isinstance(msg.content, list):560            count += (561                default_msg_prefix_len562                + len(msg.content) * default_content_len563                + default_msg_suffix_len564            )565    return count566567568def test_trim_messages_partial_text_splitting() -> None:569    messages = [HumanMessage(content="This is a long message that needs trimming")]570    messages_copy = [m.model_copy(deep=True) for m in messages]571572    def count_characters(msgs: list[BaseMessage]) -> int:573        return sum(len(m.content) if isinstance(m.content, str) else 0 for m in msgs)574575    # Return individual characters to test text splitting576    def char_splitter(text: str) -> list[str]:577        return list(text)578579    result = trim_messages(580        messages,581        max_tokens=10,  # Only allow 10 characters582        token_counter=count_characters,583        strategy="first",584        allow_partial=True,585        text_splitter=char_splitter,586    )587588    assert len(result) == 1589    assert result[0].content == "This is a "  # First 10 characters590    assert messages == messages_copy591592593def test_trim_messages_mixed_content_with_partial() -> None:594    messages = [595        AIMessage(596            content=[597                {"type": "text", "text": "First part of text."},598                {"type": "text", "text": "Second part that should be trimmed."},599            ]600        )601    ]602    messages_copy = [m.model_copy(deep=True) for m in messages]603604    # Count total length of all text parts605    def count_text_length(msgs: list[BaseMessage]) -> int:606        total = 0607        for msg in msgs:608            if isinstance(msg.content, list):609                for block in msg.content:610                    if isinstance(block, dict) and block.get("type") == "text":611                        total += len(block["text"])612            elif isinstance(msg.content, str):613                total += len(msg.content)614        return total615616    result = trim_messages(617        messages,618        max_tokens=20,  # Only allow first text block619        token_counter=count_text_length,620        strategy="first",621        allow_partial=True,622    )623624    assert len(result) == 1625    assert len(result[0].content) == 1626    content = result[0].content[0]627    assert isinstance(content, dict)628    assert content["text"] == "First part of text."629    assert messages == messages_copy630631632def test_trim_messages_exact_token_boundary() -> None:633    messages = [634        SystemMessage(content="10 tokens exactly."),635        HumanMessage(content="Another 10 tokens."),636    ]637638    # First message only639    result1 = trim_messages(640        messages,641        max_tokens=10,  # Exactly the size of first message642        token_counter=dummy_token_counter,643        strategy="first",644    )645    assert len(result1) == 1646    assert result1[0].content == "10 tokens exactly."647648    # Both messages exactly fit649    result2 = trim_messages(650        messages,651        max_tokens=20,  # Exactly the size of both messages652        token_counter=dummy_token_counter,653        strategy="first",654    )655    assert len(result2) == 2656    assert result2 == messages657658659def test_trim_messages_start_on_with_allow_partial() -> None:660    messages = [661        HumanMessage(content="First human message"),662        AIMessage(content="AI response"),663        HumanMessage(content="Second human message"),664    ]665    messages_copy = [m.model_copy(deep=True) for m in messages]666    result = trim_messages(667        messages,668        max_tokens=20,669        token_counter=dummy_token_counter,670        strategy="last",671        allow_partial=True,672        start_on="human",673    )674675    assert len(result) == 1676    assert result[0].content == "Second human message"677    assert messages == messages_copy678679680def test_trim_messages_token_counter_shortcut_approximate() -> None:681    """Test that `'approximate'` shortcut works for `token_counter`."""682    messages = [683        SystemMessage("This is a test message"),684        HumanMessage("Another test message", id="first"),685        AIMessage("AI response here", id="second"),686    ]687    messages_copy = [m.model_copy(deep=True) for m in messages]688689    # Test using the "approximate" shortcut690    result_shortcut = trim_messages(691        messages,692        max_tokens=50,693        token_counter="approximate",694        strategy="last",695    )696697    # Test using count_tokens_approximately directly698    result_direct = trim_messages(699        messages,700        max_tokens=50,701        token_counter=count_tokens_approximately,702        strategy="last",703    )704705    # Both should produce the same result706    assert result_shortcut == result_direct707    assert messages == messages_copy708709710def test_trim_messages_token_counter_shortcut_invalid() -> None:711    """Test that invalid `token_counter` shortcut raises `ValueError`."""712    messages = [713        SystemMessage("This is a test message"),714        HumanMessage("Another test message"),715    ]716717    # Test with invalid shortcut - intentionally passing invalid string to verify718    # runtime error handling for dynamically-constructed inputs719    with pytest.raises(ValueError, match="Invalid token_counter shortcut 'invalid'"):720        trim_messages(  # type: ignore[call-overload]721            messages,722            max_tokens=50,723            token_counter="invalid",724            strategy="last",725        )726727728def test_trim_messages_token_counter_shortcut_with_options() -> None:729    """Test that `'approximate'` shortcut works with different trim options."""730    messages = [731        SystemMessage("System instructions"),732        HumanMessage("First human message", id="first"),733        AIMessage("First AI response", id="ai1"),734        HumanMessage("Second human message", id="second"),735        AIMessage("Second AI response", id="ai2"),736    ]737    messages_copy = [m.model_copy(deep=True) for m in messages]738739    # Test with various options740    result = trim_messages(741        messages,742        max_tokens=100,743        token_counter="approximate",744        strategy="last",745        include_system=True,746        start_on="human",747    )748749    # Should include system message and start on human750    assert len(result) >= 2751    assert isinstance(result[0], SystemMessage)752    assert any(isinstance(msg, HumanMessage) for msg in result[1:])753    assert messages == messages_copy754755756class FakeTokenCountingModel(FakeChatModel):757    @override758    def get_num_tokens_from_messages(759        self,760        messages: list[BaseMessage],761        tools: Sequence[dict[str, Any] | type | Callable[..., Any] | BaseTool]762        | None = None,763    ) -> int:764        return dummy_token_counter(messages)765766767def test_convert_to_messages() -> None:768    message_like: list = [769        # BaseMessage770        SystemMessage("1"),771        SystemMessage("1.1", additional_kwargs={"__openai_role__": "developer"}),772        HumanMessage([{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"),773        AIMessage(774            [775                {"type": "text", "text": "3.1"},776                {777                    "type": "tool_use",778                    "id": "3.2",779                    "name": "3.3",780                    "input": {"3.4": "3.5"},781                },782            ]783        ),784        AIMessage(785            [786                {"type": "text", "text": "4.1"},787                {788                    "type": "tool_use",789                    "id": "4.2",790                    "name": "4.3",791                    "input": {"4.4": "4.5"},792                },793            ],794            tool_calls=[795                {796                    "name": "4.3",797                    "args": {"4.4": "4.5"},798                    "id": "4.2",799                    "type": "tool_call",800                }801            ],802        ),803        ToolMessage("5.1", tool_call_id="5.2", name="5.3"),804        # OpenAI dict805        {"role": "system", "content": "6"},806        {"role": "developer", "content": "6.1"},807        {808            "role": "user",809            "content": [{"type": "image_url", "image_url": {"url": "7.1"}}],810            "name": "7.2",811        },812        {813            "role": "assistant",814            "content": [{"type": "text", "text": "8.1"}],815            "tool_calls": [816                {817                    "type": "function",818                    "function": {819                        "arguments": json.dumps({"8.2": "8.3"}),820                        "name": "8.4",821                    },822                    "id": "8.5",823                }824            ],825            "name": "8.6",826        },827        {"role": "tool", "content": "10.1", "tool_call_id": "10.2"},828        # Tuple/List829        ("system", "11.1"),830        ("developer", "11.2"),831        ("human", [{"type": "image_url", "image_url": {"url": "12.1"}}]),832        (833            "ai",834            [835                {"type": "text", "text": "13.1"},836                {837                    "type": "tool_use",838                    "id": "13.2",839                    "name": "13.3",840                    "input": {"13.4": "13.5"},841                },842            ],843        ),844        # String845        "14.1",846        # LangChain dict847        {848            "role": "ai",849            "content": [{"type": "text", "text": "15.1"}],850            "tool_calls": [{"args": {"15.2": "15.3"}, "name": "15.4", "id": "15.5"}],851            "name": "15.6",852        },853    ]854    expected = [855        SystemMessage(content="1"),856        SystemMessage(857            content="1.1", additional_kwargs={"__openai_role__": "developer"}858        ),859        HumanMessage(860            content=[{"type": "image_url", "image_url": {"url": "2.1"}}], name="2.2"861        ),862        AIMessage(863            content=[864                {"type": "text", "text": "3.1"},865                {866                    "type": "tool_use",867                    "id": "3.2",868                    "name": "3.3",869                    "input": {"3.4": "3.5"},870                },871            ]872        ),873        AIMessage(874            content=[875                {"type": "text", "text": "4.1"},876                {877                    "type": "tool_use",878                    "id": "4.2",879                    "name": "4.3",880                    "input": {"4.4": "4.5"},881                },882            ],883            tool_calls=[884                {885                    "name": "4.3",886                    "args": {"4.4": "4.5"},887                    "id": "4.2",888                    "type": "tool_call",889                }890            ],891        ),892        ToolMessage(content="5.1", name="5.3", tool_call_id="5.2"),893        SystemMessage(content="6"),894        SystemMessage(895            content="6.1", additional_kwargs={"__openai_role__": "developer"}896        ),897        HumanMessage(898            content=[{"type": "image_url", "image_url": {"url": "7.1"}}], name="7.2"899        ),900        AIMessage(901            content=[{"type": "text", "text": "8.1"}],902            name="8.6",903            tool_calls=[904                {905                    "name": "8.4",906                    "args": {"8.2": "8.3"},907                    "id": "8.5",908                    "type": "tool_call",909                }910            ],911        ),912        ToolMessage(content="10.1", tool_call_id="10.2"),913        SystemMessage(content="11.1"),914        SystemMessage(915            content="11.2", additional_kwargs={"__openai_role__": "developer"}916        ),917        HumanMessage(content=[{"type": "image_url", "image_url": {"url": "12.1"}}]),918        AIMessage(919            content=[920                {"type": "text", "text": "13.1"},921                {922                    "type": "tool_use",923                    "id": "13.2",924                    "name": "13.3",925                    "input": {"13.4": "13.5"},926                },927            ]928        ),929        HumanMessage(content="14.1"),930        AIMessage(931            content=[{"type": "text", "text": "15.1"}],932            name="15.6",933            tool_calls=[934                {935                    "name": "15.4",936                    "args": {"15.2": "15.3"},937                    "id": "15.5",938                    "type": "tool_call",939                }940            ],941        ),942    ]943    actual = convert_to_messages(message_like)944    assert expected == actual945946947def test_convert_to_messages_openai_refusal() -> None:948    actual = convert_to_messages(949        [{"role": "assistant", "content": "", "refusal": "9.1"}]950    )951    expected = [AIMessage("", additional_kwargs={"refusal": "9.1"})]952    assert actual == expected953954    # Raises error if content is missing.955    with pytest.raises(956        ValueError, match="Message dict must contain 'role' and 'content' keys"957    ):958        convert_to_messages([{"role": "assistant", "refusal": "9.1"}])959960961def create_image_data() -> str:962    return "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q=="  # noqa: E501963964965def create_base64_image(image_format: str = "jpeg") -> str:966    data = create_image_data()967    return f"data:image/{image_format};base64,{data}"968969970def test_convert_to_openai_messages_string() -> None:971    message = "Hello"972    result = convert_to_openai_messages(message)973    assert result == {"role": "user", "content": "Hello"}974975976def test_convert_to_openai_messages_single_message() -> None:977    message: BaseMessage = HumanMessage(content="Hello")978    result = convert_to_openai_messages(message)979    assert result == {"role": "user", "content": "Hello"}980981    # Test IDs982    result = convert_to_openai_messages(message, include_id=True)983    assert result == {"role": "user", "content": "Hello"}  # no ID984985    message = AIMessage(content="Hello", id="resp_123")986    result = convert_to_openai_messages(message)987    assert result == {"role": "assistant", "content": "Hello"}988989    result = convert_to_openai_messages(message, include_id=True)990    assert result == {"role": "assistant", "content": "Hello", "id": "resp_123"}991992993def test_convert_to_openai_messages_multiple_messages() -> None:994    messages = [995        SystemMessage(content="System message"),996        HumanMessage(content="Human message"),997        AIMessage(content="AI message"),998    ]999    result = convert_to_openai_messages(messages)1000    expected = [1001        {"role": "system", "content": "System message"},1002        {"role": "user", "content": "Human message"},1003        {"role": "assistant", "content": "AI message"},1004    ]1005    assert result == expected100610071008def test_convert_to_openai_messages_openai_string() -> None:1009    messages = [1010        HumanMessage(1011            content=[1012                {"type": "text", "text": "Hello"},1013                {"type": "text", "text": "World"},1014            ]1015        ),1016        AIMessage(1017            content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]1018        ),1019    ]1020    result = convert_to_openai_messages(messages)1021    expected = [1022        {"role": "user", "content": "Hello\nWorld"},1023        {"role": "assistant", "content": "Hi\nthere"},1024    ]1025    assert result == expected102610271028def test_convert_to_openai_messages_openai_block() -> None:1029    messages = [HumanMessage(content="Hello"), AIMessage(content="Hi there")]1030    result = convert_to_openai_messages(messages, text_format="block")1031    expected = [1032        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},1033        {"role": "assistant", "content": [{"type": "text", "text": "Hi there"}]},1034    ]1035    assert result == expected103610371038def test_convert_to_openai_messages_invalid_format() -> None:1039    with pytest.raises(ValueError, match="Unrecognized text_format="):1040        convert_to_openai_messages(  # type: ignore[call-overload]1041            [HumanMessage(content="Hello")],1042            text_format="invalid",1043        )104410451046def test_convert_to_openai_messages_openai_image() -> None:1047    base64_image = create_base64_image()1048    messages = [1049        HumanMessage(1050            content=[1051                {"type": "text", "text": "Here's an image:"},1052                {"type": "image_url", "image_url": {"url": base64_image}},1053            ]1054        )1055    ]1056    result = convert_to_openai_messages(messages, text_format="block")1057    expected = [1058        {1059            "role": "user",1060            "content": [1061                {"type": "text", "text": "Here's an image:"},1062                {"type": "image_url", "image_url": {"url": base64_image}},1063            ],1064        }1065    ]1066    assert result == expected106710681069def test_convert_to_openai_messages_anthropic() -> None:1070    image_data = create_image_data()1071    messages = [1072        HumanMessage(1073            content=[1074                {1075                    "type": "text",1076                    "text": "Here's an image:",1077                    "cache_control": {"type": "ephemeral"},1078                },1079                {1080                    "type": "image",1081                    "source": {1082                        "type": "base64",1083                        "media_type": "image/jpeg",1084                        "data": image_data,1085                    },1086                },1087            ]1088        ),1089        AIMessage(1090            content=[1091                {"type": "tool_use", "name": "foo", "input": {"bar": "baz"}, "id": "1"}1092            ]1093        ),1094        HumanMessage(1095            content=[1096                {1097                    "type": "tool_result",1098                    "tool_use_id": "1",1099                    "is_error": False,1100                    "content": [1101                        {1102                            "type": "image",1103                            "source": {1104                                "type": "base64",1105                                "media_type": "image/jpeg",1106                                "data": image_data,1107                            },1108                        },1109                    ],1110                }1111            ]1112        ),1113    ]1114    result = convert_to_openai_messages(messages)1115    expected = [1116        {1117            "role": "user",1118            "content": [1119                {"type": "text", "text": "Here's an image:"},1120                {"type": "image_url", "image_url": {"url": create_base64_image()}},1121            ],1122        },1123        {1124            "role": "assistant",1125            "content": "",1126            "tool_calls": [1127                {1128                    "type": "function",1129                    "function": {1130                        "name": "foo",1131                        "arguments": json.dumps({"bar": "baz"}),1132                    },1133                    "id": "1",1134                }1135            ],1136        },1137        {1138            "role": "tool",1139            "content": [1140                {"type": "image_url", "image_url": {"url": create_base64_image()}}1141            ],1142            "tool_call_id": "1",1143        },1144    ]1145    assert result == expected11461147    # Test thinking blocks (pass through)1148    thinking_block = {1149        "signature": "abc123",1150        "thinking": "Thinking text.",1151        "type": "thinking",1152    }1153    text_block = {"text": "Response text.", "type": "text"}1154    messages = [AIMessage([thinking_block, text_block])]1155    result = convert_to_openai_messages(messages)1156    expected = [{"role": "assistant", "content": [thinking_block, text_block]}]1157    assert result == expected115811591160def test_convert_to_openai_messages_bedrock_converse_image() -> None:1161    image_data = create_image_data()1162    messages = [1163        HumanMessage(1164            content=[1165                {"type": "text", "text": "Here's an image:"},1166                {1167                    "image": {1168                        "format": "jpeg",1169                        "source": {"bytes": base64.b64decode(image_data)},1170                    }1171                },1172            ]1173        )1174    ]1175    result = convert_to_openai_messages(messages)1176    assert result[0]["content"][1]["type"] == "image_url"1177    assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()117811791180def test_convert_to_openai_messages_vertexai_image() -> None:1181    image_data = create_image_data()1182    messages = [1183        HumanMessage(1184            content=[1185                {"type": "text", "text": "Here's an image:"},1186                {1187                    "type": "media",1188                    "mime_type": "image/jpeg",1189                    "data": base64.b64decode(image_data),1190                },1191            ]1192        )1193    ]1194    result = convert_to_openai_messages(messages)1195    assert result[0]["content"][1]["type"] == "image_url"1196    assert result[0]["content"][1]["image_url"]["url"] == create_base64_image()119711981199def test_convert_to_openai_messages_tool_message() -> None:1200    tool_message = ToolMessage(content="Tool result", tool_call_id="123")1201    result = convert_to_openai_messages([tool_message], text_format="block")1202    assert len(result) == 11203    assert result[0]["content"] == [{"type": "text", "text": "Tool result"}]1204    assert result[0]["tool_call_id"] == "123"120512061207def test_convert_to_openai_messages_tool_use() -> None:1208    messages = [1209        AIMessage(1210            content=[1211                {1212                    "type": "tool_use",1213                    "id": "123",1214                    "name": "calculator",1215                    "input": {"a": "b"},1216                }1217            ]1218        )1219    ]1220    result = convert_to_openai_messages(messages, text_format="block")1221    assert result[0]["tool_calls"][0]["type"] == "function"1222    assert result[0]["tool_calls"][0]["id"] == "123"1223    assert result[0]["tool_calls"][0]["function"]["name"] == "calculator"1224    assert result[0]["tool_calls"][0]["function"]["arguments"] == json.dumps({"a": "b"})122512261227def test_convert_to_openai_messages_tool_use_unicode() -> None:1228    """Test that Unicode characters in tool call args are preserved correctly."""1229    messages = [1230        AIMessage(1231            content=[1232                {1233                    "type": "tool_use",1234                    "id": "123",1235                    "name": "create_customer",1236                    "input": {"customer_name": "你好啊集团"},1237                }1238            ]1239        )1240    ]1241    result = convert_to_openai_messages(messages, text_format="block")1242    assert result[0]["tool_calls"][0]["type"] == "function"1243    assert result[0]["tool_calls"][0]["id"] == "123"1244    assert result[0]["tool_calls"][0]["function"]["name"] == "create_customer"1245    # Ensure Unicode characters are preserved, not escaped as \\uXXXX1246    arguments_str = result[0]["tool_calls"][0]["function"]["arguments"]1247    parsed_args = json.loads(arguments_str)1248    assert parsed_args["customer_name"] == "你好啊集团"1249    # Also ensure the raw JSON string contains Unicode, not escaped sequences1250    assert "你好啊集团" in arguments_str1251    assert "\\u4f60" not in arguments_str  # Should not contain escaped Unicode125212531254def test_convert_to_openai_messages_json() -> None:1255    json_data = {"key": "value"}1256    messages = [HumanMessage(content=[{"type": "json", "json": json_data}])]1257    result = convert_to_openai_messages(messages, text_format="block")1258    assert result[0]["content"][0]["type"] == "text"1259    assert json.loads(result[0]["content"][0]["text"]) == json_data126012611262def test_convert_to_openai_messages_guard_content() -> None:1263    messages = [1264        HumanMessage(1265            content=[1266                {1267                    "type": "guard_content",1268                    "guard_content": {"text": "Protected content"},1269                }1270            ]1271        )1272    ]1273    result = convert_to_openai_messages(messages, text_format="block")1274    assert result[0]["content"][0]["type"] == "text"1275    assert result[0]["content"][0]["text"] == "Protected content"127612771278def test_convert_to_openai_messages_invalid_block() -> None:1279    messages = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]1280    with pytest.raises(ValueError, match="Unrecognized content block"):1281        convert_to_openai_messages(1282            messages,1283            text_format="block",1284            pass_through_unknown_blocks=False,1285        )1286    # Accept by default1287    result = convert_to_openai_messages(messages, text_format="block")1288    assert result == [{"role": "user", "content": [{"type": "invalid", "foo": "bar"}]}]128912901291def test_handle_openai_responses_blocks() -> None:1292    blocks: str | list[str | dict[str, Any]] = [1293        {"type": "reasoning", "id": "1"},1294        {1295            "type": "function_call",1296            "name": "multiply",1297            "arguments": '{"x":5,"y":4}',1298            "call_id": "call_abc123",1299            "id": "fc_abc123",1300            "status": "completed",1301        },1302    ]1303    message = AIMessage(content=blocks)13041305    expected_tool_call = {1306        "type": "function",1307        "function": {1308            "name": "multiply",1309            "arguments": '{"x":5,"y":4}',1310        },1311        "id": "call_abc123",1312    }1313    result = convert_to_openai_messages(message)1314    assert isinstance(result, dict)1315    assert result["content"] == blocks1316    assert result["tool_calls"] == [expected_tool_call]13171318    result = convert_to_openai_messages(message, pass_through_unknown_blocks=False)1319    assert isinstance(result, dict)1320    assert result["content"] == [{"type": "reasoning", "id": "1"}]1321    assert result["tool_calls"] == [expected_tool_call]132213231324def test_convert_to_openai_messages_empty_message() -> None:1325    result = convert_to_openai_messages(HumanMessage(content=""))1326    assert result == {"role": "user", "content": ""}132713281329def test_convert_to_openai_messages_empty_list() -> None:1330    result = convert_to_openai_messages([])1331    assert result == []133213331334def test_convert_to_openai_messages_mixed_content_types() -> None:1335    messages = [1336        HumanMessage(1337            content=[1338                "Text message",1339                {"type": "text", "text": "Structured text"},1340                {"type": "image_url", "image_url": {"url": create_base64_image()}},1341            ]1342        )1343    ]1344    result = convert_to_openai_messages(messages, text_format="block")1345    assert len(result[0]["content"]) == 31346    assert isinstance(result[0]["content"][0], dict)1347    assert isinstance(result[0]["content"][1], dict)1348    assert isinstance(result[0]["content"][2], dict)134913501351def test_convert_to_openai_messages_developer() -> None:1352    messages: list[MessageLikeRepresentation] = [1353        SystemMessage("a", additional_kwargs={"__openai_role__": "developer"}),1354        {"role": "developer", "content": "a"},1355    ]1356    result = convert_to_openai_messages(messages)1357    assert result == [{"role": "developer", "content": "a"}] * 2135813591360def test_convert_to_openai_messages_multimodal() -> None:1361    """v0 and v1 content to OpenAI messages conversion."""1362    messages = [1363        HumanMessage(1364            content=[1365                # Prior v0 blocks1366                {"type": "text", "text": "Text message"},1367                {1368                    "type": "image",1369                    "url": "https://example.com/test.png",1370                },1371                {1372                    "type": "image",1373                    "source_type": "base64",1374                    "data": "<base64 string>",1375                    "mime_type": "image/png",1376                },1377                {1378                    "type": "file",1379                    "source_type": "base64",1380                    "data": "<base64 string>",1381                    "mime_type": "application/pdf",1382                    "filename": "test.pdf",1383                },1384                {1385                    # OpenAI Chat Completions file format1386                    "type": "file",1387                    "file": {1388                        "filename": "draconomicon.pdf",1389                        "file_data": "data:application/pdf;base64,<base64 string>",1390                    },1391                },1392                {1393                    "type": "file",1394                    "source_type": "id",1395                    "id": "file-abc123",1396                },1397                {1398                    "type": "audio",1399                    "source_type": "base64",1400                    "data": "<base64 string>",1401                    "mime_type": "audio/wav",1402                },1403                {1404                    "type": "input_audio",1405                    "input_audio": {1406                        "data": "<base64 string>",1407                        "format": "wav",1408                    },1409                },1410                # v1 Additions1411                {1412                    "type": "image",1413                    "source_type": "url",  # backward compatibility v0 block field1414                    "url": "https://example.com/test.png",1415                },1416                {1417                    "type": "image",1418                    "base64": "<base64 string>",1419                    "mime_type": "image/png",1420                },1421                {1422                    "type": "file",1423                    "base64": "<base64 string>",1424                    "mime_type": "application/pdf",1425                    "filename": "test.pdf",  # backward compatibility v0 block field1426                },1427                {1428                    "type": "file",1429                    "file_id": "file-abc123",1430                },1431                {1432                    "type": "audio",1433                    "base64": "<base64 string>",1434                    "mime_type": "audio/wav",1435                },1436            ]1437        )1438    ]1439    result = convert_to_openai_messages(messages, text_format="block")1440    assert len(result) == 11441    message = result[0]1442    assert len(message["content"]) == 1314431444    # Test auto-adding filename1445    messages = [1446        HumanMessage(1447            content=[1448                {1449                    "type": "file",1450                    "base64": "<base64 string>",1451                    "mime_type": "application/pdf",1452                },1453            ]1454        )1455    ]1456    with pytest.warns(match="filename"):1457        result = convert_to_openai_messages(messages, text_format="block")1458    assert len(result) == 11459    message = result[0]1460    assert len(message["content"]) == 11461    block = message["content"][0]1462    assert block == {1463        # OpenAI Chat Completions file format1464        "type": "file",1465        "file": {1466            "file_data": "data:application/pdf;base64,<base64 string>",1467            "filename": "LC_AUTOGENERATED",1468        },1469    }147014711472def test_count_tokens_approximately_empty_messages() -> None:1473    # Test with empty message list1474    assert count_tokens_approximately([]) == 014751476    # Test with empty content1477    messages = [HumanMessage(content="")]1478    # 4 role chars -> 1 + 3 = 4 tokens1479    assert count_tokens_approximately(messages) == 4148014811482def test_count_tokens_approximately_with_names() -> None:1483    messages = [1484        # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1485        # (with name: extra 4 name chars, so total = 4 + 3 = 7 tokens)1486        HumanMessage(content="Hello", name="user"),1487        # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1488        # (with name: extra 9 name chars, so total = 7 + 3 = 10 tokens)1489        AIMessage(content="Hi there", name="assistant"),1490    ]1491    # With names included (default)1492    assert count_tokens_approximately(messages) == 1714931494    # Without names1495    without_names = count_tokens_approximately(messages, count_name=False)1496    assert without_names == 14149714981499def test_count_tokens_approximately_openai_format() -> None:1500    # same as test_count_tokens_approximately_with_names, but in OpenAI format1501    messages = [1502        {"role": "user", "content": "Hello", "name": "user"},1503        {"role": "assistant", "content": "Hi there", "name": "assistant"},1504    ]1505    # With names included (default)1506    assert count_tokens_approximately(messages) == 1715071508    # Without names1509    without_names = count_tokens_approximately(messages, count_name=False)1510    assert without_names == 14151115121513def test_count_tokens_approximately_string_content() -> None:1514    messages = [1515        # 5 chars + 4 role chars -> 3 + 3 = 6 tokens1516        HumanMessage(content="Hello"),1517        # 8 chars + 9 role chars -> 5 + 3 = 8 tokens1518        AIMessage(content="Hi there"),1519        # 12 chars + 4 role chars -> 4 + 3 = 7 tokens1520        HumanMessage(content="How are you?"),1521    ]1522    assert count_tokens_approximately(messages) == 21152315241525def test_count_tokens_approximately_list_content() -> None:1526    messages = [1527        # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1528        HumanMessage(content=[{"foo": "bar"}]),1529        # '[{"test": 123}]' -> 15 chars + 9 role chars -> 6 + 3 = 9 tokens1530        AIMessage(content=[{"test": 123}]),1531    ]1532    assert count_tokens_approximately(messages) == 17153315341535def test_count_tokens_approximately_tool_calls() -> None:1536    tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1537    messages = [1538        # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1539        AIMessage(content="", tool_calls=tool_calls),1540        # 15 chars + 4 role chars -> 5 + 3 = 8 tokens1541        HumanMessage(content="Regular message"),1542    ]1543    assert count_tokens_approximately(messages) == 331544    # AI message w/ both content and tool calls1545    # 94 chars + 9 role chars -> 26 + 3 = 29 tokens1546    messages = [1547        AIMessage(content="Regular message", tool_calls=tool_calls),1548    ]1549    assert count_tokens_approximately(messages) == 29155015511552def test_count_tokens_approximately_custom_token_length() -> None:1553    messages = [1554        # 11 chars + 4 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31555        HumanMessage(content="Hello world"),1556        # 7 chars + 9 role chars -> (4 tokens of length 4 / 8 tokens of length 2) + 31557        AIMessage(content="Testing"),1558    ]1559    assert count_tokens_approximately(messages, chars_per_token=4) == 141560    assert count_tokens_approximately(messages, chars_per_token=2) == 22156115621563def test_count_tokens_approximately_large_message_content() -> None:1564    # Test with large content to ensure no issues1565    large_text = "x" * 100001566    messages = [HumanMessage(content=large_text)]1567    # 10,000 chars + 4 role chars -> 2501 + 3 = 2504 tokens1568    assert count_tokens_approximately(messages) == 2504156915701571def test_count_tokens_approximately_large_number_of_messages() -> None:1572    # Test with large content to ensure no issues1573    messages = [HumanMessage(content="x")] * 1_0001574    # 1 chars + 4 role chars -> 2 + 3 = 5 tokens1575    assert count_tokens_approximately(messages) == 5_000157615771578def test_count_tokens_approximately_mixed_content_types() -> None:1579    # Test with a variety of content types in the same message list1580    tool_calls = [{"name": "test_tool", "args": {"foo": "bar"}, "id": "1"}]1581    messages = [1582        # 13 chars + 6 role chars -> 5 + 3 = 8 tokens1583        SystemMessage(content="System prompt"),1584        # '[{"foo": "bar"}]' -> 16 chars + 4 role chars -> 5 + 3 = 8 tokens1585        HumanMessage(content=[{"foo": "bar"}]),1586        # tool calls json -> 79 chars + 9 role chars -> 22 + 3 = 25 tokens1587        AIMessage(content="", tool_calls=tool_calls),1588        # 13 chars + 4 role chars + 9 name chars + 1 tool call ID char ->1589        # 7 + 3 = 10 tokens1590        ToolMessage(content="Tool response", name="test_tool", tool_call_id="1"),1591    ]1592    token_count = count_tokens_approximately(messages)1593    assert token_count == 5115941595    # Ensure that count is consistent if we do one message at a time1596    assert sum(count_tokens_approximately([m]) for m in messages) == token_count159715981599def test_count_tokens_approximately_usage_metadata_scaling() -> None:1600    messages = [1601        HumanMessage("text"),1602        AIMessage(1603            "text",1604            response_metadata={"model_provider": "openai"},1605            usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1606        ),1607        HumanMessage("text"),1608        AIMessage(1609            "text",1610            response_metadata={"model_provider": "openai"},1611            usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1612        ),1613    ]16141615    unscaled = count_tokens_approximately(messages)1616    scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16171618    ratio = scaled / unscaled1619    assert 1 <= round(ratio, 1) <= 1.2  # we ceil scale token counts, so can be > 1.216201621    messages.extend([ToolMessage("text", tool_call_id="abc123")] * 3)16221623    unscaled_extended = count_tokens_approximately(messages)1624    scaled_extended = count_tokens_approximately(1625        messages, use_usage_metadata_scaling=True1626    )16271628    # scaling should still be based on the most recent AIMessage with total_tokens=2001629    assert unscaled_extended > unscaled1630    assert scaled_extended > scaled16311632    # And the scaled total should be the unscaled total multiplied by the same ratio.1633    # ratio = 200 / unscaled (as of last AI message)1634    expected_scaled_extended = math.ceil(unscaled_extended * ratio)1635    assert scaled_extended <= expected_scaled_extended <= scaled_extended + 1163616371638def test_count_tokens_approximately_usage_metadata_scaling_model_provider() -> None:1639    messages = [1640        HumanMessage("Hello"),1641        AIMessage(1642            "Hi",1643            response_metadata={"model_provider": "openai"},1644            usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 100},1645        ),1646        HumanMessage("More text"),1647        AIMessage(1648            "More response",1649            response_metadata={"model_provider": "anthropic"},1650            usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 200},1651        ),1652    ]16531654    unscaled = count_tokens_approximately(messages)1655    scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)1656    assert scaled == unscaled165716581659def test_count_tokens_approximately_usage_metadata_scaling_total_tokens() -> None:1660    messages = [1661        HumanMessage("Hello"),1662        AIMessage(1663            "Hi",1664            response_metadata={"model_provider": "openai"},1665            # no usage metadata -> skip1666        ),1667    ]16681669    unscaled = count_tokens_approximately(messages, chars_per_token=5)1670    scaled = count_tokens_approximately(1671        messages, chars_per_token=5, use_usage_metadata_scaling=True1672    )16731674    assert scaled == unscaled167516761677def test_count_tokens_approximately_usage_metadata_scaling_floor_at_one() -> None:1678    messages = [1679        HumanMessage("text"),1680        AIMessage(1681            "text",1682            response_metadata={"model_provider": "openai"},1683            # Set total_tokens lower than the approximate count up through this message.1684            usage_metadata={"input_tokens": 0, "output_tokens": 0, "total_tokens": 1},1685        ),1686        HumanMessage("text"),1687    ]16881689    unscaled = count_tokens_approximately(messages)1690    scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)16911692    # scale factor would be < 1, but we floor it at 1.0 to avoid decreasing counts1693    assert scaled == unscaled169416951696def test_get_buffer_string_with_structured_content() -> None:1697    """Test get_buffer_string with structured content in messages."""1698    messages = [1699        HumanMessage(content=[{"type": "text", "text": "Hello, world!"}]),1700        AIMessage(content=[{"type": "text", "text": "Hi there!"}]),1701        SystemMessage(content=[{"type": "text", "text": "System message"}]),1702    ]1703    expected = "Human: Hello, world!\nAI: Hi there!\nSystem: System message"1704    actual = get_buffer_string(messages)1705    assert actual == expected170617071708def test_get_buffer_string_with_mixed_content() -> None:1709    """Test get_buffer_string with mixed content types in messages."""1710    messages = [1711        HumanMessage(content="Simple text"),1712        AIMessage(content=[{"type": "text", "text": "Structured text"}]),1713        SystemMessage(content=[{"type": "text", "text": "Another structured text"}]),1714    ]1715    expected = (1716        "Human: Simple text\nAI: Structured text\nSystem: Another structured text"1717    )1718    actual = get_buffer_string(messages)1719    assert actual == expected172017211722def test_get_buffer_string_with_function_call() -> None:1723    """Test get_buffer_string with function call in additional_kwargs."""1724    messages = [1725        HumanMessage(content="Hello"),1726        AIMessage(1727            content="Hi",1728            additional_kwargs={1729                "function_call": {1730                    "name": "test_function",1731                    "arguments": '{"arg": "value"}',1732                }1733            },1734        ),1735    ]1736    # TODO: consider changing this1737    expected = (1738        "Human: Hello\n"1739        "AI: Hi{'name': 'test_function', 'arguments': '{\"arg\": \"value\"}'}"1740    )1741    actual = get_buffer_string(messages)1742    assert actual == expected174317441745def test_get_buffer_string_with_empty_content() -> None:1746    """Test get_buffer_string with empty content in messages."""1747    messages = [1748        HumanMessage(content=[]),1749        AIMessage(content=""),1750        SystemMessage(content=[]),1751    ]1752    expected = "Human: \nAI: \nSystem: "1753    actual = get_buffer_string(messages)1754    assert actual == expected175517561757def test_get_buffer_string_with_tool_calls() -> None:1758    """Test `get_buffer_string` with `tool_calls` field."""1759    messages = [1760        HumanMessage(content="What's the weather?"),1761        AIMessage(1762            content="Let me check the weather",1763            tool_calls=[1764                {1765                    "name": "get_weather",1766                    "args": {"city": "NYC"},1767                    "id": "call_1",1768                    "type": "tool_call",1769                }1770            ],1771        ),1772    ]1773    result = get_buffer_string(messages)1774    assert "Human: What's the weather?" in result1775    assert "AI: Let me check the weather" in result1776    assert "get_weather" in result1777    assert "NYC" in result177817791780def test_get_buffer_string_with_tool_calls_empty_content() -> None:1781    """Test `get_buffer_string` with `tool_calls` and empty `content`."""1782    messages = [1783        AIMessage(1784            content="",1785            tool_calls=[1786                {1787                    "name": "search",1788                    "args": {"query": "test"},1789                    "id": "call_2",1790                    "type": "tool_call",1791                }1792            ],1793        ),1794    ]1795    result = get_buffer_string(messages)1796    assert "AI: " in result1797    assert "search" in result179817991800def test_get_buffer_string_tool_calls_preferred_over_function_call() -> None:1801    """Test that `tool_calls` takes precedence over legacy `function_call`."""1802    messages = [1803        AIMessage(1804            content="Calling tools",1805            tool_calls=[1806                {1807                    "name": "modern_tool",1808                    "args": {"key": "value"},1809                    "id": "call_3",1810                    "type": "tool_call",1811                }1812            ],1813            additional_kwargs={1814                "function_call": {"name": "legacy_function", "arguments": "{}"}1815            },1816        ),1817    ]1818    result = get_buffer_string(messages)1819    assert "modern_tool" in result1820    assert "legacy_function" not in result182118221823def test_convert_to_openai_messages_reasoning_content() -> None:1824    """Test convert_to_openai_messages with reasoning content blocks."""1825    # Test reasoning block with empty summary1826    msg = AIMessage(content=[{"type": "reasoning", "summary": []}])1827    result = convert_to_openai_messages(msg, text_format="block")1828    expected = {"role": "assistant", "content": [{"type": "reasoning", "summary": []}]}1829    assert result == expected18301831    # Test reasoning block with summary content1832    msg_with_summary = AIMessage(1833        content=[1834            {1835                "type": "reasoning",1836                "summary": [1837                    {"type": "text", "text": "First thought"},1838                    {"type": "text", "text": "Second thought"},1839                ],1840            }1841        ]1842    )1843    result_with_summary = convert_to_openai_messages(1844        msg_with_summary, text_format="block"1845    )1846    expected_with_summary = {1847        "role": "assistant",1848        "content": [1849            {1850                "type": "reasoning",1851                "summary": [1852                    {"type": "text", "text": "First thought"},1853                    {"type": "text", "text": "Second thought"},1854                ],1855            }1856        ],1857    }1858    assert result_with_summary == expected_with_summary18591860    # Test mixed content with reasoning and text1861    mixed_msg = AIMessage(1862        content=[1863            {"type": "text", "text": "Regular response"},1864            {1865                "type": "reasoning",1866                "summary": [{"type": "text", "text": "My reasoning process"}],1867            },1868        ]1869    )1870    mixed_result = convert_to_openai_messages(mixed_msg, text_format="block")1871    expected_mixed = {1872        "role": "assistant",1873        "content": [1874            {"type": "text", "text": "Regular response"},1875            {1876                "type": "reasoning",1877                "summary": [{"type": "text", "text": "My reasoning process"}],1878            },1879        ],1880    }1881    assert mixed_result == expected_mixed188218831884# Tests for get_buffer_string XML format188518861887def test_get_buffer_string_xml_empty_messages_list() -> None:1888    """Test XML format with empty messages list."""1889    messages: list[BaseMessage] = []1890    result = get_buffer_string(messages, format="xml")1891    expected = ""1892    assert result == expected189318941895def test_get_buffer_string_xml_basic() -> None:1896    """Test XML format output with all message types."""1897    messages = [1898        SystemMessage(content="System message"),1899        HumanMessage(content="Human message"),1900        AIMessage(content="AI message"),1901        FunctionMessage(content="Function result", name="test_fn"),1902        ToolMessage(content="Tool result", tool_call_id="123"),1903    ]1904    result = get_buffer_string(messages, format="xml")1905    expected = (1906        '<message type="system">System message</message>\n'1907        '<message type="human">Human message</message>\n'1908        '<message type="ai">AI message</message>\n'1909        '<message type="function">Function result</message>\n'1910        '<message type="tool">Tool result</message>'1911    )1912    assert result == expected191319141915def test_get_buffer_string_xml_custom_prefixes() -> None:1916    """Test XML format with custom human and ai prefixes."""1917    messages = [1918        HumanMessage(content="Hello"),1919        AIMessage(content="Hi there"),1920    ]1921    result = get_buffer_string(1922        messages, human_prefix="User", ai_prefix="Assistant", format="xml"1923    )1924    expected = (1925        '<message type="user">Hello</message>\n'1926        '<message type="assistant">Hi there</message>'1927    )1928    assert result == expected192919301931def test_get_buffer_string_xml_custom_separator() -> None:1932    """Test XML format with custom message separator."""1933    messages = [1934        HumanMessage(content="Hello"),1935        AIMessage(content="Hi there"),1936    ]1937    result = get_buffer_string(messages, format="xml", message_separator="\n\n")1938    expected = (1939        '<message type="human">Hello</message>\n\n<message type="ai">Hi there</message>'1940    )1941    assert result == expected194219431944def test_get_buffer_string_prefix_custom_separator() -> None:1945    """Test prefix format with custom message separator."""1946    messages = [1947        HumanMessage(content="Hello"),1948        AIMessage(content="Hi there"),1949    ]1950    result = get_buffer_string(messages, format="prefix", message_separator=" | ")1951    expected = "Human: Hello | AI: Hi there"1952    assert result == expected195319541955def test_get_buffer_string_xml_escaping() -> None:1956    """Test XML format properly escapes special characters in content."""1957    messages = [1958        HumanMessage(content="Is 5 < 10 & 10 > 5?"),1959        AIMessage(content='Yes, and here\'s a "quote"'),1960    ]1961    result = get_buffer_string(messages, format="xml")1962    # xml.sax.saxutils.escape escapes <, >, & (not quotes in content)1963    expected = (1964        '<message type="human">Is 5 &lt; 10 &amp; 10 &gt; 5?</message>\n'1965        '<message type="ai">Yes, and here\'s a "quote"</message>'1966    )1967    assert result == expected196819691970def test_get_buffer_string_xml_unicode_content() -> None:1971    """Test XML format with Unicode content."""1972    messages = [1973        HumanMessage(content="你好世界"),  # Chinese: Hello World1974        AIMessage(content="こんにちは"),  # Japanese: Hello1975    ]1976    result = get_buffer_string(messages, format="xml")1977    expected = (1978        '<message type="human">你好世界</message>\n'1979        '<message type="ai">こんにちは</message>'1980    )1981    assert result == expected198219831984def test_get_buffer_string_xml_chat_message_valid_role() -> None:1985    """Test XML format with `ChatMessage` having valid XML tag name role."""1986    messages = [1987        ChatMessage(content="Hello", role="Assistant"),1988    ]1989    result = get_buffer_string(messages, format="xml")1990    # Role is used directly as the type attribute value1991    expected = '<message type="Assistant">Hello</message>'1992    assert result == expected19931994    # Spaces in role1995    messages = [1996        ChatMessage(content="Hello", role="my custom role"),1997    ]1998    result = get_buffer_string(messages, format="xml")1999    # Custom roles with spaces use quoteattr for proper escaping2000    expected = '<message type="my custom role">Hello</message>'

Findings

✓ No findings reported for this file.

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.